mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-11 12:03:09 +00:00
[DS-2629] Add ability to filter Excel (xls and xlsx) files for full text searching

Small tweaks:
* Ensured compilation against latest master
* Removed the verbose printing of the extracted text, as it would really clutter the output
* Compressed the string-to-InputStream conversion from 3 lines into a single one
* Removed the obsolete constructor call to the "ExtractorFactory"
* Removed a TODO that I verified
This commit is contained in:
@@ -7,15 +7,17 @@
|
|||||||
*/
|
*/
|
||||||
package org.dspace.app.mediafilter;
|
package org.dspace.app.mediafilter;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.poi.POITextExtractor;
|
import org.apache.poi.POITextExtractor;
|
||||||
import org.apache.poi.extractor.ExtractorFactory;
|
import org.apache.poi.extractor.ExtractorFactory;
|
||||||
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
||||||
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
|
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.dspace.content.Item;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ExcelFilter
|
* ExcelFilter
|
||||||
@@ -55,7 +57,7 @@ public class ExcelFilter extends MediaFilter
|
|||||||
/**
|
/**
|
||||||
* @return String bitstream format
|
* @return String bitstream format
|
||||||
*
|
*
|
||||||
* TODO: Check that this is correct
|
*
|
||||||
*/
|
*/
|
||||||
public String getFormatString()
|
public String getFormatString()
|
||||||
{
|
{
|
||||||
@@ -76,19 +78,24 @@ public class ExcelFilter extends MediaFilter
|
|||||||
*
|
*
|
||||||
* @return InputStream the resulting input stream
|
* @return InputStream the resulting input stream
|
||||||
*/
|
*/
|
||||||
public InputStream getDestinationStream(InputStream source)
|
public InputStream getDestinationStream(Item item, InputStream source, boolean verbose)
|
||||||
throws Exception
|
throws Exception
|
||||||
{
|
{
|
||||||
String extractedText = null;
|
String extractedText = null;
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
new ExtractorFactory();
|
|
||||||
POITextExtractor theExtractor = ExtractorFactory.createExtractor(source);
|
POITextExtractor theExtractor = ExtractorFactory.createExtractor(source);
|
||||||
if (theExtractor instanceof ExcelExtractor) // for xls file
|
if (theExtractor instanceof ExcelExtractor)
|
||||||
extractedText = ((ExcelExtractor ) theExtractor).getText();
|
{
|
||||||
else if (theExtractor instanceof XSSFExcelExtractor) // for xlsx file
|
// for xls file
|
||||||
extractedText = ((XSSFExcelExtractor ) theExtractor).getText();
|
extractedText = (theExtractor).getText();
|
||||||
|
}
|
||||||
|
else if (theExtractor instanceof XSSFExcelExtractor)
|
||||||
|
{
|
||||||
|
// for xlsx file
|
||||||
|
extractedText = (theExtractor).getText();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
catch (Exception e)
|
catch (Exception e)
|
||||||
{
|
{
|
||||||
@@ -98,18 +105,8 @@ public class ExcelFilter extends MediaFilter
|
|||||||
|
|
||||||
if (extractedText != null)
|
if (extractedText != null)
|
||||||
{
|
{
|
||||||
// if verbose flag is set, print out extracted text
|
|
||||||
// to STDOUT
|
|
||||||
if (MediaFilterManager.isVerbose)
|
|
||||||
{
|
|
||||||
System.out.println(extractedText);
|
|
||||||
}
|
|
||||||
|
|
||||||
// generate an input stream with the extracted text
|
// generate an input stream with the extracted text
|
||||||
byte[] textBytes = extractedText.getBytes();
|
return IOUtils.toInputStream(extractedText, StandardCharsets.UTF_8);
|
||||||
ByteArrayInputStream bais = new ByteArrayInputStream(textBytes);
|
|
||||||
|
|
||||||
return bais;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
|
Reference in New Issue
Block a user