[DS-2629] Add ability to filter Excel (xls and xlsx) files for full-text searching — small tweaks:

* Ensured compilation against latest master
* Removed the verbose printing of the extracted text, as it would really clutter the output
* Condensed the String-to-InputStream conversion from three lines into a single one
* Removed obsolete constructor call to the "ExtractorFactory"
* Removed a TODO ("Check that this is correct") after verifying the item it referred to
This commit is contained in:
KevinVdV
2016-01-07 12:27:45 +01:00
parent a052516b49
commit e51ba3f5b6

View File

@@ -7,15 +7,17 @@
*/
package org.dspace.app.mediafilter;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.IOUtils;
import org.apache.poi.POITextExtractor;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
import org.apache.log4j.Logger;
import org.dspace.content.Item;
/*
* ExcelFilter
@@ -55,7 +57,7 @@ public class ExcelFilter extends MediaFilter
/**
* @return String bitstream format
*
* TODO: Check that this is correct
*
*/
public String getFormatString()
{
@@ -76,19 +78,24 @@ public class ExcelFilter extends MediaFilter
*
* @return InputStream the resulting input stream
*/
public InputStream getDestinationStream(InputStream source)
public InputStream getDestinationStream(Item item, InputStream source, boolean verbose)
throws Exception
{
String extractedText = null;
try
{
new ExtractorFactory();
POITextExtractor theExtractor = ExtractorFactory.createExtractor(source);
if (theExtractor instanceof ExcelExtractor) // for xls file
extractedText = ((ExcelExtractor ) theExtractor).getText();
else if (theExtractor instanceof XSSFExcelExtractor) // for xlsx file
extractedText = ((XSSFExcelExtractor ) theExtractor).getText();
if (theExtractor instanceof ExcelExtractor)
{
// for xls file
extractedText = (theExtractor).getText();
}
else if (theExtractor instanceof XSSFExcelExtractor)
{
// for xlsx file
extractedText = (theExtractor).getText();
}
}
catch (Exception e)
{
@@ -98,18 +105,8 @@ public class ExcelFilter extends MediaFilter
if (extractedText != null)
{
// if verbose flag is set, print out extracted text
// to STDOUT
if (MediaFilterManager.isVerbose)
{
System.out.println(extractedText);
}
// generate an input stream with the extracted text
byte[] textBytes = extractedText.getBytes();
ByteArrayInputStream bais = new ByteArrayInputStream(textBytes);
return bais;
return IOUtils.toInputStream(extractedText, StandardCharsets.UTF_8);
}
return null;