mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-07 01:54:22 +00:00
Merge pull request #1236 from KevinVdV/DS-2629-excel-media-filter
Ds 2629 excel media filter
This commit is contained in:
@@ -0,0 +1,114 @@
|
||||
/**
|
||||
* The contents of this file are subject to the license and copyright
|
||||
* detailed in the LICENSE and NOTICE files at the root of the source
|
||||
* tree and available online at
|
||||
*
|
||||
* http://www.dspace.org/license/
|
||||
*/
|
||||
package org.dspace.app.mediafilter;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.poi.POITextExtractor;
|
||||
import org.apache.poi.extractor.ExtractorFactory;
|
||||
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
||||
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.dspace.content.Item;
|
||||
|
||||
/*
|
||||
* ExcelFilter
|
||||
*
|
||||
* Entries you must add to dspace.cfg:
|
||||
*
|
||||
* filter.plugins = blah, \
|
||||
* Excel Text Extractor
|
||||
*
|
||||
* plugin.named.org.dspace.app.mediafilter.FormatFilter = \
|
||||
* blah = blah, \
|
||||
* org.dspace.app.mediafilter.ExcelFilter = Excel Text Extractor
|
||||
*
|
||||
* #Configure each filter's input Formats
|
||||
* filter.org.dspace.app.mediafilter.ExcelFilter.inputFormats = Microsoft Excel, Microsoft Excel XML
|
||||
*
|
||||
*/
|
||||
public class ExcelFilter extends MediaFilter
|
||||
{
|
||||
|
||||
private static Logger log = Logger.getLogger(ExcelFilter.class);
|
||||
|
||||
public String getFilteredName(String oldFilename)
|
||||
{
|
||||
return oldFilename + ".txt";
|
||||
}
|
||||
|
||||
/**
|
||||
* @return String bundle name
|
||||
*
|
||||
*/
|
||||
public String getBundleName()
|
||||
{
|
||||
return "TEXT";
|
||||
}
|
||||
|
||||
/**
|
||||
* @return String bitstream format
|
||||
*
|
||||
*
|
||||
*/
|
||||
public String getFormatString()
|
||||
{
|
||||
return "Text";
|
||||
}
|
||||
|
||||
/**
|
||||
* @return String description
|
||||
*/
|
||||
public String getDescription()
|
||||
{
|
||||
return "Extracted text";
|
||||
}
|
||||
|
||||
/**
|
||||
* @param source
|
||||
* source input stream
|
||||
*
|
||||
* @return InputStream the resulting input stream
|
||||
*/
|
||||
public InputStream getDestinationStream(Item item, InputStream source, boolean verbose)
|
||||
throws Exception
|
||||
{
|
||||
String extractedText = null;
|
||||
|
||||
try
|
||||
{
|
||||
POITextExtractor theExtractor = ExtractorFactory.createExtractor(source);
|
||||
if (theExtractor instanceof ExcelExtractor)
|
||||
{
|
||||
// for xls file
|
||||
extractedText = (theExtractor).getText();
|
||||
}
|
||||
else if (theExtractor instanceof XSSFExcelExtractor)
|
||||
{
|
||||
// for xlsx file
|
||||
extractedText = (theExtractor).getText();
|
||||
}
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
log.error("Error filtering bitstream: " + e.getMessage(), e);
|
||||
throw e;
|
||||
}
|
||||
|
||||
if (extractedText != null)
|
||||
{
|
||||
// generate an input stream with the extracted text
|
||||
return IOUtils.toInputStream(extractedText, StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
@@ -332,7 +332,8 @@ http.proxy.port = ${http.proxy.port}
|
||||
#Names of the enabled MediaFilter or FormatFilter plugins
|
||||
filter.plugins = PDF Text Extractor, HTML Text Extractor, \
|
||||
PowerPoint Text Extractor, \
|
||||
Word Text Extractor, JPEG Thumbnail
|
||||
Word Text Extractor, JPEG Thumbnail, \
|
||||
Excel Text Extractor
|
||||
|
||||
# [To enable Branded Preview]: uncomment and insert the following into the plugin list
|
||||
# Branded Preview JPEG, \
|
||||
@@ -351,7 +352,8 @@ plugin.named.org.dspace.app.mediafilter.FormatFilter = \
|
||||
org.dspace.app.mediafilter.JPEGFilter = JPEG Thumbnail, \
|
||||
org.dspace.app.mediafilter.BrandedPreviewJPEGFilter = Branded Preview JPEG, \
|
||||
org.dspace.app.mediafilter.ImageMagickImageThumbnailFilter = ImageMagick Image Thumbnail, \
|
||||
org.dspace.app.mediafilter.ImageMagickPdfThumbnailFilter = ImageMagick PDF Thumbnail
|
||||
org.dspace.app.mediafilter.ImageMagickPdfThumbnailFilter = ImageMagick PDF Thumbnail, \
|
||||
org.dspace.app.mediafilter.ExcelFilter = Excel Text Extractor
|
||||
|
||||
#Configure each filter's input format(s)
|
||||
filter.org.dspace.app.mediafilter.PDFFilter.inputFormats = Adobe PDF
|
||||
@@ -362,6 +364,7 @@ filter.org.dspace.app.mediafilter.JPEGFilter.inputFormats = BMP, GIF, JPEG, imag
|
||||
filter.org.dspace.app.mediafilter.BrandedPreviewJPEGFilter.inputFormats = BMP, GIF, JPEG, image/png
|
||||
filter.org.dspace.app.mediafilter.ImageMagickImageThumbnailFilter.inputFormats = BMP, GIF, image/png, JPG, TIFF, JPEG, JPEG 2000
|
||||
filter.org.dspace.app.mediafilter.ImageMagickPdfThumbnailFilter.inputFormats = Adobe PDF
|
||||
filter.org.dspace.app.mediafilter.ExcelFilter.inputFormats = Microsoft Excel, Microsoft Excel XML
|
||||
|
||||
#Publicly accessible thumbnails of restricted content.
|
||||
#List the MediaFilter names that would get publicly accessible permissions
|
||||
|
Reference in New Issue
Block a user