diff --git a/dspace-api/src/main/java/org/dspace/app/mediafilter/XPDF2Text.java b/dspace-api/src/main/java/org/dspace/app/mediafilter/XPDF2Text.java deleted file mode 100644 index b3369b48d5..0000000000 --- a/dspace-api/src/main/java/org/dspace/app/mediafilter/XPDF2Text.java +++ /dev/null @@ -1,166 +0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ -package org.dspace.app.mediafilter; - -import java.io.BufferedInputStream; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.Arrays; - -import org.apache.log4j.Logger; -import org.dspace.content.Item; -import org.dspace.core.ConfigurationManager; -import org.dspace.core.Utils; - -/** - * Text MediaFilter for PDF sources - * - * This filter produces extracted text suitable for building an index, - * but not for display to end users. - * It forks a process running the "pdftotext" program from the - * XPdf suite -- see http://www.foolabs.com/xpdf/ - * This is a suite of open-source PDF tools that has been widely ported - * to Unix platforms and the ones we use (pdftoppm, pdftotext) even - * run on Win32. - * - * This was written for the FACADE project but it is not directly connected - * to any of the other FACADE-specific software. The FACADE UI expects - * to find thumbnail images for 3D PDFs generated by this filter. - * - * Requires DSpace config properties keys: - * - * xpdf.path.pdftotext -- path to "pdftotext" executable (required!) - * - * @author Larry Stone - * @see org.dspace.app.mediafilter.MediaFilter - */ -public class XPDF2Text extends MediaFilter -{ - private static Logger log = Logger.getLogger(XPDF2Text.class); - - // Command to get text from pdf; @infile@, @COMMAND@ are placeholders - protected static final String XPDF_PDFTOTEXT_COMMAND[] = - { - "@COMMAND@", "-q", "-enc", "UTF-8", "@infile@", "-" - }; - - - // executable path that comes from DSpace config at runtime. - private String pdftotextPath = null; - - @Override - public String getFilteredName(String oldFilename) - { - return oldFilename + ".txt"; - } - - @Override - public String getBundleName() - { - return "TEXT"; - } - - @Override - public String getFormatString() - { - return "Text"; - } - - @Override - public String getDescription() - { - return "Extracted Text"; - } - - @Override - public InputStream getDestinationStream(Item currentItem, InputStream sourceStream, boolean verbose) - throws Exception - { - // get configured value for path to XPDF command: - if (pdftotextPath == null) - { - pdftotextPath = ConfigurationManager.getProperty("xpdf.path.pdftotext"); - if (pdftotextPath == null) - { - throw new IllegalStateException("No value for key \"xpdf.path.pdftotext\" in DSpace configuration! Should be path to XPDF pdftotext executable."); - } - } - - File sourceTmp = File.createTempFile("DSfilt",".pdf"); - sourceTmp.deleteOnExit(); // extra insurance, we'll delete it here. - int status = -1; - try - { - // make local temp copy of source PDF since PDF tools - // require a file for random access. - // XXX fixme could optimize if we ever get an interface to grab asset *files* - OutputStream sto = new FileOutputStream(sourceTmp); - Utils.copy(sourceStream, sto); - sto.close(); - sourceStream.close(); - - String pdfCmd[] = XPDF_PDFTOTEXT_COMMAND.clone(); - pdfCmd[0] = pdftotextPath; - pdfCmd[4] = sourceTmp.toString(); - - log.debug("Running command: "+Arrays.deepToString(pdfCmd)); - Process pdfProc = Runtime.getRuntime().exec(pdfCmd); - InputStream stdout = pdfProc.getInputStream(); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - Utils.copy(new BufferedInputStream(stdout), baos); - stdout.close(); - baos.close(); - - status = pdfProc.waitFor(); - String msg = null; - if (status == 1) - { - msg = "pdftotext failed opening input: file=" + sourceTmp.toString(); - } - else if (status == 3) - { - msg = "pdftotext permission failure (perhaps copying of text from this document is not allowed - check PDF file's internal permissions): file=" + sourceTmp.toString(); - } - else if (status != 0) - { - msg = "pdftotext failed, maybe corrupt PDF? status=" + String.valueOf(status); - } - - if (msg != null) - { - log.error(msg); - throw new IOException(msg); - } - - return new ByteArrayInputStream(baos.toByteArray()); - } - catch (InterruptedException e) - { - log.error("Failed in pdftotext subprocess: ",e); - throw e; - } - finally - { - if (!sourceTmp.delete()) - { - log.error("Unable to delete temporary file"); - } - if (status != 0) - { - log.error("PDF conversion proc failed, returns=" + status + ", file=" + sourceTmp); - } - } - } -} - - diff --git a/dspace-api/src/main/java/org/dspace/app/mediafilter/XPDF2Thumbnail.java b/dspace-api/src/main/java/org/dspace/app/mediafilter/XPDF2Thumbnail.java deleted file mode 100644 index ed1dd9e847..0000000000 --- a/dspace-api/src/main/java/org/dspace/app/mediafilter/XPDF2Thumbnail.java +++ /dev/null @@ -1,520 +0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ -package org.dspace.app.mediafilter; - -import java.awt.Graphics2D; -import java.awt.Color; -import java.awt.image.*; -import java.awt.RenderingHints; -import java.awt.Transparency; -import java.io.BufferedReader; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.util.Arrays; -import java.util.regex.MatchResult; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import javax.imageio.ImageIO; - -import org.apache.log4j.Logger; -import org.dspace.content.Item; -import org.dspace.core.ConfigurationManager; -import org.dspace.core.Utils; - -/** - * Thumbnail MediaFilter for PDF sources - * - * This filter generates thumbnail images for PDF documents, _including_ - * 3D PDF documents with 2D "poster" images. Since the PDFBox library - * does not understand these, and fails to render a lot of other PDFs, - * this filter forks a process running the "pdftoppm" program from the - * XPdf suite -- see http://www.foolabs.com/xpdf/ - * This is a suite of open-source PDF tools that has been widely ported - * to Unix platforms and the ones we use (pdftoppm, pdfinfo) even - * run on Win32. - * - * This was written for the FACADE project but it is not directly connected - * to any of the other FACADE-specific software. The FACADE UI expects - * to find thumbnail images for 3D PDFs generated by this filter. - * - * Requires DSpace config properties keys: - * - * xpdf.path.pdftoppm -- absolute path to "pdftoppm" executable (required!) - * xpdf.path.pdfinfo -- absolute path to "pdfinfo" executable (required!) - * thumbnail.maxwidth -- borrowed from thumbnails, max dim of generated image - * - * @author Larry Stone - * @see org.dspace.app.mediafilter.MediaFilter - */ -public class XPDF2Thumbnail extends MediaFilter -{ - private static Logger log = Logger.getLogger(XPDF2Thumbnail.class); - - // maximum size of either preview image dimension - protected static final int MAX_PX = 800; - - // maxium DPI - use common screen res, 100dpi. - protected static final int MAX_DPI = 100; - - // command to get image from PDF; @FILE@, @OUTPUT@ are placeholders - protected static final String XPDF_PDFTOPPM_COMMAND[] = - { - "@COMMAND@", "-q", "-f", "1", "-l", "1", - "-r", "@DPI@", "@FILE@", "@OUTPUTFILE@" - }; - - // command to get image from PDF; @FILE@, @OUTPUT@ are placeholders - protected static final String XPDF_PDFINFO_COMMAND[] = - { - "@COMMAND@", "-f", "1", "-l", "1", "-box", "@FILE@" - }; - - // executable path for "pdftoppm", comes from DSpace config at runtime. - protected String pdftoppmPath = null; - - // executable path for "pdfinfo", comes from DSpace config at runtime. - protected String pdfinfoPath = null; - - // match line in pdfinfo output that describes file's MediaBox - protected static final Pattern MEDIABOX_PATT = Pattern.compile( - "^Page\\s+\\d+\\s+MediaBox:\\s+([\\.\\d-]+)\\s+([\\.\\d-]+)\\s+([\\.\\d-]+)\\s+([\\.\\d-]+)"); - - // also from thumbnail.maxwidth in config - protected int xmax = 0; - - // backup default for size, on the large side. - protected static final int DEFAULT_XMAX = 500; - - @Override - public String getFilteredName(String oldFilename) - { - return oldFilename + ".jpg"; - } - - @Override - public String getBundleName() - { - return "THUMBNAIL"; - } - - @Override - public String getFormatString() - { - return "JPEG"; - } - - @Override - public String getDescription() - { - return "Generated Thumbnail"; - } - - // canonical MediaFilter method to generate the thumbnail as stream. - @Override - public InputStream getDestinationStream(Item currentItem, InputStream sourceStream, boolean verbose) - throws Exception - { - // get config params - float xmax = (float) ConfigurationManager - .getIntProperty("thumbnail.maxwidth"); - float ymax = (float) ConfigurationManager - .getIntProperty("thumbnail.maxheight"); - boolean blurring = (boolean) ConfigurationManager - .getBooleanProperty("thumbnail.blurring"); - boolean hqscaling = (boolean) ConfigurationManager - .getBooleanProperty("thumbnail.hqscaling"); - - // sanity check: xpdf paths are required. can cache since it won't change - if (pdftoppmPath == null || pdfinfoPath == null) - { - pdftoppmPath = ConfigurationManager.getProperty("xpdf.path.pdftoppm"); - pdfinfoPath = ConfigurationManager.getProperty("xpdf.path.pdfinfo"); - if (pdftoppmPath == null) - { - throw new IllegalStateException("No value for key \"xpdf.path.pdftoppm\" in DSpace configuration! Should be path to XPDF pdftoppm executable."); - } - if (pdfinfoPath == null) - { - throw new IllegalStateException("No value for key \"xpdf.path.pdfinfo\" in DSpace configuration! Should be path to XPDF pdfinfo executable."); - } - - if (xmax == 0) - { - xmax = DEFAULT_XMAX; - } - } - - // make local file copy of source PDF since the PDF tools - // require a file for random access. - // XXX fixme would be nice to optimize this if we ever get - // XXX a DSpace method to access (optionally!) the _file_ of - // a Bitstream in the asset store, only when there is one of course. - File sourceTmp = File.createTempFile("DSfilt",".pdf"); - sourceTmp.deleteOnExit(); - int status = 0; - BufferedImage source = null; - try - { - OutputStream sto = new FileOutputStream(sourceTmp); - Utils.copy(sourceStream, sto); - sto.close(); - sourceStream.close(); - - // First get max physical dim of bounding box of first page - // to compute the DPI to ask for.. otherwise some AutoCAD - // drawings can produce enormous files even at 75dpi, for - // 48" drawings.. - - // run pdfinfo, look for MediaBox description in the output, e.g. - // "Page 1 MediaBox: 0.00 0.00 612.00 792.00" - // - int dpi = 0; - String pdfinfoCmd[] = XPDF_PDFINFO_COMMAND.clone(); - pdfinfoCmd[0] = pdfinfoPath; - pdfinfoCmd[pdfinfoCmd.length-1] = sourceTmp.toString(); - BufferedReader lr = null; - try - { - MatchResult mediaBox = null; - Process pdfProc = Runtime.getRuntime().exec(pdfinfoCmd); - lr = new BufferedReader(new InputStreamReader(pdfProc.getInputStream())); - String line; - for (line = lr.readLine(); line != null; line = lr.readLine()) - { - // if (line.matches(MEDIABOX_PATT)) - Matcher mm = MEDIABOX_PATT.matcher(line); - if (mm.matches()) - { - mediaBox = mm.toMatchResult(); - } - } - int istatus = pdfProc.waitFor(); - if (istatus != 0) - { - log.error("XPDF pdfinfo proc failed, exit status=" + istatus + ", file=" + sourceTmp); - } - if (mediaBox == null) - { - log.error("Sanity check: Did not find \"MediaBox\" line in output of XPDF pdfinfo, file="+sourceTmp); - throw new IllegalArgumentException("Failed to get MediaBox of PDF with pdfinfo, cannot compute thumbnail."); - } - else - { - double x0 = Double.parseDouble(mediaBox.group(1)); - double y0 = Double.parseDouble(mediaBox.group(2)); - double x1 = Double.parseDouble(mediaBox.group(3)); - double y1 = Double.parseDouble(mediaBox.group(4)); - int maxdim = (int)Math.max(Math.abs(x1 - x0), Math.abs(y1 - y0)); - dpi = Math.min(MAX_DPI, (MAX_PX * 72 / maxdim)); - log.debug("DPI: pdfinfo method got dpi="+dpi+" for max dim="+maxdim+" (points, 1/72\")"); - } - } - catch (InterruptedException e) - { - log.error("Failed transforming file for preview: ",e); - throw new IllegalArgumentException("Failed transforming file for thumbnail: ",e); - } - catch (NumberFormatException e) - { - log.error("Failed interpreting pdfinfo results, check regexp: ",e); - throw new IllegalArgumentException("Failed transforming file for thumbnail: ",e); - } - finally - { - if (lr != null) - { - lr.close(); - } - } - - // Render page 1 using xpdf's pdftoppm - // Requires Sun JAI imageio additions to read ppm directly. - // this will get "-000001.ppm" appended to it by pdftoppm - File outPrefixF = File.createTempFile("prevu","out"); - String outPrefix = outPrefixF.toString(); - if (!outPrefixF.delete()) - { - log.error("Unable to delete output file"); - } - String pdfCmd[] = XPDF_PDFTOPPM_COMMAND.clone(); - pdfCmd[0] = pdftoppmPath; - pdfCmd[pdfCmd.length-3] = String.valueOf(dpi); - pdfCmd[pdfCmd.length-2] = sourceTmp.toString(); - pdfCmd[pdfCmd.length-1] = outPrefix; - File outf = new File(outPrefix+"-000001.ppm"); - log.debug("Running xpdf command: "+Arrays.deepToString(pdfCmd)); - try - { - Process pdfProc = Runtime.getRuntime().exec(pdfCmd); - status = pdfProc.waitFor(); - if (!outf.exists()) outf = new File(outPrefix+"-00001.ppm"); - if (!outf.exists()) outf = new File(outPrefix+"-0001.ppm"); - if (!outf.exists()) outf = new File(outPrefix+"-001.ppm"); - if (!outf.exists()) outf = new File(outPrefix+"-01.ppm"); - if (!outf.exists()) outf = new File(outPrefix+"-1.ppm"); - log.debug("PDFTOPPM output is: "+outf+", exists="+outf.exists()); - source = ImageIO.read(outf); - } - catch (InterruptedException e) - { - log.error("Failed transforming file for preview: ",e); - throw new IllegalArgumentException("Failed transforming file for preview: ",e); - } - finally - { - if (!outf.delete()) - { - log.error("Unable to delete file"); - } - } - } - finally - { - if (!sourceTmp.delete()) - { - log.error("Unable to delete temporary source"); - } - - if (status != 0) - { - log.error("PDF conversion proc failed, exit status=" + status + ", file=" + sourceTmp); - } - } - - if (source == null) - { - throw new IOException("Unknown failure while transforming file to preview: no image produced."); - } - - // read in bitstream's image - BufferedImage buf = source; - - // now get the image dimensions - float xsize = (float) buf.getWidth(null); - float ysize = (float) buf.getHeight(null); - - // if verbose flag is set, print out dimensions - // to STDOUT - if (verbose) - { - System.out.println("original size: " + xsize + "," + ysize); - } - - // scale by x first if needed - if (xsize > xmax) - { - // calculate scaling factor so that xsize * scale = new size (max) - float scale_factor = xmax / xsize; - - // if verbose flag is set, print out extracted text - // to STDOUT - if (verbose) - { - System.out.println("x scale factor: " + scale_factor); - } - - // now reduce x size - // and y size - xsize = xsize * scale_factor; - ysize = ysize * scale_factor; - - // if verbose flag is set, print out extracted text - // to STDOUT - if (verbose) - { - System.out.println("new size: " + xsize + "," + ysize); - } - } - - // scale by y if needed - if (ysize > ymax) - { - float scale_factor = ymax / ysize; - - // now reduce x size - // and y size - xsize = xsize * scale_factor; - ysize = ysize * scale_factor; - } - - // if verbose flag is set, print details to STDOUT - if (verbose) - { - System.out.println("created thumbnail size: " + xsize + ", " - + ysize); - } - - // create an image buffer for the thumbnail with the new xsize, ysize - BufferedImage thumbnail = new BufferedImage((int) xsize, (int) ysize, - BufferedImage.TYPE_INT_RGB); - - // Use blurring if selected in config. - // a little blur before scaling does wonders for keeping moire in check. - if (blurring) - { - // send the buffered image off to get blurred. - buf = getBlurredInstance((BufferedImage) buf); - } - - // Use high quality scaling method if selected in config. - // this has a definite performance penalty. - if (hqscaling) - { - // send the buffered image off to get an HQ downscale. - buf = getScaledInstance((BufferedImage) buf, (int) xsize, (int) ysize, - (Object) RenderingHints.VALUE_INTERPOLATION_BICUBIC, (boolean) true); - } - - // now render the image into the thumbnail buffer - Graphics2D g2d = thumbnail.createGraphics(); - g2d.drawImage(buf, 0, 0, (int) xsize, (int) ysize, null); - - // now create an input stream for the thumbnail buffer and return it - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - - ImageIO.write(thumbnail, "jpeg", baos); - - // now get the array - ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); - - return bais; // hope this gets written out before its garbage collected! - } - - public String[] getInputMIMETypes() - { - return ImageIO.getReaderMIMETypes(); - } - - public String[] getInputDescriptions() - { - return null; - } - - public String[] getInputExtensions() - { - // Temporarily disabled as JDK 1.6 only - // return ImageIO.getReaderFileSuffixes(); - return null; - } - - public BufferedImage getNormalizedInstance(BufferedImage buf) - { - int type = (buf.getTransparency() == Transparency.OPAQUE) ? - BufferedImage.TYPE_INT_RGB : BufferedImage.TYPE_INT_ARGB_PRE; - int w, h; - w = buf.getWidth(); - h = buf.getHeight(); - BufferedImage normal = new BufferedImage(w, h, type); - Graphics2D g2d = normal.createGraphics(); - g2d.drawImage(buf, 0, 0, w, h, Color.WHITE, null); - g2d.dispose(); - return normal; - } - - public BufferedImage getBlurredInstance(BufferedImage buf) - { - /** - * Convenience method that returns a blurred instance of the - * provided {@code BufferedImage}. - * - */ - - buf = getNormalizedInstance(buf); - - // kernel for blur op - float[] matrix = { - 0.111f, 0.111f, 0.111f, - 0.111f, 0.111f, 0.111f, - 0.111f, 0.111f, 0.111f, - }; - - // perform the blur and return the blurred version. - BufferedImageOp blur = new ConvolveOp( new Kernel(3, 3, matrix) ); - BufferedImage blurbuf = blur.filter(buf, null); - return blurbuf; - } - - /** - * Convenience method that returns a scaled instance of the - * provided {@code BufferedImage}. - * - * @param buf the original image to be scaled - * @param targetWidth the desired width of the scaled instance, - * in pixels - * @param targetHeight the desired height of the scaled instance, - * in pixels - * @param hint one of the rendering hints that corresponds to - * {@code RenderingHints.KEY_INTERPOLATION} (e.g. - * {@code RenderingHints.VALUE_INTERPOLATION_NEAREST_NEIGHBOR}, - * {@code RenderingHints.VALUE_INTERPOLATION_BILINEAR}, - * {@code RenderingHints.VALUE_INTERPOLATION_BICUBIC}) - * @param higherQuality if true, this method will use a multi-step - * scaling technique that provides higher quality than the usual - * one-step technique (only useful in downscaling cases, where - * {@code targetWidth} or {@code targetHeight} is - * smaller than the original dimensions, and generally only when - * the {@code BILINEAR} hint is specified) - * @return a scaled version of the original {@code BufferedImage} - */ - public BufferedImage getScaledInstance(BufferedImage buf, - int targetWidth, - int targetHeight, - Object hint, - boolean higherQuality) - { - int type = (buf.getTransparency() == Transparency.OPAQUE) ? - BufferedImage.TYPE_INT_RGB : BufferedImage.TYPE_INT_ARGB; - BufferedImage scalebuf = (BufferedImage)buf; - int w, h; - if (higherQuality) { - // Use multi-step technique: start with original size, then - // scale down in multiple passes with drawImage() - // until the target size is reached - w = buf.getWidth(); - h = buf.getHeight(); - } else { - // Use one-step technique: scale directly from original - // size to target size with a single drawImage() call - w = targetWidth; - h = targetHeight; - } - - do { - if (higherQuality && w > targetWidth) { - w /= 2; - if (w < targetWidth) { - w = targetWidth; - } - } - - if (higherQuality && h > targetHeight) { - h /= 2; - if (h < targetHeight) { - h = targetHeight; - } - } - - BufferedImage tmp = new BufferedImage(w, h, type); - Graphics2D g2d = tmp.createGraphics(); - g2d.setRenderingHint(RenderingHints.KEY_INTERPOLATION, hint); - g2d.drawImage(scalebuf, 0, 0, w, h, Color.WHITE, null); - g2d.dispose(); - - scalebuf = tmp; - } while (w != targetWidth || h != targetHeight); - - return scalebuf; - } -} diff --git a/dspace/config/dspace.cfg b/dspace/config/dspace.cfg index df8e29987e..50f76b5b37 100644 --- a/dspace/config/dspace.cfg +++ b/dspace/config/dspace.cfg @@ -388,7 +388,7 @@ filter.org.dspace.app.mediafilter.ExcelFilter.inputFormats = Microsoft Excel, Mi #Publicly accessible thumbnails of restricted content. #List the MediaFilter name's that would get publicly accessible permissions #Any media filters not listed will instead inherit the permissions of the parent bitstream -#filter.org.dspace.app.mediafilter.publicPermission = JPEGFilter, XPDF2Thumbnail +#filter.org.dspace.app.mediafilter.publicPermission = JPEGFilter #Custom settings for PDFFilter # If true, all PDF extractions are written to temp files as they are indexed...this diff --git a/dspace/modules/additions/pom.xml b/dspace/modules/additions/pom.xml index 5422aeee3e..721455501b 100644 --- a/dspace/modules/additions/pom.xml +++ b/dspace/modules/additions/pom.xml @@ -42,30 +42,6 @@ - - - - xpdf-mediafilter-support - - false - - - - com.sun.media - jai_imageio - - - javax.media - jai_core - - - -