mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-07 10:04:21 +00:00
dspace-api: tell ImageMagick about the PDF CropBox
ImageMagick uses the MediaBox by default when rasterizing PDFs because the PDF specification says that all PDFs *must* contain one. This page box is the parent for all other boxes that a PDF *may* contain, for example a CropBox, ArtBox, etc. In many cases these are the same, but when they are not the CropBox is used to define the area displayed to a user when they open the PDF on screen (as opposed to when printing on paper). If a PDF has a CropBox that is different to its MediaBox then we should tell ImageMagick to use it. Fixes: https://github.com/DSpace/DSpace/issues/8549
This commit is contained in:
@@ -14,6 +14,9 @@ import java.io.InputStream;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.dspace.content.Bitstream;
|
||||
import org.dspace.content.Bundle;
|
||||
import org.dspace.content.Item;
|
||||
@@ -132,6 +135,26 @@ public abstract class ImageMagickThumbnailFilter extends MediaFilter {
|
||||
op.density(Integer.valueOf(density));
|
||||
}
|
||||
|
||||
// Check the PDF's MediaBox and CropBox to see if they are the same.
|
||||
// If not, then tell ImageMagick to use the CropBox when generating
|
||||
// the thumbnail because the CropBox is generally used to define the
|
||||
// area displayed when a user opens the PDF on a screen, whereas the
|
||||
// MediaBox is used for print. Not all PDFs set these correctly, so
|
||||
// we can use ImageMagick's default behavior unless we see an explicit
|
||||
// CropBox. Note: we don't need to do anything special to detect if
|
||||
// the CropBox is missing or empty because pdfbox will set it to the
|
||||
// same size as the MediaBox if it doesn't exist. Also note that we
|
||||
// only need to check the first page, since that's what we use for
|
||||
// generating the thumbnail (PDDocument uses a zero-based index).
|
||||
PDPage pdfPage = PDDocument.load(f).getPage(0);
|
||||
PDRectangle pdfPageMediaBox = pdfPage.getMediaBox();
|
||||
PDRectangle pdfPageCropBox = pdfPage.getCropBox();
|
||||
|
||||
// This option must come *before* we open the input file.
|
||||
if (pdfPageCropBox != pdfPageMediaBox) {
|
||||
op.define("pdf:use-cropbox=true");
|
||||
}
|
||||
|
||||
String s = "[" + page + "]";
|
||||
op.addImage(f.getAbsolutePath() + s);
|
||||
if (configurationService.getBooleanProperty(PRE + ".flatten", true)) {
|
||||
|
Reference in New Issue
Block a user