DS-3035: Upgrade to PDFBox 2 (code builds, but has not been tested)

This commit is contained in:
Ivan Masár
2016-02-04 00:59:52 +01:00
parent e066cd53d8
commit 0776a2622f
6 changed files with 27 additions and 25 deletions

View File

@@ -418,10 +418,6 @@
<groupId>org.apache.pdfbox</groupId> <groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId> <artifactId>fontbox</artifactId>
</dependency> </dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>jempbox</artifactId>
</dependency>
<dependency> <dependency>
<groupId>org.bouncycastle</groupId> <groupId>org.bouncycastle</groupId>
<artifactId>bcprov-jdk15</artifactId> <artifactId>bcprov-jdk15</artifactId>

View File

@@ -18,7 +18,7 @@ import java.io.Writer;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper; import org.apache.pdfbox.text.PDFTextStripper;
import org.dspace.content.Item; import org.dspace.content.Item;
import org.dspace.core.ConfigurationManager; import org.dspace.core.ConfigurationManager;

View File

@@ -20,6 +20,9 @@ import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.apache.pdfbox.cos.COSDocument; import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.pdfparser.PDFParser; import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.io.ScratchFile;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation; import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.AuthorizeException;
@@ -299,7 +302,17 @@ public class PDFPackager
try try
{ {
PDFParser parser = new PDFParser(metadata); ScratchFile scratchFile = null;
try
{
scratchFile = new ScratchFile(MemoryUsageSetting.setupMixed(104857600)); // use up to 100 MB memory, fallback to temp file (unlimited size)
}
catch (IOException ioe)
{
log.warn("Error initializing scratch file: " + ioe.getMessage());
}
PDFParser parser = new PDFParser(new RandomAccessBufferedFileInputStream(metadata), scratchFile);
parser.parse(); parser.parse();
cos = parser.getDocument(); cos = parser.getDocument();

View File

@@ -9,11 +9,11 @@ package org.dspace.disseminate;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.edit.PDPageContentStream; import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font; import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.AuthorizeException;
@@ -274,13 +274,13 @@ public class CitationDocumentServiceImpl implements CitationDocumentService, Ini
@Override @Override
public File makeCitedDocument(Context context, Bitstream bitstream) public File makeCitedDocument(Context context, Bitstream bitstream)
throws IOException, SQLException, AuthorizeException, COSVisitorException { throws IOException, SQLException, AuthorizeException {
PDDocument document = new PDDocument(); PDDocument document = new PDDocument();
PDDocument sourceDocument = new PDDocument(); PDDocument sourceDocument = new PDDocument();
try { try {
Item item = (Item) bitstreamService.getParentObject(context, bitstream); Item item = (Item) bitstreamService.getParentObject(context, bitstream);
sourceDocument = sourceDocument.load(bitstreamService.retrieve(context, bitstream)); sourceDocument = sourceDocument.load(bitstreamService.retrieve(context, bitstream));
PDPage coverPage = new PDPage(PDPage.PAGE_SIZE_LETTER); PDPage coverPage = new PDPage(PDRectangle.LETTER); // TODO: needs to be configurable
generateCoverPage(context, document, coverPage, item); generateCoverPage(context, document, coverPage, item);
addCoverPageToDocument(document, sourceDocument, coverPage); addCoverPageToDocument(document, sourceDocument, coverPage);
@@ -292,7 +292,7 @@ public class CitationDocumentServiceImpl implements CitationDocumentService, Ini
} }
} }
protected void generateCoverPage(Context context, PDDocument document, PDPage coverPage, Item item) throws IOException, COSVisitorException { protected void generateCoverPage(Context context, PDDocument document, PDPage coverPage, Item item) throws IOException {
PDPageContentStream contentStream = new PDPageContentStream(document, coverPage); PDPageContentStream contentStream = new PDPageContentStream(document, coverPage);
try { try {
int ypos = 760; int ypos = 760;
@@ -360,7 +360,7 @@ public class CitationDocumentServiceImpl implements CitationDocumentService, Ini
} }
protected void addCoverPageToDocument(PDDocument document, PDDocument sourceDocument, PDPage coverPage) { protected void addCoverPageToDocument(PDDocument document, PDDocument sourceDocument, PDPage coverPage) {
List<PDPage> sourcePageList = sourceDocument.getDocumentCatalog().getAllPages(); PDPageTree sourcePageList = sourceDocument.getDocumentCatalog().getPages();
if (isCitationFirstPage()) { if (isCitationFirstPage()) {
//citation as cover page //citation as cover page
@@ -375,7 +375,6 @@ public class CitationDocumentServiceImpl implements CitationDocumentService, Ini
} }
document.addPage(coverPage); document.addPage(coverPage);
} }
sourcePageList.clear();
} }
@Override @Override
@@ -383,7 +382,7 @@ public class CitationDocumentServiceImpl implements CitationDocumentService, Ini
int startX, int startY, PDFont pdfFont, float fontSize) throws IOException { int startX, int startY, PDFont pdfFont, float fontSize) throws IOException {
float leading = 1.5f * fontSize; float leading = 1.5f * fontSize;
PDRectangle mediabox = page.findMediaBox(); PDRectangle mediabox = page.getMediaBox();
float margin = 72; float margin = 72;
float width = mediabox.getWidth() - 2*margin; float width = mediabox.getWidth() - 2*margin;
@@ -474,7 +473,7 @@ public class CitationDocumentServiceImpl implements CitationDocumentService, Ini
final int rows = content.length; final int rows = content.length;
final int cols = content[0].length; final int cols = content[0].length;
final float rowHeight = 20f; final float rowHeight = 20f;
final float tableWidth = page.findMediaBox().getWidth()-(2*margin); final float tableWidth = page.getMediaBox().getWidth()-(2*margin);
final float tableHeight = rowHeight * rows; final float tableHeight = rowHeight * rows;
final float colWidth = tableWidth/(float)cols; final float colWidth = tableWidth/(float)cols;
final float cellMargin=5f; final float cellMargin=5f;

View File

@@ -7,9 +7,8 @@
*/ */
package org.dspace.disseminate.service; package org.dspace.disseminate.service;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.edit.PDPageContentStream; import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDFont;
import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream; import org.dspace.content.Bitstream;
@@ -74,7 +73,7 @@ public interface CitationDocumentService {
* @throws org.dspace.authorize.AuthorizeException * @throws org.dspace.authorize.AuthorizeException
*/ */
public File makeCitedDocument(Context context, Bitstream bitstream) public File makeCitedDocument(Context context, Bitstream bitstream)
throws IOException, SQLException, AuthorizeException, COSVisitorException; throws IOException, SQLException, AuthorizeException;
public int drawStringWordWrap(PDPage page, PDPageContentStream contentStream, String text, public int drawStringWordWrap(PDPage page, PDPageContentStream contentStream, String text,
int startX, int startY, PDFont pdfFont, float fontSize) throws IOException; int startX, int startY, PDFont pdfFont, float fontSize) throws IOException;

View File

@@ -1020,17 +1020,12 @@
<dependency> <dependency>
<groupId>org.apache.pdfbox</groupId> <groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId> <artifactId>pdfbox</artifactId>
<version>1.8.11</version> <version>2.0.0-RC3</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.pdfbox</groupId> <groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId> <artifactId>fontbox</artifactId>
<version>1.8.7</version> <version>2.0.0-RC3</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>jempbox</artifactId>
<version>1.8.7</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.bouncycastle</groupId> <groupId>org.bouncycastle</groupId>