DS-3035: Upgrade to PDFBox 2 (code builds, but has not been tested)

This commit is contained in:
Ivan Masár
2016-02-04 00:59:52 +01:00
parent e066cd53d8
commit 0776a2622f
6 changed files with 27 additions and 25 deletions

View File

@@ -418,10 +418,6 @@
<groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>jempbox</artifactId>
</dependency>
<dependency>
<groupId>org.bouncycastle</groupId>
<artifactId>bcprov-jdk15</artifactId>

View File

@@ -18,7 +18,7 @@ import java.io.Writer;
import org.apache.log4j.Logger;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.pdfbox.text.PDFTextStripper;
import org.dspace.content.Item;
import org.dspace.core.ConfigurationManager;

View File

@@ -20,6 +20,9 @@ import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.io.ScratchFile;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.dspace.authorize.AuthorizeException;
@@ -299,7 +302,17 @@ public class PDFPackager
try
{
PDFParser parser = new PDFParser(metadata);
ScratchFile scratchFile = null;
try
{
scratchFile = new ScratchFile(MemoryUsageSetting.setupMixed(104857600)); // use up to 100 MB memory, fallback to temp file (unlimited size)
}
catch (IOException ioe)
{
log.warn("Error initializing scratch file: " + ioe.getMessage());
}
PDFParser parser = new PDFParser(new RandomAccessBufferedFileInputStream(metadata), scratchFile);
parser.parse();
cos = parser.getDocument();

View File

@@ -9,11 +9,11 @@ package org.dspace.disseminate;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.edit.PDPageContentStream;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.dspace.authorize.AuthorizeException;
@@ -274,13 +274,13 @@ public class CitationDocumentServiceImpl implements CitationDocumentService, Ini
@Override
public File makeCitedDocument(Context context, Bitstream bitstream)
throws IOException, SQLException, AuthorizeException, COSVisitorException {
throws IOException, SQLException, AuthorizeException {
PDDocument document = new PDDocument();
PDDocument sourceDocument = new PDDocument();
try {
Item item = (Item) bitstreamService.getParentObject(context, bitstream);
sourceDocument = sourceDocument.load(bitstreamService.retrieve(context, bitstream));
PDPage coverPage = new PDPage(PDPage.PAGE_SIZE_LETTER);
PDPage coverPage = new PDPage(PDRectangle.LETTER); // TODO: needs to be configurable
generateCoverPage(context, document, coverPage, item);
addCoverPageToDocument(document, sourceDocument, coverPage);
@@ -292,7 +292,7 @@ public class CitationDocumentServiceImpl implements CitationDocumentService, Ini
}
}
protected void generateCoverPage(Context context, PDDocument document, PDPage coverPage, Item item) throws IOException, COSVisitorException {
protected void generateCoverPage(Context context, PDDocument document, PDPage coverPage, Item item) throws IOException {
PDPageContentStream contentStream = new PDPageContentStream(document, coverPage);
try {
int ypos = 760;
@@ -360,7 +360,7 @@ public class CitationDocumentServiceImpl implements CitationDocumentService, Ini
}
protected void addCoverPageToDocument(PDDocument document, PDDocument sourceDocument, PDPage coverPage) {
List<PDPage> sourcePageList = sourceDocument.getDocumentCatalog().getAllPages();
PDPageTree sourcePageList = sourceDocument.getDocumentCatalog().getPages();
if (isCitationFirstPage()) {
//citation as cover page
@@ -375,7 +375,6 @@ public class CitationDocumentServiceImpl implements CitationDocumentService, Ini
}
document.addPage(coverPage);
}
sourcePageList.clear();
}
@Override
@@ -383,7 +382,7 @@ public class CitationDocumentServiceImpl implements CitationDocumentService, Ini
int startX, int startY, PDFont pdfFont, float fontSize) throws IOException {
float leading = 1.5f * fontSize;
PDRectangle mediabox = page.findMediaBox();
PDRectangle mediabox = page.getMediaBox();
float margin = 72;
float width = mediabox.getWidth() - 2*margin;
@@ -474,7 +473,7 @@ public class CitationDocumentServiceImpl implements CitationDocumentService, Ini
final int rows = content.length;
final int cols = content[0].length;
final float rowHeight = 20f;
final float tableWidth = page.findMediaBox().getWidth()-(2*margin);
final float tableWidth = page.getMediaBox().getWidth()-(2*margin);
final float tableHeight = rowHeight * rows;
final float colWidth = tableWidth/(float)cols;
final float cellMargin=5f;

View File

@@ -7,9 +7,8 @@
*/
package org.dspace.disseminate.service;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.edit.PDPageContentStream;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream;
@@ -74,7 +73,7 @@ public interface CitationDocumentService {
* @throws org.dspace.authorize.AuthorizeException
*/
public File makeCitedDocument(Context context, Bitstream bitstream)
throws IOException, SQLException, AuthorizeException, COSVisitorException;
throws IOException, SQLException, AuthorizeException;
public int drawStringWordWrap(PDPage page, PDPageContentStream contentStream, String text,
int startX, int startY, PDFont pdfFont, float fontSize) throws IOException;

View File

@@ -1020,17 +1020,12 @@
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>1.8.11</version>
<version>2.0.0-RC3</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId>
<version>1.8.7</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>jempbox</artifactId>
<version>1.8.7</version>
<version>2.0.0-RC3</version>
</dependency>
<dependency>
<groupId>org.bouncycastle</groupId>