DS-412 generate UTF-8 encoded text, improve error reporting

git-svn-id: http://scm.dspace.org/svn/repo/dspace/trunk@4635 9c30dcfa-912a-0410-8fc2-9e0234be79fd
This commit is contained in:
Larry Stone
2009-12-16 23:59:17 +00:00
parent 4078f157a2
commit 2522b32016

View File

@@ -78,10 +78,10 @@ public class XPDF2Text extends MediaFilter
{
private static Logger log = Logger.getLogger(XPDF2Text.class);
// command to get image from PDF; @FILE@, @OUTPUT@ are placeholders
// Command to get text from pdf; @infile@, @COMMAND@ are placeholders
private static final String XPDF_PDFTOTEXT_COMMAND[] =
{
"@COMMAND@", "-q", "@infile@", "-"
"@COMMAND@", "-q", "-enc", "UTF-8", "@infile@", "-"
};
@@ -134,7 +134,7 @@ public class XPDF2Text extends MediaFilter
String pdfCmd[] = XPDF_PDFTOTEXT_COMMAND.clone();
pdfCmd[0] = pdftotextPath;
pdfCmd[2] = sourceTmp.toString();
pdfCmd[4] = sourceTmp.toString();
log.debug("Running command: "+Arrays.deepToString(pdfCmd));
Process pdfProc = Runtime.getRuntime().exec(pdfCmd);
@@ -145,9 +145,16 @@ public class XPDF2Text extends MediaFilter
baos.close();
status = pdfProc.waitFor();
if (status != 0)
String msg = null;
if (status == 1)
msg = "pdftotext failed opening input: file="+sourceTmp.toString();
else if (status == 3)
msg = "pdftotext permission failure (perhaps copying of text from this document is not allowed - check PDF file's internal permissions): file="+sourceTmp.toString();
else if (status != 0)
msg = "pdftotext failed, maybe corrupt PDF? status="+String.valueOf(status);
if (msg != null)
{
String msg = "pdftotext failed, maybe corrupt PDF? status="+String.valueOf(status);
log.error(msg);
throw new IOException(msg);
}