S.F. Patch 1679972 OAIDCCrosswalk NPE and invalid character fix

git-svn-id: http://scm.dspace.org/svn/repo/trunk@2094 9c30dcfa-912a-0410-8fc2-9e0234be79fd
This commit is contained in:
Graham Triggs
2007-07-25 16:09:22 +00:00
parent aadd02c274
commit 98fc9b8786

View File

@@ -40,6 +40,9 @@
package org.dspace.app.oai; package org.dspace.app.oai;
import java.util.Properties; import java.util.Properties;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.dspace.content.DCValue; import org.dspace.content.DCValue;
import org.dspace.content.Item; import org.dspace.content.Item;
@@ -57,6 +60,11 @@ import ORG.oclc.oai.server.verb.CannotDisseminateFormatException;
*/ */
public class OAIDCCrosswalk extends Crosswalk public class OAIDCCrosswalk extends Crosswalk
{ {
// Matches everything that must not be copied verbatim into XML character
// data: either a run of characters outside the XML 1.0 "Char" production
// (#x9 | #xA | #xD | #x20-#xD7FF | #xE000-#xFFFD | #x10000-#x10FFFF),
// or a single '&', '<' or '>' that has to be escaped as an entity.
//
// The supplementary range #x10000-#x10FFFF is written with literal surrogate
// pairs (D800 DC00 .. DBFF DFFF) embedded in the pattern string. The regex
// u-escape reads exactly four hex digits, so a five- or six-digit form
// silently turns into a BMP character followed by stray literals, and valid
// supplementary characters would then be treated as invalid and blanked out.
private static final Pattern invalidXmlPattern =
        Pattern.compile("([^\\t\\n\\r\\u0020-\\ud7ff\\ue000-\\ufffd\ud800\udc00-\udbff\udfff]+|[&<>])");
public OAIDCCrosswalk(Properties properties) public OAIDCCrosswalk(Properties properties)
{ {
super( super(
@@ -107,51 +115,38 @@ public class OAIDCCrosswalk extends Crosswalk
element = "creator"; element = "creator";
} }
// Escape XML chars <, > and &
String value = allDC[i].value; String value = allDC[i].value;
// Escape XML chars <, > and &
// Also replace all invalid characters with ' '
if (value != null) if (value != null)
{ {
// remove control unicode char StringBuffer valueBuf = new StringBuffer(value.length());
String temp = value.trim(); Matcher xmlMatcher = invalidXmlPattern.matcher(value.trim());
char[] dcvalue = temp.toCharArray(); while (xmlMatcher.find())
for (int charPos = 0; charPos < dcvalue.length; charPos++)
{ {
if (Character.isISOControl(dcvalue[charPos]) && String group = xmlMatcher.group();
!String.valueOf(dcvalue[charPos]).equals("\u0009") &&
!String.valueOf(dcvalue[charPos]).equals("\n") && // group will either contain a character that we need to encode for xml
!String.valueOf(dcvalue[charPos]).equals("\r")) // (ie. <, > or &), or it will be an invalid character
{ // test the contents and replace appropriately
dcvalue[charPos] = ' ';
} if (group.equals("&"))
} xmlMatcher.appendReplacement(valueBuf, "&amp;");
value = String.valueOf(dcvalue); else if (group.equals("<"))
xmlMatcher.appendReplacement(valueBuf, "&lt;");
else if (group.equals(">"))
xmlMatcher.appendReplacement(valueBuf, "&gt;");
else
xmlMatcher.appendReplacement(valueBuf, " ");
} }
// First do &'s - need to be careful not to replace the // add bit of the string after the final match
// & in "&amp;" again! xmlMatcher.appendTail(valueBuf);
int c = -1;
while ((c = value.indexOf("&", c + 1)) > -1)
{
value = value.substring(0, c) + "&amp;"
+ value.substring(c + 1);
}
while ((c = value.indexOf("<")) > -1)
{
value = value.substring(0, c) + "&lt;"
+ value.substring(c + 1);
}
while ((c = value.indexOf(">")) > -1)
{
value = value.substring(0, c) + "&gt;"
+ value.substring(c + 1);
}
metadata.append("<dc:").append(element).append(">").append( metadata.append("<dc:").append(element).append(">").append(
value).append("</dc:").append(element).append(">"); valueBuf.toString()).append("</dc:").append(element).append(">");
}
} }
} }