S.F. Patch 1679972 OAIDCCrosswalk NPE and invalid character fix

git-svn-id: http://scm.dspace.org/svn/repo/trunk@2094 9c30dcfa-912a-0410-8fc2-9e0234be79fd
This commit is contained in:
Graham Triggs
2007-07-25 16:09:22 +00:00
parent aadd02c274
commit 98fc9b8786

View File

@@ -40,6 +40,9 @@
package org.dspace.app.oai;
import java.util.Properties;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.dspace.content.DCValue;
import org.dspace.content.Item;
@@ -57,7 +60,12 @@ import ORG.oclc.oai.server.verb.CannotDisseminateFormatException;
*/
public class OAIDCCrosswalk extends Crosswalk
{
public OAIDCCrosswalk(Properties properties)
// Pattern containing all the characters we want to filter out / replace
// converting a String to xml
private static final Pattern invalidXmlPattern =
Pattern.compile("([^\\t\\n\\r\\u0020-\\ud7ff\\ue000-\\ufffd\\u10000-\\u10ffff]+|[&<>])");
public OAIDCCrosswalk(Properties properties)
{
super(
"http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd");
@@ -107,51 +115,38 @@ public class OAIDCCrosswalk extends Crosswalk
element = "creator";
}
// Escape XML chars <, > and &
String value = allDC[i].value;
// Escape XML chars <, > and &
// Also replace all invalid characters with ' '
if (value != null)
{
// remove control unicode char
String temp = value.trim();
char[] dcvalue = temp.toCharArray();
for (int charPos = 0; charPos < dcvalue.length; charPos++)
{
if (Character.isISOControl(dcvalue[charPos]) &&
!String.valueOf(dcvalue[charPos]).equals("\u0009") &&
!String.valueOf(dcvalue[charPos]).equals("\n") &&
!String.valueOf(dcvalue[charPos]).equals("\r"))
{
dcvalue[charPos] = ' ';
}
}
value = String.valueOf(dcvalue);
StringBuffer valueBuf = new StringBuffer(value.length());
Matcher xmlMatcher = invalidXmlPattern.matcher(value.trim());
while (xmlMatcher.find())
{
String group = xmlMatcher.group();
// group will either contain a character that we need to encode for xml
// (ie. <, > or &), or it will be an invalid character
// test the contents and replace appropriately
if (group.equals("&"))
xmlMatcher.appendReplacement(valueBuf, "&amp;");
else if (group.equals("<"))
xmlMatcher.appendReplacement(valueBuf, "&lt;");
else if (group.equals(">"))
xmlMatcher.appendReplacement(valueBuf, "&gt;");
else
xmlMatcher.appendReplacement(valueBuf, " ");
}
// add bit of the string after the final match
xmlMatcher.appendTail(valueBuf);
metadata.append("<dc:").append(element).append(">").append(
valueBuf.toString()).append("</dc:").append(element).append(">");
}
// First do &'s - need to be careful not to replace the
// & in "&amp;" again!
int c = -1;
while ((c = value.indexOf("&", c + 1)) > -1)
{
value = value.substring(0, c) + "&amp;"
+ value.substring(c + 1);
}
while ((c = value.indexOf("<")) > -1)
{
value = value.substring(0, c) + "&lt;"
+ value.substring(c + 1);
}
while ((c = value.indexOf(">")) > -1)
{
value = value.substring(0, c) + "&gt;"
+ value.substring(c + 1);
}
metadata.append("<dc:").append(element).append(">").append(
value).append("</dc:").append(element).append(">");
}
}