mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-07 01:54:22 +00:00
S.F. Patch 1679972 OAIDCCrosswalk NPE and invalid character fix
git-svn-id: http://scm.dspace.org/svn/repo/trunk@2094 9c30dcfa-912a-0410-8fc2-9e0234be79fd
This commit is contained in:
@@ -40,6 +40,9 @@
|
||||
package org.dspace.app.oai;
|
||||
|
||||
import java.util.Properties;
|
||||
import java.util.StringTokenizer;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.dspace.content.DCValue;
|
||||
import org.dspace.content.Item;
|
||||
@@ -57,7 +60,12 @@ import ORG.oclc.oai.server.verb.CannotDisseminateFormatException;
|
||||
*/
|
||||
public class OAIDCCrosswalk extends Crosswalk
|
||||
{
|
||||
public OAIDCCrosswalk(Properties properties)
|
||||
// Pattern containing all the characters we want to filter out / replace
|
||||
// converting a String to xml
|
||||
private static final Pattern invalidXmlPattern =
|
||||
Pattern.compile("([^\\t\\n\\r\\u0020-\\ud7ff\\ue000-\\ufffd\\u10000-\\u10ffff]+|[&<>])");
|
||||
|
||||
public OAIDCCrosswalk(Properties properties)
|
||||
{
|
||||
super(
|
||||
"http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd");
|
||||
@@ -107,51 +115,38 @@ public class OAIDCCrosswalk extends Crosswalk
|
||||
element = "creator";
|
||||
}
|
||||
|
||||
// Escape XML chars <, > and &
|
||||
String value = allDC[i].value;
|
||||
|
||||
// Escape XML chars <, > and &
|
||||
// Also replace all invalid characters with ' '
|
||||
if (value != null)
|
||||
{
|
||||
// remove control unicode char
|
||||
String temp = value.trim();
|
||||
char[] dcvalue = temp.toCharArray();
|
||||
for (int charPos = 0; charPos < dcvalue.length; charPos++)
|
||||
{
|
||||
if (Character.isISOControl(dcvalue[charPos]) &&
|
||||
!String.valueOf(dcvalue[charPos]).equals("\u0009") &&
|
||||
!String.valueOf(dcvalue[charPos]).equals("\n") &&
|
||||
!String.valueOf(dcvalue[charPos]).equals("\r"))
|
||||
{
|
||||
dcvalue[charPos] = ' ';
|
||||
}
|
||||
}
|
||||
value = String.valueOf(dcvalue);
|
||||
StringBuffer valueBuf = new StringBuffer(value.length());
|
||||
Matcher xmlMatcher = invalidXmlPattern.matcher(value.trim());
|
||||
while (xmlMatcher.find())
|
||||
{
|
||||
String group = xmlMatcher.group();
|
||||
|
||||
// group will either contain a character that we need to encode for xml
|
||||
// (ie. <, > or &), or it will be an invalid character
|
||||
// test the contents and replace appropriately
|
||||
|
||||
if (group.equals("&"))
|
||||
xmlMatcher.appendReplacement(valueBuf, "&");
|
||||
else if (group.equals("<"))
|
||||
xmlMatcher.appendReplacement(valueBuf, "<");
|
||||
else if (group.equals(">"))
|
||||
xmlMatcher.appendReplacement(valueBuf, ">");
|
||||
else
|
||||
xmlMatcher.appendReplacement(valueBuf, " ");
|
||||
}
|
||||
|
||||
// add bit of the string after the final match
|
||||
xmlMatcher.appendTail(valueBuf);
|
||||
|
||||
metadata.append("<dc:").append(element).append(">").append(
|
||||
valueBuf.toString()).append("</dc:").append(element).append(">");
|
||||
}
|
||||
|
||||
// First do &'s - need to be careful not to replace the
|
||||
// & in "&" again!
|
||||
int c = -1;
|
||||
|
||||
while ((c = value.indexOf("&", c + 1)) > -1)
|
||||
{
|
||||
value = value.substring(0, c) + "&"
|
||||
+ value.substring(c + 1);
|
||||
}
|
||||
|
||||
while ((c = value.indexOf("<")) > -1)
|
||||
{
|
||||
value = value.substring(0, c) + "<"
|
||||
+ value.substring(c + 1);
|
||||
}
|
||||
|
||||
while ((c = value.indexOf(">")) > -1)
|
||||
{
|
||||
value = value.substring(0, c) + ">"
|
||||
+ value.substring(c + 1);
|
||||
}
|
||||
|
||||
metadata.append("<dc:").append(element).append(">").append(
|
||||
value).append("</dc:").append(element).append(">");
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user