mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-13 04:53:16 +00:00
S.F. Patch 1679972 OAIDCCrosswalk NPE and invalid character fix
git-svn-id: http://scm.dspace.org/svn/repo/trunk@2094 9c30dcfa-912a-0410-8fc2-9e0234be79fd
This commit is contained in:
@@ -40,6 +40,9 @@
|
|||||||
package org.dspace.app.oai;
|
package org.dspace.app.oai;
|
||||||
|
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
import java.util.StringTokenizer;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.dspace.content.DCValue;
|
import org.dspace.content.DCValue;
|
||||||
import org.dspace.content.Item;
|
import org.dspace.content.Item;
|
||||||
@@ -57,7 +60,12 @@ import ORG.oclc.oai.server.verb.CannotDisseminateFormatException;
|
|||||||
*/
|
*/
|
||||||
public class OAIDCCrosswalk extends Crosswalk
|
public class OAIDCCrosswalk extends Crosswalk
|
||||||
{
|
{
|
||||||
public OAIDCCrosswalk(Properties properties)
|
// Pattern containing all the characters we want to filter out / replace
|
||||||
|
// converting a String to xml
|
||||||
|
private static final Pattern invalidXmlPattern =
|
||||||
|
Pattern.compile("([^\\t\\n\\r\\u0020-\\ud7ff\\ue000-\\ufffd\\u10000-\\u10ffff]+|[&<>])");
|
||||||
|
|
||||||
|
public OAIDCCrosswalk(Properties properties)
|
||||||
{
|
{
|
||||||
super(
|
super(
|
||||||
"http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd");
|
"http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd");
|
||||||
@@ -107,51 +115,38 @@ public class OAIDCCrosswalk extends Crosswalk
|
|||||||
element = "creator";
|
element = "creator";
|
||||||
}
|
}
|
||||||
|
|
||||||
// Escape XML chars <, > and &
|
|
||||||
String value = allDC[i].value;
|
String value = allDC[i].value;
|
||||||
|
|
||||||
|
// Escape XML chars <, > and &
|
||||||
|
// Also replace all invalid characters with ' '
|
||||||
if (value != null)
|
if (value != null)
|
||||||
{
|
{
|
||||||
// remove control unicode char
|
StringBuffer valueBuf = new StringBuffer(value.length());
|
||||||
String temp = value.trim();
|
Matcher xmlMatcher = invalidXmlPattern.matcher(value.trim());
|
||||||
char[] dcvalue = temp.toCharArray();
|
while (xmlMatcher.find())
|
||||||
for (int charPos = 0; charPos < dcvalue.length; charPos++)
|
{
|
||||||
{
|
String group = xmlMatcher.group();
|
||||||
if (Character.isISOControl(dcvalue[charPos]) &&
|
|
||||||
!String.valueOf(dcvalue[charPos]).equals("\u0009") &&
|
// group will either contain a character that we need to encode for xml
|
||||||
!String.valueOf(dcvalue[charPos]).equals("\n") &&
|
// (ie. <, > or &), or it will be an invalid character
|
||||||
!String.valueOf(dcvalue[charPos]).equals("\r"))
|
// test the contents and replace appropriately
|
||||||
{
|
|
||||||
dcvalue[charPos] = ' ';
|
if (group.equals("&"))
|
||||||
}
|
xmlMatcher.appendReplacement(valueBuf, "&amp;");
|
||||||
}
|
else if (group.equals("<"))
|
||||||
value = String.valueOf(dcvalue);
|
xmlMatcher.appendReplacement(valueBuf, "&lt;");
|
||||||
|
else if (group.equals(">"))
|
||||||
|
xmlMatcher.appendReplacement(valueBuf, "&gt;");
|
||||||
|
else
|
||||||
|
xmlMatcher.appendReplacement(valueBuf, " ");
|
||||||
|
}
|
||||||
|
|
||||||
|
// add bit of the string after the final match
|
||||||
|
xmlMatcher.appendTail(valueBuf);
|
||||||
|
|
||||||
|
metadata.append("<dc:").append(element).append(">").append(
|
||||||
|
valueBuf.toString()).append("</dc:").append(element).append(">");
|
||||||
}
|
}
|
||||||
|
|
||||||
// First do &'s - need to be careful not to replace the
|
|
||||||
// & in "&amp;" again!
|
|
||||||
int c = -1;
|
|
||||||
|
|
||||||
while ((c = value.indexOf("&", c + 1)) > -1)
|
|
||||||
{
|
|
||||||
value = value.substring(0, c) + "&amp;"
|
|
||||||
+ value.substring(c + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
while ((c = value.indexOf("<")) > -1)
|
|
||||||
{
|
|
||||||
value = value.substring(0, c) + "&lt;"
|
|
||||||
+ value.substring(c + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
while ((c = value.indexOf(">")) > -1)
|
|
||||||
{
|
|
||||||
value = value.substring(0, c) + "&gt;"
|
|
||||||
+ value.substring(c + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
metadata.append("<dc:").append(element).append(">").append(
|
|
||||||
value).append("</dc:").append(element).append(">");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user