Merge pull request #11399 from tdonohue/port_11139_to_7x

[Port dspace-7_x] fix(#10721): Sanitize non-characters during OAI indexing
This commit is contained in:
Tim Donohue
2025-10-01 16:39:31 -05:00
committed by GitHub

View File

@@ -16,6 +16,7 @@ import java.util.List;
import com.lyncode.xoai.dataprovider.xml.xoai.Element;
import com.lyncode.xoai.dataprovider.xml.xoai.Metadata;
import com.lyncode.xoai.util.Base64Utils;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.app.util.factory.UtilServiceFactory;
@@ -159,6 +160,19 @@ public class ItemUtils {
return bundles;
}
/**
 * Prepares a string for inclusion in XML 1.0 output by delegating to
 * {@link StringEscapeUtils#escapeXml10(String)}.
 * <p>
 * Per the Apache Commons Text documentation, that call drops characters
 * that cannot be represented in XML 1.0 (for example most control
 * characters and unpaired surrogates) and additionally rewrites the
 * markup characters {@code & < > " '} as XML entities. The result is
 * therefore escaped text, not merely the input with invalid characters
 * stripped.
 * <p>
 * NOTE(review): if the writer that later serializes this value also
 * performs XML escaping, entities produced here would be escaped a second
 * time (e.g. {@code &amp;amp;}) - confirm against the serializer.
 *
 * @param value The string to sanitize; may be null.
 * @return The escaped string, or null if the input was null.
 */
private static String sanitize(String value) {
    // Null is passed straight through so callers keep their "no value" marker.
    return value == null ? null : StringEscapeUtils.escapeXml10(value);
}
private static Element createLicenseElement(Context context, Item item)
throws SQLException, AuthorizeException, IOException {
Element license = create("license");
@@ -232,7 +246,7 @@ public class ItemUtils {
valueElem = language;
}
valueElem.getField().add(createValue("value", val.getValue()));
valueElem.getField().add(createValue("value", sanitize(val.getValue())));
if (val.getAuthority() != null) {
valueElem.getField().add(createValue("authority", val.getAuthority()));
if (val.getConfidence() != Choices.CF_NOVALUE) {