[Port dspace-8_x] fix(#10721): Sanitize non-characters during OAI indexing (#11397)

* fix(#10721): Sanitize non-characters during OAI indexing

(cherry picked from commit ad890b0661)

* refactor: Use StringEscapeUtils as suggested in review

(cherry picked from commit 0ddd5ad575)

* fix: Removed whitespace before the import that was causing error

Maven Unit Test failed due to the whitespace before the import
https://github.com/DSpace/DSpace/actions/runs/16891881837/job/47853392956?pr=11139#step:4:1959

(cherry picked from commit 406bffdcca)

* fix: Removed trailing whitespace that was causing error

(cherry picked from commit 266ac423b4)

---------

Co-authored-by: JohnnyMendesC <177888064+JohnnyMendesC@users.noreply.github.com>
This commit is contained in:
DSpace Bot
2025-10-01 16:38:54 -05:00
committed by GitHub
parent f88ea11ead
commit 521e281e4b

View File

@@ -18,6 +18,7 @@ import java.util.List;
import com.lyncode.xoai.dataprovider.xml.xoai.Element; import com.lyncode.xoai.dataprovider.xml.xoai.Element;
import com.lyncode.xoai.dataprovider.xml.xoai.Metadata; import com.lyncode.xoai.dataprovider.xml.xoai.Metadata;
import com.lyncode.xoai.util.Base64Utils; import com.lyncode.xoai.util.Base64Utils;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import org.dspace.app.util.factory.UtilServiceFactory; import org.dspace.app.util.factory.UtilServiceFactory;
@@ -165,6 +166,19 @@ public class ItemUtils {
return bundles; return bundles;
} }
/**
 * Prepares a metadata value for inclusion in XML 1.0 output by delegating to
 * {@link StringEscapeUtils#escapeXml10(String)} from Apache Commons Text.
 * <p>
 * Per the Commons Text documentation, that call drops characters XML 1.0
 * cannot represent (most control characters, unpaired surrogates) and also
 * replaces the markup characters {@code & < > " '} with entity references,
 * so the result is escaped text rather than merely filtered text.
 * NOTE(review): if the serializer escapes the value again, markup characters
 * would end up double-escaped -- confirm against the XML writer in use.
 *
 * @param value The string to sanitize; may be null.
 * @return The escaped string, or null if the input was null.
 */
private static String sanitize(String value) {
    // Null is passed through untouched so callers keep their "no value" signal.
    return value == null ? null : StringEscapeUtils.escapeXml10(value);
}
/** /**
* This method will add metadata information about associated resource policies for a give bitstream. * This method will add metadata information about associated resource policies for a give bitstream.
* It will parse of relevant policies and add metadata information * It will parse of relevant policies and add metadata information
@@ -281,7 +295,7 @@ public class ItemUtils {
valueElem = language; valueElem = language;
} }
valueElem.getField().add(createValue("value", val.getValue())); valueElem.getField().add(createValue("value", sanitize(val.getValue())));
if (val.getAuthority() != null) { if (val.getAuthority() != null) {
valueElem.getField().add(createValue("authority", val.getAuthority())); valueElem.getField().add(createValue("authority", val.getAuthority()));
if (val.getConfidence() != Choices.CF_NOVALUE) { if (val.getConfidence() != Choices.CF_NOVALUE) {