Safe and consistent XML entity handling in parsers

This commit is contained in:
Kim Shepherd
2025-07-02 10:39:50 +02:00
parent 89462082ef
commit 4685450194
33 changed files with 229 additions and 123 deletions

View File

@@ -10,7 +10,6 @@ package org.dspace.administer;
import java.io.File;
import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.xpath.XPath;
@@ -18,6 +17,7 @@ import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.dspace.app.util.XMLUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
@@ -49,8 +49,9 @@ public class RegistryImporter {
*/
public static Document loadXML(String filename)
throws IOException, ParserConfigurationException, SAXException {
DocumentBuilder builder = DocumentBuilderFactory.newInstance()
.newDocumentBuilder();
// This XML builder will *not* disable external entities as XML
// registries are considered trusted content
DocumentBuilder builder = XMLUtils.getTrustedDocumentBuilder();
Document document = builder.parse(new File(filename));

View File

@@ -13,7 +13,6 @@ import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.xpath.XPath;
@@ -29,6 +28,7 @@ import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Logger;
import org.dspace.app.util.XMLUtils;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.BitstreamFormat;
import org.dspace.content.factory.ContentServiceFactory;
@@ -266,8 +266,9 @@ public class RegistryLoader {
*/
private static Document loadXML(String filename) throws IOException,
ParserConfigurationException, SAXException {
DocumentBuilder builder = DocumentBuilderFactory.newInstance()
.newDocumentBuilder();
// This XML builder will *not* disable external entities as XML
// registries are considered trusted content
DocumentBuilder builder = XMLUtils.getTrustedDocumentBuilder();
return builder.parse(new File(filename));
}
@@ -351,4 +352,4 @@ public class RegistryLoader {
return data;
}
}
}

View File

@@ -27,7 +27,6 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.xpath.XPath;
@@ -43,6 +42,7 @@ import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
import org.dspace.app.util.XMLUtils;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Collection;
import org.dspace.content.Community;
@@ -613,8 +613,8 @@ public class StructBuilder {
*/
private static org.w3c.dom.Document loadXML(InputStream input)
throws IOException, ParserConfigurationException, SAXException {
DocumentBuilder builder = DocumentBuilderFactory.newInstance()
.newDocumentBuilder();
// This builder factory does not disable external DTD, entities, etc.
DocumentBuilder builder = XMLUtils.getTrustedDocumentBuilder();
org.w3c.dom.Document document = builder.parse(input);

View File

@@ -47,7 +47,6 @@ import java.util.UUID;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.xpath.XPath;
@@ -67,6 +66,7 @@ import org.apache.logging.log4j.Logger;
import org.dspace.app.itemimport.service.ItemImportService;
import org.dspace.app.util.LocalSchemaFilenameFilter;
import org.dspace.app.util.RelationshipUtils;
import org.dspace.app.util.XMLUtils;
import org.dspace.authorize.AuthorizeException;
import org.dspace.authorize.ResourcePolicy;
import org.dspace.authorize.service.AuthorizeService;
@@ -179,6 +179,8 @@ public class ItemImportServiceImpl implements ItemImportService, InitializingBea
@Autowired(required = true)
protected MetadataValueService metadataValueService;
protected DocumentBuilder builder;
protected String tempWorkDir;
protected boolean isTest = false;
@@ -1888,9 +1890,7 @@ public class ItemImportServiceImpl implements ItemImportService, InitializingBea
*/
protected Document loadXML(String filename) throws IOException,
ParserConfigurationException, SAXException {
DocumentBuilder builder = DocumentBuilderFactory.newInstance()
.newDocumentBuilder();
DocumentBuilder builder = XMLUtils.getDocumentBuilder();
return builder.parse(new File(filename));
}

View File

@@ -23,8 +23,6 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.UUID;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
@@ -33,6 +31,7 @@ import javax.xml.transform.TransformerFactory;
import org.apache.logging.log4j.Logger;
import org.dspace.app.util.LocalSchemaFilenameFilter;
import org.dspace.app.util.XMLUtils;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
@@ -52,7 +51,6 @@ public class ItemArchive {
public static final String DUBLIN_CORE_XML = "dublin_core.xml";
protected static DocumentBuilder builder = null;
protected Transformer transformer = null;
protected List<DtoMetadata> dtomList = null;
@@ -95,14 +93,14 @@ public class ItemArchive {
InputStream is = null;
try {
is = new FileInputStream(new File(dir, DUBLIN_CORE_XML));
itarch.dtomList = MetadataUtilities.loadDublinCore(getDocumentBuilder(), is);
itarch.dtomList = MetadataUtilities.loadDublinCore(XMLUtils.getDocumentBuilder(), is);
//The code to search for local schema files was copied from org.dspace.app.itemimport
// .ItemImportServiceImpl.java
File file[] = dir.listFiles(new LocalSchemaFilenameFilter());
for (int i = 0; i < file.length; i++) {
is = new FileInputStream(file[i]);
itarch.dtomList.addAll(MetadataUtilities.loadDublinCore(getDocumentBuilder(), is));
itarch.dtomList.addAll(MetadataUtilities.loadDublinCore(XMLUtils.getDocumentBuilder(), is));
}
} finally {
if (is != null) {
@@ -126,14 +124,6 @@ public class ItemArchive {
return itarch;
}
protected static DocumentBuilder getDocumentBuilder()
throws ParserConfigurationException {
if (builder == null) {
builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
}
return builder;
}
/**
* Getter for Transformer
*
@@ -318,7 +308,7 @@ public class ItemArchive {
try {
out = new FileOutputStream(new File(dir, "dublin_core.xml"));
Document doc = MetadataUtilities.writeDublinCore(getDocumentBuilder(), undoDtomList);
Document doc = MetadataUtilities.writeDublinCore(XMLUtils.getDocumentBuilder(), undoDtomList);
MetadataUtilities.writeDocument(doc, getTransformer(), out);
// if undo has delete bitstream

View File

@@ -19,6 +19,7 @@ import java.util.TreeMap;
import org.apache.commons.cli.ParseException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.app.util.XMLUtils;
import org.dspace.core.Context;
import org.dspace.scripts.DSpaceRunnable;
import org.dspace.scripts.DSpaceRunnable.StepResult;
@@ -314,7 +315,7 @@ public class ScriptLauncher {
String config = kernelImpl.getConfigurationService().getProperty("dspace.dir") +
System.getProperty("file.separator") + "config" +
System.getProperty("file.separator") + "launcher.xml";
SAXBuilder saxBuilder = new SAXBuilder();
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document doc = null;
try {
doc = saxBuilder.build(config);

View File

@@ -18,6 +18,7 @@ import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Logger;
import org.dspace.app.sfx.service.SFXFileReaderService;
import org.dspace.app.util.XMLUtils;
import org.dspace.content.DCPersonName;
import org.dspace.content.Item;
import org.dspace.content.MetadataValue;
@@ -79,9 +80,9 @@ public class SFXFileReaderServiceImpl implements SFXFileReaderService {
log.info("Parsing XML file... " + fileName);
DocumentBuilder docBuilder;
Document doc = null;
DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
docBuilderFactory.setIgnoringElementContentWhitespace(true);
try {
DocumentBuilderFactory docBuilderFactory = XMLUtils.getDocumentBuilderFactory();
docBuilderFactory.setIgnoringElementContentWhitespace(true);
docBuilder = docBuilderFactory.newDocumentBuilder();
} catch (ParserConfigurationException e) {
log.error("Wrong parser configuration: " + e.getMessage());

View File

@@ -121,7 +121,11 @@ public class DCInputsReader {
String uri = "file:" + new File(fileName).getAbsolutePath();
try {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
// This document builder factory will *not* disable external
// entities as they can be useful in managing large forms, but
// it is up to site administrators to validate the XML they are
// storing
DocumentBuilderFactory factory = XMLUtils.getTrustedDocumentBuilderFactory();
factory.setValidating(false);
factory.setIgnoringComments(true);
factory.setIgnoringElementContentWhitespace(true);

View File

@@ -11,7 +11,6 @@ import java.io.File;
import java.io.IOException;
import java.sql.SQLException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.cli.CommandLine;
@@ -139,8 +138,9 @@ public class InitializeEntities {
private void parseXMLToRelations(Context context, String fileLocation) throws AuthorizeException {
try {
File fXmlFile = new File(fileLocation);
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
// This XML builder will allow external entities, so the relationship types XML should
// be considered trusted by administrators
DocumentBuilder dBuilder = XMLUtils.getTrustedDocumentBuilder();
Document doc = dBuilder.parse(fXmlFile);
doc.getDocumentElement().normalize();

View File

@@ -170,8 +170,11 @@ public class SubmissionConfigReader {
String uri = "file:" + new File(fileName).getAbsolutePath();
try {
DocumentBuilderFactory factory = DocumentBuilderFactory
.newInstance();
// This document builder factory will *not* disable external
// entities as they can be useful in managing large forms, but
// it is up to site administrators to validate the XML they are
// storing
DocumentBuilderFactory factory = XMLUtils.getTrustedDocumentBuilderFactory();
factory.setValidating(false);
factory.setIgnoringComments(true);
factory.setIgnoringElementContentWhitespace(true);
@@ -732,4 +735,4 @@ public class SubmissionConfigReader {
}
return results;
}
}
}

View File

@@ -9,8 +9,13 @@ package org.dspace.app.util;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.stream.XMLInputFactory;
import org.apache.commons.lang3.StringUtils;
import org.jdom2.input.SAXBuilder;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
@@ -161,4 +166,122 @@ public class XMLUtils {
}
return result;
}
/**
 * Create a javax {@link DocumentBuilderFactory} that is left at the parser's
 * defaults, i.e. with NO XXE / DTD hardening applied.
 * <p>
 * Use this only for internal, administrative or configuration XML where
 * external entities and similar features are included on purpose. Calling a
 * method with "Trusted" in its name is also a way for the caller to state
 * explicitly that it trusts the XML source. The body is intentionally tiny
 * and may later grow other shared settings (e.g. whitespace handling).
 *
 * @return a new, unhardened document builder factory
 * @throws ParserConfigurationException declared as part of the method
 *         signature; the current body does not raise it itself
 */
public static DocumentBuilderFactory getTrustedDocumentBuilderFactory()
        throws ParserConfigurationException {
    // Deliberately no setFeature(...) calls: the caller vouches for the XML.
    return DocumentBuilderFactory.newInstance();
}
/**
 * Create a javax {@link DocumentBuilderFactory} with basic hardening applied
 * to avoid XXE attacks and other unwanted content inclusion.
 * <p>
 * The returned factory rejects DOCTYPE declarations, has external general
 * entities, external parameter entities and external DTD loading switched
 * off, does not expand entity references and is not XInclude aware.
 *
 * @return a new, hardened document builder factory
 * @throws ParserConfigurationException if the underlying parser rejects one
 *         of the features set here
 */
public static DocumentBuilderFactory getDocumentBuilderFactory()
        throws ParserConfigurationException {
    DocumentBuilderFactory hardened = DocumentBuilderFactory.newInstance();

    // Refuse any document that carries a DOCTYPE / DTD at all
    hardened.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);

    // Switched off, in order: external general entities, external
    // parameter entities, loading of external DTDs
    String[] switchedOff = {
        "http://xml.org/sax/features/external-general-entities",
        "http://xml.org/sax/features/external-parameter-entities",
        "http://apache.org/xml/features/nonvalidating/load-external-dtd"
    };
    for (String feature : switchedOff) {
        hardened.setFeature(feature, false);
    }

    // Should an entity get defined regardless, do not expand it
    hardened.setExpandEntityReferences(false);
    // No "XInclude" markup processing
    hardened.setXIncludeAware(false);
    return hardened;
}
/**
 * Create a javax {@link DocumentBuilder} with NO XXE / DTD hardening, built
 * from {@link #getTrustedDocumentBuilderFactory()}.
 * <p>
 * Intended only for internal, administrative or configuration XML where
 * external entities and similar features are included on purpose. Using
 * this method name documents that the caller knowingly trusts the XML
 * source.
 *
 * @return a new document builder with no security features set
 * @throws ParserConfigurationException if a builder cannot be created from
 *         the factory
 */
public static DocumentBuilder getTrustedDocumentBuilder()
        throws ParserConfigurationException {
    DocumentBuilderFactory trustedFactory = getTrustedDocumentBuilderFactory();
    return trustedFactory.newDocumentBuilder();
}
/**
 * Create a javax {@link DocumentBuilder} from the hardened factory returned
 * by {@link #getDocumentBuilderFactory()}, to avoid XXE attacks and other
 * unwanted content inclusion.
 *
 * @return a new document builder for use in XML parsing
 * @throws ParserConfigurationException if the hardened factory cannot be
 *         configured or cannot create a builder
 */
public static DocumentBuilder getDocumentBuilder()
        throws ParserConfigurationException {
    DocumentBuilderFactory hardenedFactory = getDocumentBuilderFactory();
    return hardenedFactory.newDocumentBuilder();
}
/**
 * Create a hardened JDOM {@link SAXBuilder} without validation.
 * Shorthand for {@link #getSAXBuilder(boolean)} called with {@code false}.
 *
 * @return SAX document builder for use in XML parsing
 */
public static SAXBuilder getSAXBuilder() {
    final boolean validate = false;
    return getSAXBuilder(validate);
}
/**
 * Create a JDOM {@link SAXBuilder} with basic hardening applied to avoid XXE
 * attacks and other unwanted content inclusion.
 * <p>
 * The returned builder rejects DOCTYPE declarations, has external general
 * entities, external parameter entities and external DTD loading switched
 * off, and does not expand entities.
 * <p>
 * NOTE(review): this flag was originally described as "JDOM XSD validation",
 * but {@code setValidation(true)} appears to be JDOM's DTD-validation
 * switch; callers wanting XSD validation presumably also need to enable the
 * parser's schema feature themselves -- confirm against the JDOM docs.
 *
 * @param validate when {@code true}, {@code setValidation(true)} is called
 *                 on the builder before the hardening features are set
 * @return SAX document builder for use in XML parsing
 */
public static SAXBuilder getSAXBuilder(boolean validate) {
    SAXBuilder hardened = new SAXBuilder();
    if (validate) {
        hardened.setValidation(true);
    }

    // Refuse any document that carries a DOCTYPE / DTD at all
    hardened.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);

    // Switched off, in order: external general entities, external
    // parameter entities, loading of external DTDs
    String[] switchedOff = {
        "http://xml.org/sax/features/external-general-entities",
        "http://xml.org/sax/features/external-parameter-entities",
        "http://apache.org/xml/features/nonvalidating/load-external-dtd"
    };
    for (String feature : switchedOff) {
        hardened.setFeature(feature, false);
    }

    // Entities are never expanded
    hardened.setExpandEntities(false);
    return hardened;
}
/**
 * Create a StAX {@link XMLInputFactory} with basic hardening applied to
 * avoid XXE attacks and other unwanted content inclusion.
 * <p>
 * DTD support is switched off, and support for external entities is
 * explicitly disabled as well, so the factory stays safe even if DTD
 * handling is re-enabled by a caller later on.
 *
 * @return XML input factory for use in XML parsing
 */
public static XMLInputFactory getXMLInputFactory() {
    XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
    // No DOCTYPE / DTD processing
    xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
    // No external entities (defence in depth, mirrors the DOM/SAX helpers)
    xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
    return xmlInputFactory;
}
}

View File

@@ -14,6 +14,7 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.ArrayUtils;
import org.dspace.app.util.XMLUtils;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.DSpaceObject;
import org.dspace.content.packager.PackageDisseminator;
@@ -129,7 +130,7 @@ public class METSDisseminationCrosswalk
try {
//Return just the root Element of the METS file
SAXBuilder builder = new SAXBuilder();
SAXBuilder builder = XMLUtils.getSAXBuilder();
Document metsDocument = builder.build(tempFile);
return metsDocument.getRootElement();
} catch (JDOMException je) {

View File

@@ -22,6 +22,7 @@ import java.util.Properties;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.app.util.XMLUtils;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Collection;
import org.dspace.content.Community;
@@ -144,7 +145,7 @@ public class MODSDisseminationCrosswalk extends SelfNamedPlugin
MODS_NS.getURI() + " " + MODS_XSD;
private static final XMLOutputter outputUgly = new XMLOutputter();
private static final SAXBuilder builder = new SAXBuilder();
private static final SAXBuilder builder = XMLUtils.getSAXBuilder();
private Map<String, modsTriple> modsMap = null;

View File

@@ -22,6 +22,7 @@ import java.util.Properties;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.app.util.XMLUtils;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
@@ -125,7 +126,7 @@ public class QDCCrosswalk extends SelfNamedPlugin
// XML schemaLocation fragment for this crosswalk, from config.
private String schemaLocation = null;
private static final SAXBuilder builder = new SAXBuilder();
private static final SAXBuilder builder = XMLUtils.getSAXBuilder();
protected ItemService itemService = ContentServiceFactory.getInstance().getItemService();

View File

@@ -13,6 +13,7 @@ import java.io.IOException;
import java.sql.SQLException;
import java.util.List;
import org.dspace.app.util.XMLUtils;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.DSpaceObject;
import org.dspace.content.packager.PackageDisseminator;
@@ -208,7 +209,7 @@ public class RoleCrosswalk
try {
//Try to parse our XML results (which were disseminated by the Packager)
SAXBuilder builder = new SAXBuilder();
SAXBuilder builder = XMLUtils.getSAXBuilder();
Document xmlDocument = builder.build(tempFile);
//If XML parsed successfully, return root element of doc
if (xmlDocument != null && xmlDocument.hasRootElement()) {

View File

@@ -18,6 +18,7 @@ import javax.xml.transform.TransformerException;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.logging.log4j.Logger;
import org.dspace.app.util.XMLUtils;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Collection;
import org.dspace.content.Community;
@@ -297,7 +298,7 @@ public class XSLTIngestionCrosswalk
"Failed to initialize transformer, probably error loading stylesheet.");
}
SAXBuilder builder = new SAXBuilder();
SAXBuilder builder = XMLUtils.getSAXBuilder();
Document inDoc = builder.build(new FileInputStream(argv[i + 1]));
XMLOutputter outputter = new XMLOutputter(Format.getPrettyFormat());
List dimList;

View File

@@ -20,6 +20,7 @@ import java.util.List;
import org.apache.commons.codec.binary.Base64;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.app.util.XMLUtils;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream;
import org.dspace.content.Bundle;
@@ -265,12 +266,13 @@ public class METSManifest {
public static METSManifest create(InputStream is, boolean validate, String configName)
throws IOException,
MetadataValidationException {
SAXBuilder builder = new SAXBuilder(validate);
SAXBuilder builder = XMLUtils.getSAXBuilder();
builder.setIgnoringElementContentWhitespace(true);
// Set validation feature
if (validate) {
builder.setValidation(true);
builder.setFeature("http://apache.org/xml/features/validation/schema", true);
// Tell the parser where local copies of schemas are, to speed up
@@ -278,10 +280,6 @@ public class METSManifest {
if (localSchemas.length() > 0) {
builder.setProperty("http://apache.org/xml/properties/schema/external-schemaLocation", localSchemas);
}
} else {
// disallow DTD parsing to ensure no XXE attacks can occur.
// See https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html
builder.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
}
// Parse the METS file

View File

@@ -21,6 +21,7 @@ import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.codec.DecoderException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.app.util.XMLUtils;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Collection;
import org.dspace.content.Community;
@@ -385,7 +386,7 @@ public class RoleIngester implements PackageIngester {
Document document;
try {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.getDocumentBuilderFactory();
dbf.setIgnoringComments(true);
dbf.setCoalescing(true);
DocumentBuilder db = dbf.newDocumentBuilder();
@@ -419,7 +420,7 @@ public class RoleIngester implements PackageIngester {
Document document;
try {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.getDocumentBuilderFactory();
dbf.setIgnoringComments(true);
dbf.setCoalescing(true);
DocumentBuilder db = dbf.newDocumentBuilder();

View File

@@ -37,6 +37,7 @@ import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.app.client.DSpaceHttpClientFactory;
import org.dspace.app.util.XMLUtils;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
@@ -176,7 +177,7 @@ public class MetadataWebService extends AbstractCurationTask implements Namespac
fieldSeparator = (fldSep != null) ? fldSep : " ";
urlTemplate = taskProperty("template");
templateParam = urlTemplate.substring(urlTemplate.indexOf("{") + 1,
urlTemplate.indexOf("}"));
urlTemplate.indexOf("}"));
String[] parsed = parseTransform(templateParam);
lookupField = parsed[0];
lookupTransform = parsed[1];
@@ -204,13 +205,9 @@ public class MetadataWebService extends AbstractCurationTask implements Namespac
}
}
// initialize response document parser
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
try {
// disallow DTD parsing to ensure no XXE attacks can occur
// See https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
factory.setXIncludeAware(false);
DocumentBuilderFactory factory = XMLUtils.getDocumentBuilderFactory();
factory.setNamespaceAware(true);
docBuilder = factory.newDocumentBuilder();
} catch (ParserConfigurationException pcE) {
log.error("caught exception: " + pcE);

View File

@@ -16,6 +16,7 @@ import javax.xml.stream.XMLStreamReader;
import jakarta.xml.bind.JAXBContext;
import jakarta.xml.bind.JAXBException;
import jakarta.xml.bind.Unmarshaller;
import org.dspace.app.util.XMLUtils;
import org.xml.sax.SAXException;
/**
@@ -31,9 +32,7 @@ public abstract class Converter<T> {
protected Object unmarshall(InputStream input, Class<?> type) throws SAXException, URISyntaxException {
try {
XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
// disallow DTD parsing to ensure no XXE attacks can occur
xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
XMLInputFactory xmlInputFactory = XMLUtils.getXMLInputFactory();
XMLStreamReader xmlStreamReader = xmlInputFactory.createXMLStreamReader(input);
JAXBContext context = JAXBContext.newInstance(type);

View File

@@ -40,6 +40,7 @@ import org.apache.http.util.EntityUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.app.client.DSpaceHttpClientFactory;
import org.dspace.app.util.XMLUtils;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.DSpaceObject;
import org.dspace.content.crosswalk.CrosswalkException;
@@ -829,7 +830,7 @@ public class DataCiteConnector
}
// parse the XML
SAXBuilder saxBuilder = new SAXBuilder();
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document doc = null;
try {
doc = saxBuilder.build(new ByteArrayInputStream(content.getBytes("UTF-8")));

View File

@@ -22,6 +22,7 @@ import jakarta.ws.rs.client.WebTarget;
import jakarta.ws.rs.core.MediaType;
import jakarta.ws.rs.core.Response;
import org.apache.commons.lang3.StringUtils;
import org.dspace.app.util.XMLUtils;
import org.dspace.content.Item;
import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.datamodel.Query;
@@ -218,7 +219,7 @@ public class ArXivImportMetadataSourceServiceImpl extends AbstractImportMetadata
if (response.getStatus() == 200) {
String responseString = response.readEntity(String.class);
SAXBuilder saxBuilder = new SAXBuilder();
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document document = saxBuilder.build(new StringReader(responseString));
Element root = document.getRootElement();
@@ -399,7 +400,7 @@ public class ArXivImportMetadataSourceServiceImpl extends AbstractImportMetadata
private List<Element> splitToRecords(String recordsSrc) {
try {
SAXBuilder saxBuilder = new SAXBuilder();
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document document = saxBuilder.build(new StringReader(recordsSrc));
Element root = document.getRootElement();

View File

@@ -26,6 +26,7 @@ import org.apache.http.HttpException;
import org.apache.http.client.utils.URIBuilder;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.app.util.XMLUtils;
import org.dspace.content.Item;
import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.datamodel.Query;
@@ -301,9 +302,7 @@ public class CiniiImportMetadataSourceServiceImpl extends AbstractImportMetadata
private List<Element> splitToRecords(String recordsSrc) {
try {
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document document = saxBuilder.build(new StringReader(recordsSrc));
Element root = document.getRootElement();
return root.getChildren();
@@ -356,9 +355,7 @@ public class CiniiImportMetadataSourceServiceImpl extends AbstractImportMetadata
Map<String, Map<String, String>> params = new HashMap<String, Map<String,String>>();
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
int url_len = this.url.length() - 1;
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document document = saxBuilder.build(new StringReader(response));
Element root = document.getRootElement();
List<Namespace> namespaces = Arrays.asList(
@@ -420,9 +417,7 @@ public class CiniiImportMetadataSourceServiceImpl extends AbstractImportMetadata
Map<String, Map<String, String>> params = new HashMap<String, Map<String,String>>();
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document document = saxBuilder.build(new StringReader(response));
Element root = document.getRootElement();
List<Namespace> namespaces = Arrays
@@ -449,4 +444,4 @@ public class CiniiImportMetadataSourceServiceImpl extends AbstractImportMetadata
return metadatumDTO;
}
}
}

View File

@@ -12,7 +12,6 @@ import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import com.fasterxml.jackson.core.JsonProcessingException;
@@ -21,6 +20,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.app.util.XMLUtils;
import org.dspace.importer.external.metadatamapping.contributor.JsonPathMetadataProcessor;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
@@ -64,10 +64,9 @@ public class CrossRefAbstractProcessor implements JsonPathMetadataProcessor {
}
String xmlString = "<root>" + abstractValue + "</root>";
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
Document xmlDoc;
try {
DocumentBuilder builder = factory.newDocumentBuilder();
DocumentBuilder builder = XMLUtils.getDocumentBuilder();
InputSource is = new InputSource(new StringReader(xmlString));
xmlDoc = builder.parse(is);
} catch (SAXException | IOException | ParserConfigurationException e) {

View File

@@ -32,6 +32,7 @@ import org.apache.http.client.utils.URIBuilder;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.xerces.impl.dv.util.Base64;
import org.dspace.app.util.XMLUtils;
import org.dspace.content.Item;
import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.datamodel.Query;
@@ -397,9 +398,7 @@ public class EpoImportMetadataSourceServiceImpl extends AbstractImportMetadataSo
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document document = saxBuilder.build(new StringReader(response));
Element root = document.getRootElement();
@@ -436,9 +435,7 @@ public class EpoImportMetadataSourceServiceImpl extends AbstractImportMetadataSo
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document document = saxBuilder.build(new StringReader(response));
Element root = document.getRootElement();
@@ -489,9 +486,7 @@ public class EpoImportMetadataSourceServiceImpl extends AbstractImportMetadataSo
private List<Element> splitToRecords(String recordsSrc) {
try {
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document document = saxBuilder.build(new StringReader(recordsSrc));
Element root = document.getRootElement();
List<Namespace> namespaces = Arrays.asList(Namespace.getNamespace("ns", "http://www.epo.org/exchange"));

View File

@@ -20,6 +20,7 @@ import jakarta.ws.rs.client.ClientBuilder;
import jakarta.ws.rs.client.Invocation;
import jakarta.ws.rs.client.WebTarget;
import jakarta.ws.rs.core.Response;
import org.dspace.app.util.XMLUtils;
import org.dspace.content.Item;
import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.datamodel.Query;
@@ -248,7 +249,7 @@ public class OpenAireImportMetadataSourceServiceImpl extends AbstractImportMetad
if (response.getStatus() == 200) {
String responseString = response.readEntity(String.class);
SAXBuilder saxBuilder = new SAXBuilder();
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document document = saxBuilder.build(new StringReader(responseString));
Element root = document.getRootElement();
@@ -330,7 +331,7 @@ public class OpenAireImportMetadataSourceServiceImpl extends AbstractImportMetad
private List<Element> splitToRecords(String recordsSrc) {
try {
SAXBuilder saxBuilder = new SAXBuilder();
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document document = saxBuilder.build(new StringReader(recordsSrc));
Element root = document.getRootElement();

View File

@@ -24,6 +24,7 @@ import java.util.concurrent.Callable;
import com.google.common.io.CharStreams;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.client.utils.URIBuilder;
import org.dspace.app.util.XMLUtils;
import org.dspace.content.Item;
import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.datamodel.Query;
@@ -233,11 +234,13 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
String value = null;
try {
SAXBuilder saxBuilder = new SAXBuilder();
// Disallow external entities & entity expansion to protect against XXE attacks
// (NOTE: We receive errors if we disable all DTDs for PubMed, so this is the best we can do)
saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false);
saxBuilder.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
// To parse PubMed responses, DOCTYPE declarations and DTD loading must be
// re-enabled on this builder; entities themselves remain disabled and are
// not expanded.
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false);
saxBuilder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd",
true);
Document document = saxBuilder.build(new StringReader(src));
Element root = document.getRootElement();
@@ -354,12 +357,7 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
private List<Element> splitToRecords(String recordsSrc) {
try {
SAXBuilder saxBuilder = new SAXBuilder();
// Disallow external entities & entity expansion to protect against XXE attacks
// (NOTE: We receive errors if we disable all DTDs for PubMed, so this is the best we can do)
saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false);
saxBuilder.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
saxBuilder.setExpandEntities(false);
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document document = saxBuilder.build(new StringReader(recordsSrc));
Element root = document.getRootElement();

View File

@@ -24,6 +24,7 @@ import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.utils.URIBuilder;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.app.util.XMLUtils;
import org.dspace.content.Item;
import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.datamodel.Query;
@@ -292,9 +293,7 @@ public class PubmedEuropeMetadataSourceServiceImpl extends AbstractImportMetadat
Map<String, Map<String, String>> params = new HashMap<String, Map<String,String>>();
String response = liveImportClient.executeHttpGetRequest(1000, buildURI(1, query), params);
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document document = saxBuilder.build(new StringReader(response));
Element root = document.getRootElement();
Element element = root.getChild("hitCount");
@@ -365,9 +364,7 @@ public class PubmedEuropeMetadataSourceServiceImpl extends AbstractImportMetadat
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
String cursorMark = StringUtils.EMPTY;
if (StringUtils.isNotBlank(response)) {
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document document = saxBuilder.build(new StringReader(response));
XPathFactory xpfac = XPathFactory.instance();
XPathExpression<Element> xPath = xpfac.compile("//responseWrapper/resultList/result",
@@ -419,4 +416,4 @@ public class PubmedEuropeMetadataSourceServiceImpl extends AbstractImportMetadat
this.url = url;
}
}
}

View File

@@ -26,6 +26,7 @@ import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.app.util.XMLUtils;
import org.dspace.content.Item;
import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.datamodel.Query;
@@ -208,9 +209,7 @@ public class ScopusImportMetadataSourceServiceImpl extends AbstractImportMetadat
return 0;
}
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document document = saxBuilder.build(new StringReader(response));
Element root = document.getRootElement();
@@ -397,9 +396,7 @@ public class ScopusImportMetadataSourceServiceImpl extends AbstractImportMetadat
private List<Element> splitToRecords(String recordsSrc) {
try {
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document document = saxBuilder.build(new StringReader(recordsSrc));
Element root = document.getRootElement();
String totalResults = root.getChildText("totalResults", Namespace.getNamespace("http://a9.com/-/spec/opensearch/1.1/"));

View File

@@ -26,6 +26,7 @@ import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.app.util.XMLUtils;
import org.dspace.content.Item;
import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.datamodel.Query;
@@ -145,9 +146,7 @@ public class WOSImportMetadataSourceServiceImpl extends AbstractImportMetadataSo
params.put(HEADER_PARAMETERS, getRequestParameters());
String response = liveImportClient.executeHttpGetRequest(timeout, url, params);
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document document = saxBuilder.build(new StringReader(response));
Element root = document.getRootElement();
XPathExpression<Element> xpath = XPathFactory.instance().compile("//*[@name=\"RecordsFound\"]",
@@ -288,9 +287,7 @@ public class WOSImportMetadataSourceServiceImpl extends AbstractImportMetadataSo
private List<Element> splitToRecords(String recordsSrc) {
try {
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
SAXBuilder saxBuilder = XMLUtils.getSAXBuilder();
Document document = saxBuilder.build(new StringReader(recordsSrc));
Element root = document.getRootElement();
String cData = XPathFactory.instance().compile("//*[@name=\"Records\"]",
@@ -332,4 +329,4 @@ public class WOSImportMetadataSourceServiceImpl extends AbstractImportMetadataSo
this.apiKey = apiKey;
}
}
}

View File

@@ -28,6 +28,7 @@ import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.util.EntityUtils;
import org.apache.logging.log4j.Logger;
import org.dspace.app.client.DSpaceHttpClientFactory;
import org.dspace.app.util.XMLUtils;
import org.dspace.services.ConfigurationService;
import org.jdom2.Attribute;
import org.jdom2.Document;
@@ -50,7 +51,7 @@ public class CCLicenseConnectorServiceImpl implements CCLicenseConnectorService,
private Logger log = org.apache.logging.log4j.LogManager.getLogger(CCLicenseConnectorServiceImpl.class);
private CloseableHttpClient client;
protected SAXBuilder parser = new SAXBuilder();
protected SAXBuilder parser = XMLUtils.getSAXBuilder();
private String postArgument = "answers";
private String postAnswerFormat =

View File

@@ -43,6 +43,7 @@ import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.dspace.app.client.DSpaceHttpClientFactory;
import org.dspace.app.util.XMLUtils;
import org.dspace.orcid.OrcidToken;
import org.dspace.orcid.exception.OrcidClientException;
import org.dspace.orcid.model.OrcidEntityType;
@@ -351,8 +352,7 @@ public class OrcidClientImpl implements OrcidClient {
@SuppressWarnings("unchecked")
private <T> T unmarshall(HttpEntity entity, Class<T> clazz) throws Exception {
JAXBContext jaxbContext = JAXBContext.newInstance(clazz);
XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
XMLInputFactory xmlInputFactory = XMLUtils.getXMLInputFactory();
XMLStreamReader xmlStreamReader = xmlInputFactory.createXMLStreamReader(entity.getContent());
Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
return (T) unmarshaller.unmarshal(xmlStreamReader);

View File

@@ -12,7 +12,6 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.xpath.XPath;
@@ -20,6 +19,7 @@ import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.dspace.app.util.XMLUtils;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.w3c.dom.Document;
@@ -71,7 +71,7 @@ public class ControlledVocabulary {
File controlledVocFile = new File(filePath.toString());
if (controlledVocFile.exists()) {
DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
DocumentBuilder builder = XMLUtils.getDocumentBuilder();
Document document = builder.parse(controlledVocFile);
XPath xPath = XPathFactory.newInstance().newXPath();
Node node = (Node) xPath.compile("node").evaluate(document, XPathConstants.NODE);