diff --git a/dspace-api/src/main/java/org/dspace/administer/RegistryImporter.java b/dspace-api/src/main/java/org/dspace/administer/RegistryImporter.java index 27a6534213..c74e56bce8 100644 --- a/dspace-api/src/main/java/org/dspace/administer/RegistryImporter.java +++ b/dspace-api/src/main/java/org/dspace/administer/RegistryImporter.java @@ -10,7 +10,6 @@ package org.dspace.administer; import java.io.File; import java.io.IOException; import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.xpath.XPath; @@ -18,6 +17,7 @@ import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; +import org.dspace.app.util.XMLUtils; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; @@ -49,8 +49,9 @@ public class RegistryImporter { */ public static Document loadXML(String filename) throws IOException, ParserConfigurationException, SAXException { - DocumentBuilder builder = DocumentBuilderFactory.newInstance() - .newDocumentBuilder(); + // This XML builder will *not* disable external entities as XML + // registries are considered trusted content + DocumentBuilder builder = XMLUtils.getTrustedDocumentBuilder(); Document document = builder.parse(new File(filename)); diff --git a/dspace-api/src/main/java/org/dspace/administer/RegistryLoader.java b/dspace-api/src/main/java/org/dspace/administer/RegistryLoader.java index d503bfc00b..8bb72e1852 100644 --- a/dspace-api/src/main/java/org/dspace/administer/RegistryLoader.java +++ b/dspace-api/src/main/java/org/dspace/administer/RegistryLoader.java @@ -13,7 +13,6 @@ import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.xpath.XPath; @@ -29,6 +28,7 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.BitstreamFormat; import org.dspace.content.factory.ContentServiceFactory; @@ -266,8 +266,9 @@ public class RegistryLoader { */ private static Document loadXML(String filename) throws IOException, ParserConfigurationException, SAXException { - DocumentBuilder builder = DocumentBuilderFactory.newInstance() - .newDocumentBuilder(); + // This XML builder will *not* disable external entities as XML + // registries are considered trusted content + DocumentBuilder builder = XMLUtils.getTrustedDocumentBuilder(); return builder.parse(new File(filename)); } @@ -351,4 +352,4 @@ public class RegistryLoader { return data; } -} \ No newline at end of file +} diff --git a/dspace-api/src/main/java/org/dspace/administer/StructBuilder.java b/dspace-api/src/main/java/org/dspace/administer/StructBuilder.java index 8bbcfe0ff7..f2577a37b1 100644 --- a/dspace-api/src/main/java/org/dspace/administer/StructBuilder.java +++ b/dspace-api/src/main/java/org/dspace/administer/StructBuilder.java @@ -27,7 +27,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.xpath.XPath; @@ -43,6 +42,7 @@ import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.lang3.StringUtils; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.Collection; import org.dspace.content.Community; @@ -613,8 +613,8 @@ public class StructBuilder { */ private static org.w3c.dom.Document loadXML(InputStream input) throws IOException, ParserConfigurationException, SAXException { - DocumentBuilder builder = DocumentBuilderFactory.newInstance() - .newDocumentBuilder(); + // This builder factory does not disable external DTD, entities, etc. + DocumentBuilder builder = XMLUtils.getTrustedDocumentBuilder(); org.w3c.dom.Document document = builder.parse(input); diff --git a/dspace-api/src/main/java/org/dspace/app/itemimport/ItemImportServiceImpl.java b/dspace-api/src/main/java/org/dspace/app/itemimport/ItemImportServiceImpl.java index 5b862a6e04..614393bd0a 100644 --- a/dspace-api/src/main/java/org/dspace/app/itemimport/ItemImportServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/app/itemimport/ItemImportServiceImpl.java @@ -48,7 +48,6 @@ import java.util.UUID; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.xpath.XPath; @@ -68,6 +67,7 @@ import org.apache.logging.log4j.Logger; import org.dspace.app.itemimport.service.ItemImportService; import org.dspace.app.util.LocalSchemaFilenameFilter; import org.dspace.app.util.RelationshipUtils; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.ResourcePolicy; import org.dspace.authorize.service.AuthorizeService; @@ -180,6 +180,8 @@ public class ItemImportServiceImpl implements ItemImportService, InitializingBea @Autowired(required = true) protected MetadataValueService metadataValueService; + protected DocumentBuilder builder; + protected String tempWorkDir; protected boolean isTest = false; @@ -1889,9 +1891,7 @@ public class ItemImportServiceImpl implements ItemImportService, InitializingBea */ protected Document loadXML(String filename) throws IOException, ParserConfigurationException, SAXException { - DocumentBuilder builder = DocumentBuilderFactory.newInstance() - .newDocumentBuilder(); - + DocumentBuilder builder = XMLUtils.getDocumentBuilder(); return builder.parse(new File(filename)); } diff --git a/dspace-api/src/main/java/org/dspace/app/itemupdate/ItemArchive.java b/dspace-api/src/main/java/org/dspace/app/itemupdate/ItemArchive.java index a3dead0574..efff52df89 100644 --- a/dspace-api/src/main/java/org/dspace/app/itemupdate/ItemArchive.java +++ b/dspace-api/src/main/java/org/dspace/app/itemupdate/ItemArchive.java @@ -23,8 +23,6 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.UUID; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; @@ -33,6 +31,7 @@ import javax.xml.transform.TransformerFactory; import org.apache.logging.log4j.Logger; import org.dspace.app.util.LocalSchemaFilenameFilter; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.DSpaceObject; import org.dspace.content.Item; @@ -52,7 +51,6 @@ public class ItemArchive { public static final String DUBLIN_CORE_XML = "dublin_core.xml"; - protected static DocumentBuilder builder = null; protected Transformer transformer = null; protected List dtomList = null; @@ -95,14 +93,14 @@ public class ItemArchive { InputStream is = null; try { is = new FileInputStream(new File(dir, DUBLIN_CORE_XML)); - itarch.dtomList = MetadataUtilities.loadDublinCore(getDocumentBuilder(), is); + itarch.dtomList = MetadataUtilities.loadDublinCore(XMLUtils.getDocumentBuilder(), is); //The code to search for local schema files was copied from org.dspace.app.itemimport // .ItemImportServiceImpl.java File file[] = dir.listFiles(new LocalSchemaFilenameFilter()); for (int i = 0; i < file.length; i++) { is = new FileInputStream(file[i]); - itarch.dtomList.addAll(MetadataUtilities.loadDublinCore(getDocumentBuilder(), is)); + itarch.dtomList.addAll(MetadataUtilities.loadDublinCore(XMLUtils.getDocumentBuilder(), is)); } } finally { if (is != null) { @@ -126,14 +124,6 @@ public class ItemArchive { return itarch; } - protected static DocumentBuilder getDocumentBuilder() - throws ParserConfigurationException { - if (builder == null) { - builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); - } - return builder; - } - /** * Getter for Transformer * @@ -318,7 +308,7 @@ public class ItemArchive { try { out = new FileOutputStream(new File(dir, "dublin_core.xml")); - Document doc = MetadataUtilities.writeDublinCore(getDocumentBuilder(), undoDtomList); + Document doc = MetadataUtilities.writeDublinCore(XMLUtils.getDocumentBuilder(), undoDtomList); MetadataUtilities.writeDocument(doc, getTransformer(), out); // if undo has delete bitstream diff --git a/dspace-api/src/main/java/org/dspace/app/launcher/ScriptLauncher.java b/dspace-api/src/main/java/org/dspace/app/launcher/ScriptLauncher.java index 89a416bfa8..ab8807c2ca 100644 --- a/dspace-api/src/main/java/org/dspace/app/launcher/ScriptLauncher.java +++ b/dspace-api/src/main/java/org/dspace/app/launcher/ScriptLauncher.java @@ -19,6 +19,7 @@ import java.util.TreeMap; import org.apache.commons.cli.ParseException; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.core.Context; import org.dspace.scripts.DSpaceRunnable; import org.dspace.scripts.DSpaceRunnable.StepResult; @@ -314,7 +315,7 @@ public class ScriptLauncher { String config = kernelImpl.getConfigurationService().getProperty("dspace.dir") + System.getProperty("file.separator") + "config" + System.getProperty("file.separator") + "launcher.xml"; - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document doc = null; try { doc = saxBuilder.build(config); diff --git a/dspace-api/src/main/java/org/dspace/app/sfx/SFXFileReaderServiceImpl.java b/dspace-api/src/main/java/org/dspace/app/sfx/SFXFileReaderServiceImpl.java index 184f00a53e..d3b447374a 100644 --- a/dspace-api/src/main/java/org/dspace/app/sfx/SFXFileReaderServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/app/sfx/SFXFileReaderServiceImpl.java @@ -18,6 +18,7 @@ import javax.xml.parsers.ParserConfigurationException; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; import org.dspace.app.sfx.service.SFXFileReaderService; +import org.dspace.app.util.XMLUtils; import org.dspace.content.DCPersonName; import org.dspace.content.Item; import org.dspace.content.MetadataValue; @@ -79,9 +80,9 @@ public class SFXFileReaderServiceImpl implements SFXFileReaderService { log.info("Parsing XML file... " + fileName); DocumentBuilder docBuilder; Document doc = null; - DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); - docBuilderFactory.setIgnoringElementContentWhitespace(true); try { + DocumentBuilderFactory docBuilderFactory = XMLUtils.getDocumentBuilderFactory(); + docBuilderFactory.setIgnoringElementContentWhitespace(true); docBuilder = docBuilderFactory.newDocumentBuilder(); } catch (ParserConfigurationException e) { log.error("Wrong parser configuration: " + e.getMessage()); diff --git a/dspace-api/src/main/java/org/dspace/app/util/DCInputsReader.java b/dspace-api/src/main/java/org/dspace/app/util/DCInputsReader.java index c77c3bf10e..d2f3ac0dc5 100644 --- a/dspace-api/src/main/java/org/dspace/app/util/DCInputsReader.java +++ b/dspace-api/src/main/java/org/dspace/app/util/DCInputsReader.java @@ -121,7 +121,11 @@ public class DCInputsReader { String uri = "file:" + new File(fileName).getAbsolutePath(); try { - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + // This document builder factory will *not* disable external + // entities as they can be useful in managing large forms, but + // it is up to site administrators to validate the XML they are + // storing + DocumentBuilderFactory factory = XMLUtils.getTrustedDocumentBuilderFactory(); factory.setValidating(false); factory.setIgnoringComments(true); factory.setIgnoringElementContentWhitespace(true); diff --git a/dspace-api/src/main/java/org/dspace/app/util/InitializeEntities.java b/dspace-api/src/main/java/org/dspace/app/util/InitializeEntities.java index e53d29ecfb..acc360bbc3 100644 --- a/dspace-api/src/main/java/org/dspace/app/util/InitializeEntities.java +++ b/dspace-api/src/main/java/org/dspace/app/util/InitializeEntities.java @@ -11,7 +11,6 @@ import java.io.File; import java.io.IOException; import java.sql.SQLException; import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.apache.commons.cli.CommandLine; @@ -139,8 +138,9 @@ public class InitializeEntities { private void parseXMLToRelations(Context context, String fileLocation) throws AuthorizeException { try { File fXmlFile = new File(fileLocation); - DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); - DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); + // This XML builder will allow external entities, so the relationship types XML should + // be considered trusted by administrators + DocumentBuilder dBuilder = XMLUtils.getTrustedDocumentBuilder(); Document doc = dBuilder.parse(fXmlFile); doc.getDocumentElement().normalize(); diff --git a/dspace-api/src/main/java/org/dspace/app/util/SubmissionConfigReader.java b/dspace-api/src/main/java/org/dspace/app/util/SubmissionConfigReader.java index 0f8f745680..980f028545 100644 --- a/dspace-api/src/main/java/org/dspace/app/util/SubmissionConfigReader.java +++ b/dspace-api/src/main/java/org/dspace/app/util/SubmissionConfigReader.java @@ -170,8 +170,11 @@ public class SubmissionConfigReader { String uri = "file:" + new File(fileName).getAbsolutePath(); try { - DocumentBuilderFactory factory = DocumentBuilderFactory - .newInstance(); + // This document builder factory will *not* disable external + // entities as they can be useful in managing large forms, but + // it is up to site administrators to validate the XML they are + // storing + DocumentBuilderFactory factory = XMLUtils.getTrustedDocumentBuilderFactory(); factory.setValidating(false); factory.setIgnoringComments(true); factory.setIgnoringElementContentWhitespace(true); @@ -732,4 +735,4 @@ public class SubmissionConfigReader { } return results; } -} \ No newline at end of file +} diff --git a/dspace-api/src/main/java/org/dspace/app/util/XMLUtils.java b/dspace-api/src/main/java/org/dspace/app/util/XMLUtils.java index c39d0d26fd..389d53fe6d 100644 --- a/dspace-api/src/main/java/org/dspace/app/util/XMLUtils.java +++ b/dspace-api/src/main/java/org/dspace/app/util/XMLUtils.java @@ -9,8 +9,13 @@ package org.dspace.app.util; import java.util.ArrayList; import java.util.List; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.stream.XMLInputFactory; import org.apache.commons.lang3.StringUtils; +import org.jdom2.input.SAXBuilder; import org.w3c.dom.Element; import org.w3c.dom.NodeList; @@ -161,4 +166,122 @@ public class XMLUtils { } return result; } + + /** + * Initialize and return a javax DocumentBuilderFactory with NO security + * applied. This is intended only for internal, administrative/configuration + * use where external entities and other dangerous features are actually + * purposefully included. + * The method here is tiny, but may be expanded with other features like + * whitespace handling, and calling this method name helps to document + * the fact that the caller knows it is trusting the XML source / factory. + * + * @return document builder factory to generate new builders + * @throws ParserConfigurationException + */ + public static DocumentBuilderFactory getTrustedDocumentBuilderFactory() + throws ParserConfigurationException { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + return factory; + } + + /** + * Initialize and return the javax DocumentBuilderFactory with some basic security + * applied to avoid XXE attacks and other unwanted content inclusion + * @return document builder factory to generate new builders + * @throws ParserConfigurationException + */ + public static DocumentBuilderFactory getDocumentBuilderFactory() + throws ParserConfigurationException { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + // No DOCTYPE / DTDs + factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + // No external general entities + factory.setFeature("http://xml.org/sax/features/external-general-entities", false); + // No external parameter entities + factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); + // No external DTDs + factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + // Even if entities somehow get defined, they will not be expanded + factory.setExpandEntityReferences(false); + // Disable "XInclude" markup processing + factory.setXIncludeAware(false); + + return factory; + } + + /** + * Initialize and return a javax DocumentBuilder with NO security + * applied. This is intended only for internal, administrative/configuration + * use where external entities and other dangerous features are actually + * purposefully included. + * The method here is tiny, but may be expanded with other features like + * whitespace handling, and calling this method name helps to document + * the fact that the caller knows it is trusting the XML source / builder + * + * @return document builder with no security features set + * @throws ParserConfigurationException + */ + public static DocumentBuilder getTrustedDocumentBuilder() + throws ParserConfigurationException { + return getTrustedDocumentBuilderFactory().newDocumentBuilder(); + } + + /** + * Initialize and return the javax DocumentBuilder with some basic security applied + * to avoid XXE attacks and other unwanted content inclusion + * @return document builder for use in XML parsing + * @throws ParserConfigurationException + */ + public static DocumentBuilder getDocumentBuilder() + throws ParserConfigurationException { + return getDocumentBuilderFactory().newDocumentBuilder(); + } + + /** + * Initialize and return the SAX document builder with some basic security applied + * to avoid XXE attacks and other unwanted content inclusion + * @return SAX document builder for use in XML parsing + */ + public static SAXBuilder getSAXBuilder() { + return getSAXBuilder(false); + } + + /** + * Initialize and return the SAX document builder with some basic security applied + * to avoid XXE attacks and other unwanted content inclusion + * @param validate whether to use JDOM XSD validation + * @return SAX document builder for use in XML parsing + */ + public static SAXBuilder getSAXBuilder(boolean validate) { + SAXBuilder saxBuilder = new SAXBuilder(); + if (validate) { + saxBuilder.setValidation(true); + } + // No DOCTYPE / DTDs + saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + // No external general entities + saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false); + // No external parameter entities + saxBuilder.setFeature("http://xml.org/sax/features/external-parameter-entities", false); + // No external DTDs + saxBuilder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + // Don't expand entities + saxBuilder.setExpandEntities(false); + + return saxBuilder; + } + + /** + * Initialize and return the Java XML Input Factory with some basic security applied + * to avoid XXE attacks and other unwanted content inclusion + * @return XML input factory for use in XML parsing + */ + public static XMLInputFactory getXMLInputFactory() { + XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory(); + xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false); + + return xmlInputFactory; + } + } diff --git a/dspace-api/src/main/java/org/dspace/content/crosswalk/METSDisseminationCrosswalk.java b/dspace-api/src/main/java/org/dspace/content/crosswalk/METSDisseminationCrosswalk.java index b8a4a8aef3..5ceacc933e 100644 --- a/dspace-api/src/main/java/org/dspace/content/crosswalk/METSDisseminationCrosswalk.java +++ b/dspace-api/src/main/java/org/dspace/content/crosswalk/METSDisseminationCrosswalk.java @@ -14,6 +14,7 @@ import java.util.ArrayList; import java.util.List; import org.apache.commons.lang3.ArrayUtils; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.DSpaceObject; import org.dspace.content.packager.PackageDisseminator; @@ -129,7 +130,7 @@ public class METSDisseminationCrosswalk try { //Return just the root Element of the METS file - SAXBuilder builder = new SAXBuilder(); + SAXBuilder builder = XMLUtils.getSAXBuilder(); Document metsDocument = builder.build(tempFile); return metsDocument.getRootElement(); } catch (JDOMException je) { diff --git a/dspace-api/src/main/java/org/dspace/content/crosswalk/MODSDisseminationCrosswalk.java b/dspace-api/src/main/java/org/dspace/content/crosswalk/MODSDisseminationCrosswalk.java index 6315f73a7c..3b12c5a63c 100644 --- a/dspace-api/src/main/java/org/dspace/content/crosswalk/MODSDisseminationCrosswalk.java +++ b/dspace-api/src/main/java/org/dspace/content/crosswalk/MODSDisseminationCrosswalk.java @@ -22,6 +22,7 @@ import java.util.Properties; import org.apache.commons.lang3.ArrayUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.Collection; import org.dspace.content.Community; @@ -144,7 +145,7 @@ public class MODSDisseminationCrosswalk extends SelfNamedPlugin MODS_NS.getURI() + " " + MODS_XSD; private static final XMLOutputter outputUgly = new XMLOutputter(); - private static final SAXBuilder builder = new SAXBuilder(); + private static final SAXBuilder builder = XMLUtils.getSAXBuilder(); private Map modsMap = null; diff --git a/dspace-api/src/main/java/org/dspace/content/crosswalk/QDCCrosswalk.java b/dspace-api/src/main/java/org/dspace/content/crosswalk/QDCCrosswalk.java index fa582fd62f..43b8820976 100644 --- a/dspace-api/src/main/java/org/dspace/content/crosswalk/QDCCrosswalk.java +++ b/dspace-api/src/main/java/org/dspace/content/crosswalk/QDCCrosswalk.java @@ -22,6 +22,7 @@ import java.util.Properties; import org.apache.commons.lang3.ArrayUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.DSpaceObject; import org.dspace.content.Item; @@ -125,7 +126,7 @@ public class QDCCrosswalk extends SelfNamedPlugin // XML schemaLocation fragment for this crosswalk, from config. private String schemaLocation = null; - private static final SAXBuilder builder = new SAXBuilder(); + private static final SAXBuilder builder = XMLUtils.getSAXBuilder(); protected ItemService itemService = ContentServiceFactory.getInstance().getItemService(); diff --git a/dspace-api/src/main/java/org/dspace/content/crosswalk/RoleCrosswalk.java b/dspace-api/src/main/java/org/dspace/content/crosswalk/RoleCrosswalk.java index 2c763036ce..8d5bf49902 100644 --- a/dspace-api/src/main/java/org/dspace/content/crosswalk/RoleCrosswalk.java +++ b/dspace-api/src/main/java/org/dspace/content/crosswalk/RoleCrosswalk.java @@ -13,6 +13,7 @@ import java.io.IOException; import java.sql.SQLException; import java.util.List; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.DSpaceObject; import org.dspace.content.packager.PackageDisseminator; @@ -208,7 +209,7 @@ public class RoleCrosswalk try { //Try to parse our XML results (which were disseminated by the Packager) - SAXBuilder builder = new SAXBuilder(); + SAXBuilder builder = XMLUtils.getSAXBuilder(); Document xmlDocument = builder.build(tempFile); //If XML parsed successfully, return root element of doc if (xmlDocument != null && xmlDocument.hasRootElement()) { diff --git a/dspace-api/src/main/java/org/dspace/content/crosswalk/XSLTIngestionCrosswalk.java b/dspace-api/src/main/java/org/dspace/content/crosswalk/XSLTIngestionCrosswalk.java index 63ef5f7336..b07b2b2228 100644 --- a/dspace-api/src/main/java/org/dspace/content/crosswalk/XSLTIngestionCrosswalk.java +++ b/dspace-api/src/main/java/org/dspace/content/crosswalk/XSLTIngestionCrosswalk.java @@ -18,6 +18,7 @@ import javax.xml.transform.TransformerException; import org.apache.commons.lang3.ArrayUtils; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.Collection; import org.dspace.content.Community; @@ -297,7 +298,7 @@ public class XSLTIngestionCrosswalk "Failed to initialize transformer, probably error loading stylesheet."); } - SAXBuilder builder = new SAXBuilder(); + SAXBuilder builder = XMLUtils.getSAXBuilder(); Document inDoc = builder.build(new FileInputStream(argv[i + 1])); XMLOutputter outputter = new XMLOutputter(Format.getPrettyFormat()); List dimList; diff --git a/dspace-api/src/main/java/org/dspace/content/packager/METSManifest.java b/dspace-api/src/main/java/org/dspace/content/packager/METSManifest.java index 3399bdf0f0..a1ed3c1243 100644 --- a/dspace-api/src/main/java/org/dspace/content/packager/METSManifest.java +++ b/dspace-api/src/main/java/org/dspace/content/packager/METSManifest.java @@ -20,6 +20,7 @@ import java.util.List; import org.apache.commons.codec.binary.Base64; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.Bitstream; import org.dspace.content.Bundle; @@ -265,12 +266,13 @@ public class METSManifest { public static METSManifest create(InputStream is, boolean validate, String configName) throws IOException, MetadataValidationException { - SAXBuilder builder = new SAXBuilder(validate); + SAXBuilder builder = XMLUtils.getSAXBuilder(); builder.setIgnoringElementContentWhitespace(true); // Set validation feature if (validate) { + builder.setValidation(true); builder.setFeature("http://apache.org/xml/features/validation/schema", true); // Tell the parser where local copies of schemas are, to speed up @@ -278,10 +280,6 @@ public class METSManifest { if (localSchemas.length() > 0) { builder.setProperty("http://apache.org/xml/properties/schema/external-schemaLocation", localSchemas); } - } else { - // disallow DTD parsing to ensure no XXE attacks can occur. - // See https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html - builder.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); } // Parse the METS file diff --git a/dspace-api/src/main/java/org/dspace/content/packager/RoleIngester.java b/dspace-api/src/main/java/org/dspace/content/packager/RoleIngester.java index ca27abe206..a0ac43282c 100644 --- a/dspace-api/src/main/java/org/dspace/content/packager/RoleIngester.java +++ b/dspace-api/src/main/java/org/dspace/content/packager/RoleIngester.java @@ -21,6 +21,7 @@ import javax.xml.parsers.ParserConfigurationException; import org.apache.commons.codec.DecoderException; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.Collection; import org.dspace.content.Community; @@ -385,7 +386,7 @@ public class RoleIngester implements PackageIngester { Document document; try { - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + DocumentBuilderFactory dbf = XMLUtils.getDocumentBuilderFactory(); dbf.setIgnoringComments(true); dbf.setCoalescing(true); DocumentBuilder db = dbf.newDocumentBuilder(); @@ -419,7 +420,7 @@ public class RoleIngester implements PackageIngester { Document document; try { - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + DocumentBuilderFactory dbf = XMLUtils.getDocumentBuilderFactory(); dbf.setIgnoringComments(true); dbf.setCoalescing(true); DocumentBuilder db = dbf.newDocumentBuilder(); diff --git a/dspace-api/src/main/java/org/dspace/ctask/general/MetadataWebService.java b/dspace-api/src/main/java/org/dspace/ctask/general/MetadataWebService.java index fc62d7a4b2..1b618ad31f 100644 --- a/dspace-api/src/main/java/org/dspace/ctask/general/MetadataWebService.java +++ b/dspace-api/src/main/java/org/dspace/ctask/general/MetadataWebService.java @@ -37,6 +37,7 @@ import org.apache.http.impl.client.CloseableHttpClient; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.dspace.app.client.DSpaceHttpClientFactory; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.DSpaceObject; import org.dspace.content.Item; @@ -176,7 +177,7 @@ public class MetadataWebService extends AbstractCurationTask implements Namespac fieldSeparator = (fldSep != null) ? fldSep : " "; urlTemplate = taskProperty("template"); templateParam = urlTemplate.substring(urlTemplate.indexOf("{") + 1, - urlTemplate.indexOf("}")); + urlTemplate.indexOf("}")); String[] parsed = parseTransform(templateParam); lookupField = parsed[0]; lookupTransform = parsed[1]; @@ -204,13 +205,9 @@ public class MetadataWebService extends AbstractCurationTask implements Namespac } } // initialize response document parser - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - factory.setNamespaceAware(true); try { - // disallow DTD parsing to ensure no XXE attacks can occur - // See https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html - factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); - factory.setXIncludeAware(false); + DocumentBuilderFactory factory = XMLUtils.getDocumentBuilderFactory(); + factory.setNamespaceAware(true); docBuilder = factory.newDocumentBuilder(); } catch (ParserConfigurationException pcE) { log.error("caught exception: " + pcE); diff --git a/dspace-api/src/main/java/org/dspace/external/provider/orcid/xml/Converter.java b/dspace-api/src/main/java/org/dspace/external/provider/orcid/xml/Converter.java index 578db6c567..ab641c0004 100644 --- a/dspace-api/src/main/java/org/dspace/external/provider/orcid/xml/Converter.java +++ b/dspace-api/src/main/java/org/dspace/external/provider/orcid/xml/Converter.java @@ -16,6 +16,7 @@ import javax.xml.stream.XMLStreamReader; import jakarta.xml.bind.JAXBContext; import jakarta.xml.bind.JAXBException; import jakarta.xml.bind.Unmarshaller; +import org.dspace.app.util.XMLUtils; import org.xml.sax.SAXException; /** @@ -31,9 +32,7 @@ public abstract class Converter { protected Object unmarshall(InputStream input, Class type) throws SAXException, URISyntaxException { try { - XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory(); - // disallow DTD parsing to ensure no XXE attacks can occur - xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false); + XMLInputFactory xmlInputFactory = XMLUtils.getXMLInputFactory(); XMLStreamReader xmlStreamReader = xmlInputFactory.createXMLStreamReader(input); JAXBContext context = JAXBContext.newInstance(type); diff --git a/dspace-api/src/main/java/org/dspace/identifier/doi/DataCiteConnector.java b/dspace-api/src/main/java/org/dspace/identifier/doi/DataCiteConnector.java index 368662026a..580fe5ab2f 100644 --- a/dspace-api/src/main/java/org/dspace/identifier/doi/DataCiteConnector.java +++ b/dspace-api/src/main/java/org/dspace/identifier/doi/DataCiteConnector.java @@ -40,6 +40,7 @@ import org.apache.http.util.EntityUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.dspace.app.client.DSpaceHttpClientFactory; +import org.dspace.app.util.XMLUtils; import org.dspace.authorize.AuthorizeException; import org.dspace.content.DSpaceObject; import org.dspace.content.crosswalk.CrosswalkException; @@ -829,7 +830,7 @@ public class DataCiteConnector } // parse the XML - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document doc = null; try { doc = saxBuilder.build(new ByteArrayInputStream(content.getBytes("UTF-8"))); diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java index 660e0c9754..a790c6a9a5 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java @@ -22,6 +22,7 @@ import jakarta.ws.rs.client.WebTarget; import jakarta.ws.rs.core.MediaType; import jakarta.ws.rs.core.Response; import org.apache.commons.lang3.StringUtils; +import org.dspace.app.util.XMLUtils; import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.Query; @@ -218,7 +219,7 @@ public class ArXivImportMetadataSourceServiceImpl extends AbstractImportMetadata if (response.getStatus() == 200) { String responseString = response.readEntity(String.class); - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(responseString)); Element root = document.getRootElement(); @@ -399,7 +400,7 @@ public class ArXivImportMetadataSourceServiceImpl extends AbstractImportMetadata private List splitToRecords(String recordsSrc) { try { - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(recordsSrc)); Element root = document.getRootElement(); diff --git a/dspace-api/src/main/java/org/dspace/importer/external/cinii/CiniiImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/cinii/CiniiImportMetadataSourceServiceImpl.java index 82a4b2d779..41c80ab7fe 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/cinii/CiniiImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/cinii/CiniiImportMetadataSourceServiceImpl.java @@ -26,6 +26,7 @@ import org.apache.http.HttpException; import org.apache.http.client.utils.URIBuilder; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.Query; @@ -301,9 +302,7 @@ public class CiniiImportMetadataSourceServiceImpl extends AbstractImportMetadata private List splitToRecords(String recordsSrc) { try { - SAXBuilder saxBuilder = new SAXBuilder(); - // disallow DTD parsing to ensure no XXE attacks can occur - saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(recordsSrc)); Element root = document.getRootElement(); return root.getChildren(); @@ -356,9 +355,7 @@ public class CiniiImportMetadataSourceServiceImpl extends AbstractImportMetadata Map> params = new HashMap>(); String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params); int url_len = this.url.length() - 1; - SAXBuilder saxBuilder = new SAXBuilder(); - // disallow DTD parsing to ensure no XXE attacks can occur - saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(response)); Element root = document.getRootElement(); List namespaces = Arrays.asList( @@ -420,9 +417,7 @@ public class CiniiImportMetadataSourceServiceImpl extends AbstractImportMetadata Map> params = new HashMap>(); String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params); - SAXBuilder saxBuilder = new SAXBuilder(); - // disallow DTD parsing to ensure no XXE attacks can occur - saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(response)); Element root = document.getRootElement(); List namespaces = Arrays @@ -449,4 +444,4 @@ public class CiniiImportMetadataSourceServiceImpl extends AbstractImportMetadata return metadatumDTO; } -} \ No newline at end of file +} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/crossref/CrossRefAbstractProcessor.java b/dspace-api/src/main/java/org/dspace/importer/external/crossref/CrossRefAbstractProcessor.java index 1b6da9d37b..99f1ee37a5 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/crossref/CrossRefAbstractProcessor.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/crossref/CrossRefAbstractProcessor.java @@ -12,7 +12,6 @@ import java.io.StringReader; import java.util.ArrayList; import java.util.Collection; import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import com.fasterxml.jackson.core.JsonProcessingException; @@ -21,6 +20,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.importer.external.metadatamapping.contributor.JsonPathMetadataProcessor; import org.w3c.dom.Document; import org.w3c.dom.Node; @@ -64,10 +64,9 @@ public class CrossRefAbstractProcessor implements JsonPathMetadataProcessor { } String xmlString = "" + abstractValue + ""; - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); Document xmlDoc; try { - DocumentBuilder builder = factory.newDocumentBuilder(); + DocumentBuilder builder = XMLUtils.getDocumentBuilder(); InputSource is = new InputSource(new StringReader(xmlString)); xmlDoc = builder.parse(is); } catch (SAXException | IOException | ParserConfigurationException e) { diff --git a/dspace-api/src/main/java/org/dspace/importer/external/epo/service/EpoImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/epo/service/EpoImportMetadataSourceServiceImpl.java index 70f726c5f9..d140eb2a33 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/epo/service/EpoImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/epo/service/EpoImportMetadataSourceServiceImpl.java @@ -32,6 +32,7 @@ import org.apache.http.client.utils.URIBuilder; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.xerces.impl.dv.util.Base64; +import org.dspace.app.util.XMLUtils; import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.Query; @@ -397,9 +398,7 @@ public class EpoImportMetadataSourceServiceImpl extends AbstractImportMetadataSo String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params); - SAXBuilder saxBuilder = new SAXBuilder(); - // disallow DTD parsing to ensure no XXE attacks can occur - saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(response)); Element root = document.getRootElement(); @@ -436,9 +435,7 @@ public class EpoImportMetadataSourceServiceImpl extends AbstractImportMetadataSo String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params); - SAXBuilder saxBuilder = new SAXBuilder(); - // disallow DTD parsing to ensure no XXE attacks can occur - saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(response)); Element root = document.getRootElement(); @@ -489,9 +486,7 @@ public class EpoImportMetadataSourceServiceImpl extends AbstractImportMetadataSo private List splitToRecords(String recordsSrc) { try { - SAXBuilder saxBuilder = new SAXBuilder(); - // disallow DTD parsing to ensure no XXE attacks can occur - saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(recordsSrc)); Element root = document.getRootElement(); List namespaces = Arrays.asList(Namespace.getNamespace("ns", "http://www.epo.org/exchange")); diff --git a/dspace-api/src/main/java/org/dspace/importer/external/openaire/service/OpenAireImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/openaire/service/OpenAireImportMetadataSourceServiceImpl.java index 7fb5f27354..ea5b7a6761 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/openaire/service/OpenAireImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/openaire/service/OpenAireImportMetadataSourceServiceImpl.java @@ -20,6 +20,7 @@ import jakarta.ws.rs.client.ClientBuilder; import jakarta.ws.rs.client.Invocation; import jakarta.ws.rs.client.WebTarget; import jakarta.ws.rs.core.Response; +import org.dspace.app.util.XMLUtils; import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.Query; @@ -248,7 +249,7 @@ public class OpenAireImportMetadataSourceServiceImpl extends AbstractImportMetad if (response.getStatus() == 200) { String responseString = response.readEntity(String.class); - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(responseString)); Element root = document.getRootElement(); @@ -330,7 +331,7 @@ public class OpenAireImportMetadataSourceServiceImpl extends AbstractImportMetad private List splitToRecords(String recordsSrc) { try { - SAXBuilder saxBuilder = new SAXBuilder(); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(recordsSrc)); Element root = document.getRootElement(); diff --git a/dspace-api/src/main/java/org/dspace/importer/external/pubmed/service/PubmedImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/pubmed/service/PubmedImportMetadataSourceServiceImpl.java index 1523c2c6bc..daf0d2131e 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/pubmed/service/PubmedImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/pubmed/service/PubmedImportMetadataSourceServiceImpl.java @@ -25,6 +25,7 @@ import java.util.concurrent.Callable; import com.google.common.io.CharStreams; import org.apache.commons.lang3.StringUtils; import org.apache.http.client.utils.URIBuilder; +import org.dspace.app.util.XMLUtils; import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.Query; @@ -234,11 +235,13 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat String value = null; try { - SAXBuilder saxBuilder = new SAXBuilder(); - // Disallow external entities & entity expansion to protect against XXE attacks - // (NOTE: We receive errors if we disable all DTDs for PubMed, so this is the best we can do) - saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false); - saxBuilder.setFeature("http://xml.org/sax/features/external-parameter-entities", false); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); + // To properly parse PubMed responses, we must allow DOCTYPE/DTDs overall but + // we can still take advantage of entities themselves being disabled, and not + // expanded. + saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false); + saxBuilder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", + true); Document document = saxBuilder.build(new StringReader(src)); Element root = document.getRootElement(); @@ -355,12 +358,7 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat private List splitToRecords(String recordsSrc) { try { - SAXBuilder saxBuilder = new SAXBuilder(); - // Disallow external entities & entity expansion to protect against XXE attacks - // (NOTE: We receive errors if we disable all DTDs for PubMed, so this is the best we can do) - saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false); - saxBuilder.setFeature("http://xml.org/sax/features/external-parameter-entities", false); - saxBuilder.setExpandEntities(false); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(recordsSrc)); Element root = document.getRootElement(); diff --git a/dspace-api/src/main/java/org/dspace/importer/external/pubmedeurope/PubmedEuropeMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/pubmedeurope/PubmedEuropeMetadataSourceServiceImpl.java index 7cd297eb28..24f40339dd 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/pubmedeurope/PubmedEuropeMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/pubmedeurope/PubmedEuropeMetadataSourceServiceImpl.java @@ -24,6 +24,7 @@ import org.apache.http.client.ClientProtocolException; import org.apache.http.client.utils.URIBuilder; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.Query; @@ -292,9 +293,7 @@ public class PubmedEuropeMetadataSourceServiceImpl extends AbstractImportMetadat Map> params = new HashMap>(); String response = liveImportClient.executeHttpGetRequest(1000, buildURI(1, query), params); - SAXBuilder saxBuilder = new SAXBuilder(); - // disallow DTD parsing to ensure no XXE attacks can occur - saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(response)); Element root = document.getRootElement(); Element element = root.getChild("hitCount"); @@ -365,9 +364,7 @@ public class PubmedEuropeMetadataSourceServiceImpl extends AbstractImportMetadat String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params); String cursorMark = StringUtils.EMPTY; if (StringUtils.isNotBlank(response)) { - SAXBuilder saxBuilder = new SAXBuilder(); - // disallow DTD parsing to ensure no XXE attacks can occur - saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(response)); XPathFactory xpfac = XPathFactory.instance(); XPathExpression xPath = xpfac.compile("//responseWrapper/resultList/result", @@ -419,4 +416,4 @@ public class PubmedEuropeMetadataSourceServiceImpl extends AbstractImportMetadat this.url = url; } -} \ No newline at end of file +} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/scopus/service/ScopusImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/scopus/service/ScopusImportMetadataSourceServiceImpl.java index a3f74694be..22e3534ca8 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/scopus/service/ScopusImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/scopus/service/ScopusImportMetadataSourceServiceImpl.java @@ -26,6 +26,7 @@ import java.util.regex.Pattern; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.Query; @@ -208,9 +209,7 @@ public class ScopusImportMetadataSourceServiceImpl extends AbstractImportMetadat return 0; } - SAXBuilder saxBuilder = new SAXBuilder(); - // disallow DTD parsing to ensure no XXE attacks can occur - saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(response)); Element root = document.getRootElement(); @@ -397,9 +396,7 @@ public class ScopusImportMetadataSourceServiceImpl extends AbstractImportMetadat private List splitToRecords(String recordsSrc) { try { - SAXBuilder saxBuilder = new SAXBuilder(); - // disallow DTD parsing to ensure no XXE attacks can occur - saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(recordsSrc)); Element root = document.getRootElement(); String totalResults = root.getChildText("totalResults", Namespace.getNamespace("http://a9.com/-/spec/opensearch/1.1/")); diff --git a/dspace-api/src/main/java/org/dspace/importer/external/wos/service/WOSImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/wos/service/WOSImportMetadataSourceServiceImpl.java index 9bffa2a84a..2ac63d5051 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/wos/service/WOSImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/wos/service/WOSImportMetadataSourceServiceImpl.java @@ -26,6 +26,7 @@ import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.util.XMLUtils; import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.Query; @@ -145,9 +146,7 @@ public class WOSImportMetadataSourceServiceImpl extends AbstractImportMetadataSo params.put(HEADER_PARAMETERS, getRequestParameters()); String response = liveImportClient.executeHttpGetRequest(timeout, url, params); - SAXBuilder saxBuilder = new SAXBuilder(); - // disallow DTD parsing to ensure no XXE attacks can occur - saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(response)); Element root = document.getRootElement(); XPathExpression xpath = XPathFactory.instance().compile("//*[@name=\"RecordsFound\"]", @@ -288,9 +287,7 @@ public class WOSImportMetadataSourceServiceImpl extends AbstractImportMetadataSo private List splitToRecords(String recordsSrc) { try { - SAXBuilder saxBuilder = new SAXBuilder(); - // disallow DTD parsing to ensure no XXE attacks can occur - saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true); + SAXBuilder saxBuilder = XMLUtils.getSAXBuilder(); Document document = saxBuilder.build(new StringReader(recordsSrc)); Element root = document.getRootElement(); String cData = XPathFactory.instance().compile("//*[@name=\"Records\"]", @@ -332,4 +329,4 @@ public class WOSImportMetadataSourceServiceImpl extends AbstractImportMetadataSo this.apiKey = apiKey; } -} \ No newline at end of file +} diff --git a/dspace-api/src/main/java/org/dspace/license/CCLicenseConnectorServiceImpl.java b/dspace-api/src/main/java/org/dspace/license/CCLicenseConnectorServiceImpl.java index 1d777a2e13..3c9088bda5 100644 --- a/dspace-api/src/main/java/org/dspace/license/CCLicenseConnectorServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/license/CCLicenseConnectorServiceImpl.java @@ -28,6 +28,7 @@ import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.util.EntityUtils; import org.apache.logging.log4j.Logger; import org.dspace.app.client.DSpaceHttpClientFactory; +import org.dspace.app.util.XMLUtils; import org.dspace.services.ConfigurationService; import org.jdom2.Attribute; import org.jdom2.Document; @@ -50,7 +51,7 @@ public class CCLicenseConnectorServiceImpl implements CCLicenseConnectorService, private Logger log = org.apache.logging.log4j.LogManager.getLogger(CCLicenseConnectorServiceImpl.class); private CloseableHttpClient client; - protected SAXBuilder parser = new SAXBuilder(); + protected SAXBuilder parser = XMLUtils.getSAXBuilder(); private String postArgument = "answers"; private String postAnswerFormat = diff --git a/dspace-api/src/main/java/org/dspace/orcid/client/OrcidClientImpl.java b/dspace-api/src/main/java/org/dspace/orcid/client/OrcidClientImpl.java index c872f5c6ff..aa80657780 100644 --- a/dspace-api/src/main/java/org/dspace/orcid/client/OrcidClientImpl.java +++ b/dspace-api/src/main/java/org/dspace/orcid/client/OrcidClientImpl.java @@ -43,6 +43,7 @@ import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.message.BasicNameValuePair; import org.dspace.app.client.DSpaceHttpClientFactory; +import org.dspace.app.util.XMLUtils; import org.dspace.orcid.OrcidToken; import org.dspace.orcid.exception.OrcidClientException; import org.dspace.orcid.model.OrcidEntityType; @@ -351,8 +352,7 @@ public class OrcidClientImpl implements OrcidClient { @SuppressWarnings("unchecked") private T unmarshall(HttpEntity entity, Class clazz) throws Exception { JAXBContext jaxbContext = JAXBContext.newInstance(clazz); - XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory(); - xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false); + XMLInputFactory xmlInputFactory = XMLUtils.getXMLInputFactory(); XMLStreamReader xmlStreamReader = xmlInputFactory.createXMLStreamReader(entity.getContent()); Unmarshaller unmarshaller = jaxbContext.createUnmarshaller(); return (T) unmarshaller.unmarshal(xmlStreamReader); diff --git a/dspace-api/src/main/java/org/dspace/vocabulary/ControlledVocabulary.java b/dspace-api/src/main/java/org/dspace/vocabulary/ControlledVocabulary.java index 7f2bdc6ef7..bd19a1254f 100644 --- a/dspace-api/src/main/java/org/dspace/vocabulary/ControlledVocabulary.java +++ b/dspace-api/src/main/java/org/dspace/vocabulary/ControlledVocabulary.java @@ -12,7 +12,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.xpath.XPath; @@ -20,6 +19,7 @@ import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; +import org.dspace.app.util.XMLUtils; import org.dspace.services.ConfigurationService; import org.dspace.services.factory.DSpaceServicesFactory; import org.w3c.dom.Document; @@ -71,7 +71,7 @@ public class ControlledVocabulary { File controlledVocFile = new File(filePath.toString()); if (controlledVocFile.exists()) { - DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); + DocumentBuilder builder = XMLUtils.getDocumentBuilder(); Document document = builder.parse(controlledVocFile); XPath xPath = XPathFactory.newInstance().newXPath(); Node node = (Node) xPath.compile("node").evaluate(document, XPathConstants.NODE);