Merge pull request #8854 from tdonohue/disable_dtd_parsing_in_sources

Disable DTD parsing in all external source plugins
This commit is contained in:
Tim Donohue
2023-06-20 09:22:15 -05:00
committed by GitHub
8 changed files with 42 additions and 4 deletions

View File

@@ -207,9 +207,10 @@ public class MetadataWebService extends AbstractCurationTask implements Namespac
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
try {
// disallow DTD parsing to ensure no XXE attacks can occur.
// disallow DTD parsing to ensure no XXE attacks can occur
// See https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
factory.setXIncludeAware(false);
docBuilder = factory.newDocumentBuilder();
} catch (ParserConfigurationException pcE) {
log.error("caught exception: " + pcE);

View File

@@ -12,6 +12,9 @@ import java.net.URISyntaxException;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Unmarshaller;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.xml.sax.SAXException;
@@ -28,11 +31,16 @@ public abstract class Converter<T> {
protected Object unmarshall(InputStream input, Class<?> type) throws SAXException, URISyntaxException {
try {
XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
// disallow DTD parsing to ensure no XXE attacks can occur
xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
XMLStreamReader xmlStreamReader = xmlInputFactory.createXMLStreamReader(input);
JAXBContext context = JAXBContext.newInstance(type);
Unmarshaller unmarshaller = context.createUnmarshaller();
return unmarshaller.unmarshal(input);
} catch (JAXBException e) {
throw new RuntimeException("Unable to unmarshall orcid message" + e);
return unmarshaller.unmarshal(xmlStreamReader);
} catch (JAXBException | XMLStreamException e) {
throw new RuntimeException("Unable to unmarshall orcid message: " + e);
}
}
}

View File

@@ -303,6 +303,8 @@ public class CiniiImportMetadataSourceServiceImpl extends AbstractImportMetadata
private List<Element> splitToRecords(String recordsSrc) {
try {
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
Document document = saxBuilder.build(new StringReader(recordsSrc));
Element root = document.getRootElement();
return root.getChildren();
@@ -356,6 +358,8 @@ public class CiniiImportMetadataSourceServiceImpl extends AbstractImportMetadata
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
int url_len = this.url.length() - 1;
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
Document document = saxBuilder.build(new StringReader(response));
Element root = document.getRootElement();
List<Namespace> namespaces = Arrays.asList(
@@ -418,6 +422,8 @@ public class CiniiImportMetadataSourceServiceImpl extends AbstractImportMetadata
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
Document document = saxBuilder.build(new StringReader(response));
Element root = document.getRootElement();
List<Namespace> namespaces = Arrays

View File

@@ -398,6 +398,8 @@ public class EpoImportMetadataSourceServiceImpl extends AbstractImportMetadataSo
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
Document document = saxBuilder.build(new StringReader(response));
Element root = document.getRootElement();
@@ -435,6 +437,8 @@ public class EpoImportMetadataSourceServiceImpl extends AbstractImportMetadataSo
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
Document document = saxBuilder.build(new StringReader(response));
Element root = document.getRootElement();
@@ -486,6 +490,8 @@ public class EpoImportMetadataSourceServiceImpl extends AbstractImportMetadataSo
private List<Element> splitToRecords(String recordsSrc) {
try {
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
Document document = saxBuilder.build(new StringReader(recordsSrc));
Element root = document.getRootElement();
List<Namespace> namespaces = Arrays.asList(Namespace.getNamespace("ns", "http://www.epo.org/exchange"));

View File

@@ -351,6 +351,11 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
private List<Element> splitToRecords(String recordsSrc) {
try {
SAXBuilder saxBuilder = new SAXBuilder();
// Disallow external entities & entity expansion to protect against XXE attacks
// (NOTE: We receive errors if we disable all DTDs for PubMed, so this is the best we can do)
saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false);
saxBuilder.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
saxBuilder.setExpandEntities(false);
Document document = saxBuilder.build(new StringReader(recordsSrc));
Element root = document.getRootElement();

View File

@@ -294,6 +294,8 @@ public class PubmedEuropeMetadataSourceServiceImpl extends AbstractImportMetadat
String response = liveImportClient.executeHttpGetRequest(1000, buildURI(1, query), params);
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
Document document = saxBuilder.build(new StringReader(response));
Element root = document.getRootElement();
Element element = root.getChild("hitCount");
@@ -365,6 +367,8 @@ public class PubmedEuropeMetadataSourceServiceImpl extends AbstractImportMetadat
String cursorMark = StringUtils.EMPTY;
if (StringUtils.isNotBlank(response)) {
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
Document document = saxBuilder.build(new StringReader(response));
XPathFactory xpfac = XPathFactory.instance();
XPathExpression<Element> xPath = xpfac.compile("//responseWrapper/resultList/result",

View File

@@ -202,6 +202,8 @@ public class ScopusImportMetadataSourceServiceImpl extends AbstractImportMetadat
String response = liveImportClient.executeHttpGetRequest(timeout, url, params);
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
Document document = saxBuilder.build(new StringReader(response));
Element root = document.getRootElement();
@@ -377,6 +379,8 @@ public class ScopusImportMetadataSourceServiceImpl extends AbstractImportMetadat
private List<Element> splitToRecords(String recordsSrc) {
try {
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
Document document = saxBuilder.build(new StringReader(recordsSrc));
Element root = document.getRootElement();
List<Element> records = root.getChildren("entry",Namespace.getNamespace("http://www.w3.org/2005/Atom"));

View File

@@ -147,6 +147,8 @@ public class WOSImportMetadataSourceServiceImpl extends AbstractImportMetadataSo
String response = liveImportClient.executeHttpGetRequest(timeout, url, params);
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
Document document = saxBuilder.build(new StringReader(response));
Element root = document.getRootElement();
XPathExpression<Element> xpath = XPathFactory.instance().compile("//*[@name=\"RecordsFound\"]",
@@ -285,6 +287,8 @@ public class WOSImportMetadataSourceServiceImpl extends AbstractImportMetadataSo
private List<Element> splitToRecords(String recordsSrc) {
try {
SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
Document document = saxBuilder.build(new StringReader(recordsSrc));
Element root = document.getRootElement();
String cData = XPathFactory.instance().compile("//*[@name=\"Records\"]",