Merge pull request #8335 from 4Science/CST-5997-LiveImportFrom-Pubmed-NotWorking

Pubmed query not working
This commit is contained in:
Tim Donohue
2022-06-10 16:28:51 -05:00
committed by GitHub
6 changed files with 162 additions and 204 deletions

View File

@@ -74,7 +74,8 @@ public class LiveImportClientImpl implements LiveImportClient {
HttpResponse httpResponse = httpClient.execute(method); HttpResponse httpResponse = httpClient.execute(method);
if (isNotSuccessfull(httpResponse)) { if (isNotSuccessfull(httpResponse)) {
throw new RuntimeException(); throw new RuntimeException("The request failed with: " + getStatusCode(httpResponse) + " code, reason= "
+ httpResponse.getStatusLine().getReasonPhrase());
} }
InputStream inputStream = httpResponse.getEntity().getContent(); InputStream inputStream = httpResponse.getEntity().getContent();
return IOUtils.toString(inputStream, Charset.defaultCharset()); return IOUtils.toString(inputStream, Charset.defaultCharset());

View File

@@ -14,23 +14,23 @@ import java.io.InputStreamReader;
import java.io.Reader; import java.io.Reader;
import java.io.StringReader; import java.io.StringReader;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.client.Invocation;
import javax.ws.rs.client.WebTarget;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import com.google.common.io.CharStreams; import com.google.common.io.CharStreams;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.client.utils.URIBuilder;
import org.dspace.content.Item; import org.dspace.content.Item;
import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.datamodel.Query; import org.dspace.importer.external.datamodel.Query;
import org.dspace.importer.external.exception.FileMultipleOccurencesException; import org.dspace.importer.external.exception.FileMultipleOccurencesException;
import org.dspace.importer.external.exception.FileSourceException; import org.dspace.importer.external.exception.FileSourceException;
import org.dspace.importer.external.exception.MetadataSourceException; import org.dspace.importer.external.exception.MetadataSourceException;
import org.dspace.importer.external.liveimportclient.service.LiveImportClient;
import org.dspace.importer.external.service.AbstractImportMetadataSourceService; import org.dspace.importer.external.service.AbstractImportMetadataSourceService;
import org.dspace.importer.external.service.components.FileSource; import org.dspace.importer.external.service.components.FileSource;
import org.dspace.importer.external.service.components.QuerySource; import org.dspace.importer.external.service.components.QuerySource;
@@ -41,6 +41,7 @@ import org.jdom2.filter.Filters;
import org.jdom2.input.SAXBuilder; import org.jdom2.input.SAXBuilder;
import org.jdom2.xpath.XPathExpression; import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory; import org.jdom2.xpath.XPathFactory;
import org.springframework.beans.factory.annotation.Autowired;
/** /**
* Implements a data source for querying PubMed Central * Implements a data source for querying PubMed Central
@@ -51,13 +52,16 @@ import org.jdom2.xpath.XPathFactory;
public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService<Element> public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService<Element>
implements QuerySource, FileSource { implements QuerySource, FileSource {
private String baseAddress; private String urlFetch;
private String urlSearch;
// it is protected so that subclass can mock it for testing private int attempt = 3;
protected WebTarget pubmedWebTarget;
private List<String> supportedExtensions; private List<String> supportedExtensions;
@Autowired
private LiveImportClient liveImportClient;
/** /**
* Set the file extensions supported by this metadata service * Set the file extensions supported by this metadata service
* *
@@ -187,29 +191,7 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
* @throws Exception on generic exception * @throws Exception on generic exception
*/ */
@Override @Override
public void init() throws Exception { public void init() throws Exception {}
Client client = ClientBuilder.newClient();
WebTarget webTarget = client.target(baseAddress);
pubmedWebTarget = webTarget.queryParam("db", "pubmed");
}
/**
* Return the baseAddress set to this object
*
* @return The String object that represents the baseAddress of this object
*/
public String getBaseAddress() {
return baseAddress;
}
/**
* Set the baseAddress to this object
*
* @param baseAddress The String object that represents the baseAddress of this object
*/
public void setBaseAddress(String baseAddress) {
this.baseAddress = baseAddress;
}
private class GetNbRecords implements Callable<Integer> { private class GetNbRecords implements Callable<Integer> {
@@ -226,24 +208,27 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
@Override @Override
public Integer call() throws Exception { public Integer call() throws Exception {
WebTarget getRecordIdsTarget = pubmedWebTarget URIBuilder uriBuilder = new URIBuilder(urlSearch);
.queryParam("term", query.getParameterAsClass("query", String.class)); uriBuilder.addParameter("db", "pubmed");
uriBuilder.addParameter("term", query.getParameterAsClass("query", String.class));
Map<String, Map<String, String>> params = new HashMap<String, Map<String,String>>();
String response = StringUtils.EMPTY;
int countAttempt = 0;
while (StringUtils.isBlank(response) && countAttempt <= attempt) {
countAttempt++;
response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
}
getRecordIdsTarget = getRecordIdsTarget.path("esearch.fcgi"); if (StringUtils.isBlank(response)) {
throw new RuntimeException("After " + attempt
+ " attempts to contact the PubMed service, a correct answer could not be received."
+ " The request was made with this URL:" + uriBuilder.toString());
}
Invocation.Builder invocationBuilder = getRecordIdsTarget.request(MediaType.TEXT_PLAIN_TYPE); return Integer.parseInt(getSingleElementValue(response, "Count"));
Response response = invocationBuilder.get();
String responseString = response.readEntity(String.class);
String count = getSingleElementValue(responseString, "Count");
return Integer.parseInt(count);
} }
} }
private String getSingleElementValue(String src, String elementName) { private String getSingleElementValue(String src, String elementName) {
String value = null; String value = null;
@@ -286,41 +271,61 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
Integer start = query.getParameterAsClass("start", Integer.class); Integer start = query.getParameterAsClass("start", Integer.class);
Integer count = query.getParameterAsClass("count", Integer.class); Integer count = query.getParameterAsClass("count", Integer.class);
if (count == null || count < 0) { if (Objects.isNull(count) || count < 0) {
count = 10; count = 10;
} }
if (start == null || start < 0) { if (Objects.isNull(start) || start < 0) {
start = 0; start = 0;
} }
List<ImportRecord> records = new LinkedList<ImportRecord>(); List<ImportRecord> records = new LinkedList<ImportRecord>();
WebTarget getRecordIdsTarget = pubmedWebTarget.queryParam("term", queryString); URIBuilder uriBuilder = new URIBuilder(urlSearch);
getRecordIdsTarget = getRecordIdsTarget.queryParam("retstart", start); uriBuilder.addParameter("db", "pubmed");
getRecordIdsTarget = getRecordIdsTarget.queryParam("retmax", count); uriBuilder.addParameter("retstart", start.toString());
getRecordIdsTarget = getRecordIdsTarget.queryParam("usehistory", "y"); uriBuilder.addParameter("retmax", count.toString());
getRecordIdsTarget = getRecordIdsTarget.path("esearch.fcgi"); uriBuilder.addParameter("usehistory", "y");
uriBuilder.addParameter("term", queryString);
Map<String, Map<String, String>> params = new HashMap<String, Map<String,String>>();
String response = StringUtils.EMPTY;
int countAttempt = 0;
while (StringUtils.isBlank(response) && countAttempt <= attempt) {
countAttempt++;
response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
}
Invocation.Builder invocationBuilder = getRecordIdsTarget.request(MediaType.TEXT_PLAIN_TYPE); if (StringUtils.isBlank(response)) {
throw new RuntimeException("After " + attempt
+ " attempts to contact the PubMed service, a correct answer could not be received."
+ " The request was made with this URL:" + uriBuilder.toString());
}
Response response = invocationBuilder.get(); String queryKey = getSingleElementValue(response, "QueryKey");
String responseString = response.readEntity(String.class); String webEnv = getSingleElementValue(response, "WebEnv");
String queryKey = getSingleElementValue(responseString, "QueryKey"); URIBuilder uriBuilder2 = new URIBuilder(urlFetch);
String webEnv = getSingleElementValue(responseString, "WebEnv"); uriBuilder2.addParameter("db", "pubmed");
uriBuilder2.addParameter("retstart", start.toString());
uriBuilder2.addParameter("retmax", count.toString());
uriBuilder2.addParameter("WebEnv", webEnv);
uriBuilder2.addParameter("query_key", queryKey);
uriBuilder2.addParameter("retmode", "xml");
Map<String, Map<String, String>> params2 = new HashMap<String, Map<String,String>>();
String response2 = StringUtils.EMPTY;
countAttempt = 0;
while (StringUtils.isBlank(response2) && countAttempt <= attempt) {
countAttempt++;
response2 = liveImportClient.executeHttpGetRequest(1000, uriBuilder2.toString(), params2);
}
WebTarget getRecordsTarget = pubmedWebTarget.queryParam("WebEnv", webEnv); if (StringUtils.isBlank(response2)) {
getRecordsTarget = getRecordsTarget.queryParam("query_key", queryKey); throw new RuntimeException("After " + attempt
getRecordsTarget = getRecordsTarget.queryParam("retmode", "xml"); + " attempts to contact the PubMed service, a correct answer could not be received."
getRecordsTarget = getRecordsTarget.path("efetch.fcgi"); + " The request was made with this URL:" + uriBuilder2.toString());
getRecordsTarget = getRecordsTarget.queryParam("retmax", count); }
getRecordsTarget = getRecordsTarget.queryParam("retstart", start);
invocationBuilder = getRecordsTarget.request(MediaType.TEXT_PLAIN_TYPE); List<Element> elements = splitToRecords(response2);
response = invocationBuilder.get();
List<Element> elements = splitToRecords(response.readEntity(String.class));
for (Element record : elements) { for (Element record : elements) {
records.add(transformSourceRecords(record)); records.add(transformSourceRecords(record));
@@ -361,23 +366,29 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
@Override @Override
public ImportRecord call() throws Exception { public ImportRecord call() throws Exception {
String id = query.getParameterAsClass("id", String.class);
WebTarget getRecordTarget = pubmedWebTarget.queryParam("id", id); URIBuilder uriBuilder = new URIBuilder(urlFetch);
getRecordTarget = getRecordTarget.queryParam("retmode", "xml"); uriBuilder.addParameter("db", "pubmed");
getRecordTarget = getRecordTarget.path("efetch.fcgi"); uriBuilder.addParameter("retmode", "xml");
uriBuilder.addParameter("id", query.getParameterAsClass("id", String.class));
Invocation.Builder invocationBuilder = getRecordTarget.request(MediaType.TEXT_PLAIN_TYPE); Map<String, Map<String, String>> params = new HashMap<String, Map<String,String>>();
String response = StringUtils.EMPTY;
Response response = invocationBuilder.get(); int countAttempt = 0;
while (StringUtils.isBlank(response) && countAttempt <= attempt) {
List<Element> elements = splitToRecords(response.readEntity(String.class)); countAttempt++;
response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
if (elements.isEmpty()) {
return null;
} }
return transformSourceRecords(elements.get(0)); if (StringUtils.isBlank(response)) {
throw new RuntimeException("After " + attempt
+ " attempts to contact the PubMed service, a correct answer could not be received."
+ " The request was made with this URL:" + uriBuilder.toString());
}
List<Element> elements = splitToRecords(response);
return elements.isEmpty() ? null : transformSourceRecords(elements.get(0));
} }
} }
@@ -396,40 +407,57 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
@Override @Override
public Collection<ImportRecord> call() throws Exception { public Collection<ImportRecord> call() throws Exception {
WebTarget getRecordIdsTarget = pubmedWebTarget URIBuilder uriBuilder = new URIBuilder(urlSearch);
.queryParam("term", query.getParameterAsClass("term", String.class)); uriBuilder.addParameter("db", "pubmed");
getRecordIdsTarget = getRecordIdsTarget uriBuilder.addParameter("usehistory", "y");
.queryParam("field", query.getParameterAsClass("field", String.class)); uriBuilder.addParameter("term", query.getParameterAsClass("term", String.class));
getRecordIdsTarget = getRecordIdsTarget.queryParam("usehistory", "y"); uriBuilder.addParameter("field", query.getParameterAsClass("field", String.class));
getRecordIdsTarget = getRecordIdsTarget.path("esearch.fcgi");
Invocation.Builder invocationBuilder = getRecordIdsTarget.request(MediaType.TEXT_PLAIN_TYPE); Map<String, Map<String, String>> params = new HashMap<String, Map<String,String>>();
String response = StringUtils.EMPTY;
int countAttempt = 0;
while (StringUtils.isBlank(response) && countAttempt <= attempt) {
countAttempt++;
response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
}
Response response = invocationBuilder.get(); if (StringUtils.isBlank(response)) {
String responseString = response.readEntity(String.class); throw new RuntimeException("After " + attempt
+ " attempts to contact the PubMed service, a correct answer could not be received."
+ " The request was made with this URL:" + uriBuilder.toString());
}
String queryKey = getSingleElementValue(responseString, "QueryKey"); String webEnv = getSingleElementValue(response, "WebEnv");
String webEnv = getSingleElementValue(responseString, "WebEnv"); String queryKey = getSingleElementValue(response, "QueryKey");
WebTarget getRecordsTarget = pubmedWebTarget.queryParam("WebEnv", webEnv); URIBuilder uriBuilder2 = new URIBuilder(urlFetch);
getRecordsTarget = getRecordsTarget.queryParam("query_key", queryKey); uriBuilder2.addParameter("db", "pubmed");
getRecordsTarget = getRecordsTarget.queryParam("retmode", "xml"); uriBuilder2.addParameter("retmode", "xml");
getRecordsTarget = getRecordsTarget.path("efetch.fcgi"); uriBuilder2.addParameter("WebEnv", webEnv);
uriBuilder2.addParameter("query_key", queryKey);
invocationBuilder = getRecordsTarget.request(MediaType.TEXT_PLAIN_TYPE); Map<String, Map<String, String>> params2 = new HashMap<String, Map<String,String>>();
response = invocationBuilder.get(); String response2 = StringUtils.EMPTY;
countAttempt = 0;
while (StringUtils.isBlank(response2) && countAttempt <= attempt) {
countAttempt++;
response2 = liveImportClient.executeHttpGetRequest(1000, uriBuilder2.toString(), params2);
}
String xml = response.readEntity(String.class); if (StringUtils.isBlank(response2)) {
return parseXMLString(xml); throw new RuntimeException("After " + attempt
+ " attempts to contact the PubMed service, a correct answer could not be received."
+ " The request was made with this URL:" + uriBuilder2.toString());
}
return parseXMLString(response2);
} }
} }
@Override @Override
public List<ImportRecord> getRecords(InputStream inputStream) throws FileSourceException { public List<ImportRecord> getRecords(InputStream inputStream) throws FileSourceException {
String xml = null;
try (Reader reader = new InputStreamReader(inputStream, "UTF-8")) { try (Reader reader = new InputStreamReader(inputStream, "UTF-8")) {
xml = CharStreams.toString(reader); String xml = CharStreams.toString(reader);
return parseXMLString(xml); return parseXMLString(xml);
} catch (IOException e) { } catch (IOException e) {
throw new FileSourceException ("Cannot read XML from InputStream", e); throw new FileSourceException ("Cannot read XML from InputStream", e);
@@ -456,4 +484,21 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
} }
return records; return records;
} }
}
/**
 * Return the urlFetch set to this object. This is the address the record-fetching
 * requests of this service are built from.
 *
 * @return The String object that represents the urlFetch of this object
 */
public String getUrlFetch() {
return urlFetch;
}
/**
 * Set the urlFetch to this object.
 *
 * @param urlFetch The String object that represents the urlFetch of this object
 */
public void setUrlFetch(String urlFetch) {
this.urlFetch = urlFetch;
}
/**
 * Return the urlSearch set to this object. This is the address the search
 * requests of this service are built from.
 *
 * @return The String object that represents the urlSearch of this object
 */
public String getUrlSearch() {
return urlSearch;
}
/**
 * Set the urlSearch to this object.
 *
 * @param urlSearch The String object that represents the urlSearch of this object
 */
public void setUrlSearch(String urlSearch) {
this.urlSearch = urlSearch;
}
}

View File

@@ -47,7 +47,8 @@
<bean id="pubmedImportService" <bean id="pubmedImportService"
class="org.dspace.importer.external.pubmed.service.PubmedImportMetadataSourceServiceImpl"> class="org.dspace.importer.external.pubmed.service.PubmedImportMetadataSourceServiceImpl">
<property name="metadataFieldMapping" ref="pubmedMetadataFieldMapping"/> <property name="metadataFieldMapping" ref="pubmedMetadataFieldMapping"/>
<property name="baseAddress" value="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"/> <property name="urlFetch" value="${pubmed.url.fetch}"/>
<property name="urlSearch" value="${pubmed.url.search}"/>
<property name="generateQueryForItem" ref="pubmedService"></property> <property name="generateQueryForItem" ref="pubmedService"></property>
<property name="supportedExtensions"> <property name="supportedExtensions">
<list> <list>

View File

@@ -59,22 +59,15 @@
</bean> </bean>
<bean id="pubmedLiveImportDataProvider" class="org.dspace.external.provider.impl.LiveImportDataProvider"> <bean id="pubmedLiveImportDataProvider" class="org.dspace.external.provider.impl.LiveImportDataProvider">
<property name="metadataSource" ref="mockPubmedImportService"/> <property name="metadataSource" ref="pubmedImportService"/>
<property name="sourceIdentifier" value="pubmed"/> <property name="sourceIdentifier" value="pubmed"/>
<property name="recordIdMetadata" value="dc.identifier.other"/> <property name="recordIdMetadata" value="dc.identifier.other"/>
</bean> <property name="supportedEntityTypes">
<bean id="mockPubmedImportService"
class="org.dspace.external.provider.impl.MockPubmedImportMetadataSourceServiceImpl">
<property name="metadataFieldMapping" ref="pubmedMetadataFieldMapping"/>
<property name="baseAddress" value="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"/>
<property name="supportedExtensions">
<list> <list>
<value>xml</value> <value>Publication</value>
<value>none</value>
</list> </list>
</property> </property>
</bean> </bean>
</beans> </beans>

View File

@@ -1,87 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.external.provider.impl;
import static org.mockito.Mockito.when;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import javax.ws.rs.client.Invocation;
import javax.ws.rs.client.WebTarget;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import org.dspace.importer.external.pubmed.service.PubmedImportMetadataSourceServiceImpl;
import org.mockito.ArgumentCaptor;
import org.mockito.ArgumentMatchers;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import org.springframework.util.FileCopyUtils;
/**
 * Test double for {@link PubmedImportMetadataSourceServiceImpl} that answers PubMed
 * requests from test resource files instead of performing REST calls.
 * <p>
 * The {@link #init()} method is overridden so that the web target used by the parent
 * class is a Mockito mock. The mock remembers the last value passed to
 * {@code path(...)} and, when the response entity is read, returns the content of the
 * test resource named {@code pubmed-<path>.xml}. Requests for {@code esearch.fcgi} or
 * {@code efetch.fcgi} are therefore answered with {@code pubmed-esearch.fcgi.xml} or
 * {@code pubmed-efetch.fcgi.xml}.
 *
 * @author Andrea Bollini (andrea.bollini at 4science.it)
 *
 */
public class MockPubmedImportMetadataSourceServiceImpl extends PubmedImportMetadataSourceServiceImpl {

    /**
     * Replace the web target with a mock instead of creating a real client.
     *
     * @throws Exception declared by the overridden method; not thrown by this implementation
     */
    @Override
    public void init() throws Exception {
        pubmedWebTarget = Mockito.mock(WebTarget.class);
        ArgumentCaptor<String> valueCapture = ArgumentCaptor.forClass(String.class);

        // queryParam(...) and path(...) both hand back the same mock so that calls can be chained
        Answer<WebTarget> returnSameTarget = (InvocationOnMock invocation) -> pubmedWebTarget;
        when(pubmedWebTarget.queryParam(ArgumentMatchers.any(), ArgumentMatchers.any()))
            .thenAnswer(returnSameTarget);
        // the captor records the requested path; it selects the resource file later on
        when(pubmedWebTarget.path(valueCapture.capture())).thenAnswer(returnSameTarget);

        Answer<Invocation.Builder> newBuilder = (InvocationOnMock invocation) -> mockBuilder(valueCapture);
        when(pubmedWebTarget.request(ArgumentMatchers.any(MediaType.class))).thenAnswer(newBuilder);
    }

    /**
     * Create a mocked request builder whose {@code get()} returns a mocked response.
     *
     * @param valueCapture captor holding the path values passed to the mocked web target
     * @return the mocked builder
     */
    private Invocation.Builder mockBuilder(ArgumentCaptor<String> valueCapture) {
        Invocation.Builder builder = Mockito.mock(Invocation.Builder.class);
        Answer<Response> newResponse = (InvocationOnMock invocation) -> mockResponse(valueCapture);
        when(builder.get()).thenAnswer(newResponse);
        return builder;
    }

    /**
     * Create a mocked response whose String entity is the content of the test resource
     * matching the last captured path.
     *
     * @param valueCapture captor holding the path values passed to the mocked web target
     * @return the mocked response
     */
    private Response mockResponse(ArgumentCaptor<String> valueCapture) {
        Response response = Mockito.mock(Response.class);
        // the captor is read lazily, at the moment the entity is requested
        Answer<String> resourceContent =
            (InvocationOnMock invocation) -> readResource("pubmed-" + valueCapture.getValue() + ".xml");
        when(response.readEntity(ArgumentMatchers.eq(String.class))).then(resourceContent);
        return response;
    }

    /**
     * Read a test resource, resolved relative to this class, as a UTF-8 string.
     *
     * @param resourceName name of the resource file
     * @return the whole content of the resource
     * @throws IllegalStateException if the resource does not exist
     * @throws UncheckedIOException  if the resource cannot be read
     */
    private static String readResource(String resourceName) {
        InputStream resource = MockPubmedImportMetadataSourceServiceImpl.class.getResourceAsStream(resourceName);
        if (resource == null) {
            // getResourceAsStream signals a missing resource with null; fail with the file name
            // instead of an anonymous NullPointerException from the reader constructor
            throw new IllegalStateException("PubMed mock test resource not found: " + resourceName);
        }
        try (Reader reader = new InputStreamReader(resource, StandardCharsets.UTF_8)) {
            return FileCopyUtils.copyToString(reader);
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
}

View File

@@ -42,4 +42,9 @@ epo.authUrl = https://ops.epo.org/3.2/auth/accesstoken
epo.url = https://ops.epo.org/rest-services/published-data/publication/$(doctype)/$(id)/biblio epo.url = https://ops.epo.org/rest-services/published-data/publication/$(doctype)/$(id)/biblio
# this url will be used to perform basic searching # this url will be used to perform basic searching
epo.searchUrl = https://ops.epo.org/rest-services/published-data/search epo.searchUrl = https://ops.epo.org/rest-services/published-data/search
#################################################################
#---------------------- PubMed -----------------------------#
#---------------------------------------------------------------#
pubmed.url.search = https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi
pubmed.url.fetch = https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi
################################################################# #################################################################