Merge pull request #8335 from 4Science/CST-5997-LiveImportFrom-Pubmed-NotWorking

Pubmed query not working
This commit is contained in:
Tim Donohue
2022-06-10 16:28:51 -05:00
committed by GitHub
6 changed files with 162 additions and 204 deletions

View File

@@ -74,7 +74,8 @@ public class LiveImportClientImpl implements LiveImportClient {
HttpResponse httpResponse = httpClient.execute(method);
if (isNotSuccessfull(httpResponse)) {
throw new RuntimeException();
throw new RuntimeException("The request failed with: " + getStatusCode(httpResponse) + " code, reason= "
+ httpResponse.getStatusLine().getReasonPhrase());
}
InputStream inputStream = httpResponse.getEntity().getContent();
return IOUtils.toString(inputStream, Charset.defaultCharset());

View File

@@ -14,23 +14,23 @@ import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.Callable;
import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.client.Invocation;
import javax.ws.rs.client.WebTarget;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import com.google.common.io.CharStreams;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.client.utils.URIBuilder;
import org.dspace.content.Item;
import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.datamodel.Query;
import org.dspace.importer.external.exception.FileMultipleOccurencesException;
import org.dspace.importer.external.exception.FileSourceException;
import org.dspace.importer.external.exception.MetadataSourceException;
import org.dspace.importer.external.liveimportclient.service.LiveImportClient;
import org.dspace.importer.external.service.AbstractImportMetadataSourceService;
import org.dspace.importer.external.service.components.FileSource;
import org.dspace.importer.external.service.components.QuerySource;
@@ -41,6 +41,7 @@ import org.jdom2.filter.Filters;
import org.jdom2.input.SAXBuilder;
import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory;
import org.springframework.beans.factory.annotation.Autowired;
/**
* Implements a data source for querying PubMed Central
@@ -51,13 +52,16 @@ import org.jdom2.xpath.XPathFactory;
public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService<Element>
implements QuerySource, FileSource {
private String baseAddress;
private String urlFetch;
private String urlSearch;
// it is protected so that subclass can mock it for testing
protected WebTarget pubmedWebTarget;
private int attempt = 3;
private List<String> supportedExtensions;
@Autowired
private LiveImportClient liveImportClient;
/**
* Set the file extensions supported by this metadata service
*
@@ -187,29 +191,7 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
* @throws Exception on generic exception
*/
@Override
public void init() throws Exception {
Client client = ClientBuilder.newClient();
WebTarget webTarget = client.target(baseAddress);
pubmedWebTarget = webTarget.queryParam("db", "pubmed");
}
/**
* Return the baseAddress set to this object
*
* @return The String object that represents the baseAddress of this object
*/
public String getBaseAddress() {
return baseAddress;
}
/**
* Set the baseAddress to this object
*
* @param baseAddress The String object that represents the baseAddress of this object
*/
public void setBaseAddress(String baseAddress) {
this.baseAddress = baseAddress;
}
public void init() throws Exception {}
private class GetNbRecords implements Callable<Integer> {
@@ -226,24 +208,27 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
@Override
public Integer call() throws Exception {
// Returns the value of the "Count" element of the search response, parsed as an integer.
// NOTE(review): this span comes from a rendered diff, so statements of the former
// WebTarget-based implementation appear interleaved with the URIBuilder/LiveImportClient ones.
WebTarget getRecordIdsTarget = pubmedWebTarget
.queryParam("term", query.getParameterAsClass("query", String.class));
// Build the request from urlSearch: db=pubmed plus the "query" parameter sent as "term".
URIBuilder uriBuilder = new URIBuilder(urlSearch);
uriBuilder.addParameter("db", "pubmed");
uriBuilder.addParameter("term", query.getParameterAsClass("query", String.class));
// Passed to the client as an empty map.
Map<String, Map<String, String>> params = new HashMap<String, Map<String,String>>();
String response = StringUtils.EMPTY;
int countAttempt = 0;
// Repeat the GET while the response is blank. countAttempt starts at 0 and the bound is
// inclusive, so the request can be sent up to attempt + 1 times.
while (StringUtils.isBlank(response) && countAttempt <= attempt) {
countAttempt++;
response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
}
getRecordIdsTarget = getRecordIdsTarget.path("esearch.fcgi");
// Still blank after the loop: fail and include the requested URL in the message.
// NOTE(review): the message reports `attempt` attempts although the loop above allows one more.
if (StringUtils.isBlank(response)) {
throw new RuntimeException("After " + attempt
+ " attempts to contact the PubMed service, a correct answer could not be received."
+ " The request was made with this URL:" + uriBuilder.toString());
}
Invocation.Builder invocationBuilder = getRecordIdsTarget.request(MediaType.TEXT_PLAIN_TYPE);
Response response = invocationBuilder.get();
String responseString = response.readEntity(String.class);
String count = getSingleElementValue(responseString, "Count");
return Integer.parseInt(count);
return Integer.parseInt(getSingleElementValue(response, "Count"));
}
}
private String getSingleElementValue(String src, String elementName) {
String value = null;
@@ -286,41 +271,61 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
Integer start = query.getParameterAsClass("start", Integer.class);
Integer count = query.getParameterAsClass("count", Integer.class);
if (count == null || count < 0) {
if (Objects.isNull(count) || count < 0) {
count = 10;
}
if (start == null || start < 0) {
if (Objects.isNull(start) || start < 0) {
start = 0;
}
List<ImportRecord> records = new LinkedList<ImportRecord>();
WebTarget getRecordIdsTarget = pubmedWebTarget.queryParam("term", queryString);
getRecordIdsTarget = getRecordIdsTarget.queryParam("retstart", start);
getRecordIdsTarget = getRecordIdsTarget.queryParam("retmax", count);
getRecordIdsTarget = getRecordIdsTarget.queryParam("usehistory", "y");
getRecordIdsTarget = getRecordIdsTarget.path("esearch.fcgi");
URIBuilder uriBuilder = new URIBuilder(urlSearch);
uriBuilder.addParameter("db", "pubmed");
uriBuilder.addParameter("retstart", start.toString());
uriBuilder.addParameter("retmax", count.toString());
uriBuilder.addParameter("usehistory", "y");
uriBuilder.addParameter("term", queryString);
Map<String, Map<String, String>> params = new HashMap<String, Map<String,String>>();
String response = StringUtils.EMPTY;
int countAttempt = 0;
while (StringUtils.isBlank(response) && countAttempt <= attempt) {
countAttempt++;
response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
}
Invocation.Builder invocationBuilder = getRecordIdsTarget.request(MediaType.TEXT_PLAIN_TYPE);
if (StringUtils.isBlank(response)) {
throw new RuntimeException("After " + attempt
+ " attempts to contact the PubMed service, a correct answer could not be received."
+ " The request was made with this URL:" + uriBuilder.toString());
}
Response response = invocationBuilder.get();
String responseString = response.readEntity(String.class);
String queryKey = getSingleElementValue(response, "QueryKey");
String webEnv = getSingleElementValue(response, "WebEnv");
String queryKey = getSingleElementValue(responseString, "QueryKey");
String webEnv = getSingleElementValue(responseString, "WebEnv");
URIBuilder uriBuilder2 = new URIBuilder(urlFetch);
uriBuilder2.addParameter("db", "pubmed");
uriBuilder2.addParameter("retstart", start.toString());
uriBuilder2.addParameter("retmax", count.toString());
uriBuilder2.addParameter("WebEnv", webEnv);
uriBuilder2.addParameter("query_key", queryKey);
uriBuilder2.addParameter("retmode", "xml");
Map<String, Map<String, String>> params2 = new HashMap<String, Map<String,String>>();
String response2 = StringUtils.EMPTY;
countAttempt = 0;
while (StringUtils.isBlank(response2) && countAttempt <= attempt) {
countAttempt++;
response2 = liveImportClient.executeHttpGetRequest(1000, uriBuilder2.toString(), params2);
}
WebTarget getRecordsTarget = pubmedWebTarget.queryParam("WebEnv", webEnv);
getRecordsTarget = getRecordsTarget.queryParam("query_key", queryKey);
getRecordsTarget = getRecordsTarget.queryParam("retmode", "xml");
getRecordsTarget = getRecordsTarget.path("efetch.fcgi");
getRecordsTarget = getRecordsTarget.queryParam("retmax", count);
getRecordsTarget = getRecordsTarget.queryParam("retstart", start);
if (StringUtils.isBlank(response2)) {
throw new RuntimeException("After " + attempt
+ " attempts to contact the PubMed service, a correct answer could not be received."
+ " The request was made with this URL:" + uriBuilder2.toString());
}
invocationBuilder = getRecordsTarget.request(MediaType.TEXT_PLAIN_TYPE);
response = invocationBuilder.get();
List<Element> elements = splitToRecords(response.readEntity(String.class));
List<Element> elements = splitToRecords(response2);
for (Element record : elements) {
records.add(transformSourceRecords(record));
@@ -361,23 +366,29 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
@Override
public ImportRecord call() throws Exception {
// Requests a single record by the "id" query parameter and returns the first parsed
// record transformed via transformSourceRecords, or null when no record element is found.
// NOTE(review): this span comes from a rendered diff, so statements of the former
// WebTarget-based implementation appear interleaved with the URIBuilder/LiveImportClient ones.
String id = query.getParameterAsClass("id", String.class);
WebTarget getRecordTarget = pubmedWebTarget.queryParam("id", id);
getRecordTarget = getRecordTarget.queryParam("retmode", "xml");
getRecordTarget = getRecordTarget.path("efetch.fcgi");
// Build the request from urlFetch: db=pubmed, retmode=xml and the requested id.
// NOTE(review): the id is read from the query again here instead of reusing `id` above.
URIBuilder uriBuilder = new URIBuilder(urlFetch);
uriBuilder.addParameter("db", "pubmed");
uriBuilder.addParameter("retmode", "xml");
uriBuilder.addParameter("id", query.getParameterAsClass("id", String.class));
Invocation.Builder invocationBuilder = getRecordTarget.request(MediaType.TEXT_PLAIN_TYPE);
Response response = invocationBuilder.get();
List<Element> elements = splitToRecords(response.readEntity(String.class));
if (elements.isEmpty()) {
return null;
Map<String, Map<String, String>> params = new HashMap<String, Map<String,String>>();
String response = StringUtils.EMPTY;
int countAttempt = 0;
// Repeat the GET while the response is blank. countAttempt starts at 0 and the bound is
// inclusive, so the request can be sent up to attempt + 1 times.
while (StringUtils.isBlank(response) && countAttempt <= attempt) {
countAttempt++;
response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
}
return transformSourceRecords(elements.get(0));
// Still blank after the loop: fail and include the requested URL in the message.
if (StringUtils.isBlank(response)) {
throw new RuntimeException("After " + attempt
+ " attempts to contact the PubMed service, a correct answer could not be received."
+ " The request was made with this URL:" + uriBuilder.toString());
}
// Split the response body into record elements; only the first one is used.
List<Element> elements = splitToRecords(response);
return elements.isEmpty() ? null : transformSourceRecords(elements.get(0));
}
}
@@ -396,40 +407,57 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
@Override
public Collection<ImportRecord> call() throws Exception {
// Two-step lookup: a search request built from urlSearch yields the "WebEnv" and "QueryKey"
// values, which are then sent to urlFetch; the XML returned by the second request is handed
// to parseXMLString.
// NOTE(review): this span comes from a rendered diff, so statements of the former
// WebTarget-based implementation appear interleaved with the URIBuilder/LiveImportClient ones.
WebTarget getRecordIdsTarget = pubmedWebTarget
.queryParam("term", query.getParameterAsClass("term", String.class));
getRecordIdsTarget = getRecordIdsTarget
.queryParam("field", query.getParameterAsClass("field", String.class));
getRecordIdsTarget = getRecordIdsTarget.queryParam("usehistory", "y");
getRecordIdsTarget = getRecordIdsTarget.path("esearch.fcgi");
// First request: db=pubmed, usehistory=y and the "term"/"field" parameters of the query.
URIBuilder uriBuilder = new URIBuilder(urlSearch);
uriBuilder.addParameter("db", "pubmed");
uriBuilder.addParameter("usehistory", "y");
uriBuilder.addParameter("term", query.getParameterAsClass("term", String.class));
uriBuilder.addParameter("field", query.getParameterAsClass("field", String.class));
Invocation.Builder invocationBuilder = getRecordIdsTarget.request(MediaType.TEXT_PLAIN_TYPE);
Map<String, Map<String, String>> params = new HashMap<String, Map<String,String>>();
String response = StringUtils.EMPTY;
int countAttempt = 0;
// Repeat the GET while the response is blank. countAttempt starts at 0 and the bound is
// inclusive, so the request can be sent up to attempt + 1 times.
while (StringUtils.isBlank(response) && countAttempt <= attempt) {
countAttempt++;
response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
}
Response response = invocationBuilder.get();
String responseString = response.readEntity(String.class);
// Still blank after the loop: fail and include the requested URL in the message.
if (StringUtils.isBlank(response)) {
throw new RuntimeException("After " + attempt
+ " attempts to contact the PubMed service, a correct answer could not be received."
+ " The request was made with this URL:" + uriBuilder.toString());
}
String queryKey = getSingleElementValue(responseString, "QueryKey");
String webEnv = getSingleElementValue(responseString, "WebEnv");
String webEnv = getSingleElementValue(response, "WebEnv");
String queryKey = getSingleElementValue(response, "QueryKey");
WebTarget getRecordsTarget = pubmedWebTarget.queryParam("WebEnv", webEnv);
getRecordsTarget = getRecordsTarget.queryParam("query_key", queryKey);
getRecordsTarget = getRecordsTarget.queryParam("retmode", "xml");
getRecordsTarget = getRecordsTarget.path("efetch.fcgi");
// Second request: db=pubmed, retmode=xml and the WebEnv/query_key pair read above.
URIBuilder uriBuilder2 = new URIBuilder(urlFetch);
uriBuilder2.addParameter("db", "pubmed");
uriBuilder2.addParameter("retmode", "xml");
uriBuilder2.addParameter("WebEnv", webEnv);
uriBuilder2.addParameter("query_key", queryKey);
invocationBuilder = getRecordsTarget.request(MediaType.TEXT_PLAIN_TYPE);
response = invocationBuilder.get();
Map<String, Map<String, String>> params2 = new HashMap<String, Map<String,String>>();
String response2 = StringUtils.EMPTY;
// The attempt counter is reset so the second request gets its own retries.
countAttempt = 0;
while (StringUtils.isBlank(response2) && countAttempt <= attempt) {
countAttempt++;
response2 = liveImportClient.executeHttpGetRequest(1000, uriBuilder2.toString(), params2);
}
String xml = response.readEntity(String.class);
return parseXMLString(xml);
// Still blank after the loop: fail and include the second URL in the message.
if (StringUtils.isBlank(response2)) {
throw new RuntimeException("After " + attempt
+ " attempts to contact the PubMed service, a correct answer could not be received."
+ " The request was made with this URL:" + uriBuilder2.toString());
}
return parseXMLString(response2);
}
}
@Override
public List<ImportRecord> getRecords(InputStream inputStream) throws FileSourceException {
String xml = null;
try (Reader reader = new InputStreamReader(inputStream, "UTF-8")) {
xml = CharStreams.toString(reader);
String xml = CharStreams.toString(reader);
return parseXMLString(xml);
} catch (IOException e) {
throw new FileSourceException ("Cannot read XML from InputStream", e);
@@ -456,4 +484,21 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
}
return records;
}
}
/**
* Return the urlFetch set to this object
*
* @return The String object that represents the urlFetch of this object, i.e. the URL
*         from which the record-retrieval requests of this class are built
*/
public String getUrlFetch() {
return urlFetch;
}
/**
* Set the urlFetch to this object
*
* @param urlFetch The String object that represents the urlFetch of this object, i.e. the URL
*                 from which the record-retrieval requests of this class are built
*/
public void setUrlFetch(String urlFetch) {
this.urlFetch = urlFetch;
}
/**
* Return the urlSearch set to this object
*
* @return The String object that represents the urlSearch of this object, i.e. the URL
*         from which the search requests of this class are built
*/
public String getUrlSearch() {
return urlSearch;
}
/**
* Set the urlSearch to this object
*
* @param urlSearch The String object that represents the urlSearch of this object, i.e. the URL
*                  from which the search requests of this class are built
*/
public void setUrlSearch(String urlSearch) {
this.urlSearch = urlSearch;
}
}

View File

@@ -47,7 +47,8 @@
<bean id="pubmedImportService"
class="org.dspace.importer.external.pubmed.service.PubmedImportMetadataSourceServiceImpl">
<property name="metadataFieldMapping" ref="pubmedMetadataFieldMapping"/>
<property name="baseAddress" value="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"/>
<property name="urlFetch" value="${pubmed.url.fetch}"/>
<property name="urlSearch" value="${pubmed.url.search}"/>
<property name="generateQueryForItem" ref="pubmedService"></property>
<property name="supportedExtensions">
<list>

View File

@@ -59,22 +59,15 @@
</bean>
<bean id="pubmedLiveImportDataProvider" class="org.dspace.external.provider.impl.LiveImportDataProvider">
<property name="metadataSource" ref="mockPubmedImportService"/>
<property name="metadataSource" ref="pubmedImportService"/>
<property name="sourceIdentifier" value="pubmed"/>
<property name="recordIdMetadata" value="dc.identifier.other"/>
</bean>
<bean id="mockPubmedImportService"
class="org.dspace.external.provider.impl.MockPubmedImportMetadataSourceServiceImpl">
<property name="metadataFieldMapping" ref="pubmedMetadataFieldMapping"/>
<property name="baseAddress" value="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"/>
<property name="supportedExtensions">
<property name="supportedEntityTypes">
<list>
<value>xml</value>
<value>Publication</value>
<value>none</value>
</list>
</property>
</bean>
</beans>
</beans>

View File

@@ -1,87 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.external.provider.impl;
import static org.mockito.Mockito.when;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import javax.ws.rs.client.Invocation;
import javax.ws.rs.client.WebTarget;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import org.dspace.importer.external.pubmed.service.PubmedImportMetadataSourceServiceImpl;
import org.mockito.ArgumentCaptor;
import org.mockito.ArgumentMatchers;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import org.springframework.util.FileCopyUtils;
/**
* Overrides the init method to mock the REST calls to PubMed. The mock
* definitions below answer efetch or esearch requests using the test resource
* files (pubmed-esearch.fcgi.xml or pubmed-efetch.fcgi.xml).
*
* @author Andrea Bollini (andrea.bollini at 4science.it)
*
*/
public class MockPubmedImportMetadataSourceServiceImpl extends PubmedImportMetadataSourceServiceImpl {

    /**
     * Installs a Mockito mock as {@code pubmedWebTarget}. Every {@code queryParam} and
     * {@code path} call returns that same mock, and the entity of any response obtained
     * through {@code request(...).get()} is the content of the classpath resource
     * {@code pubmed-<last captured path>.xml}.
     *
     * @throws Exception declared by the overridden method
     */
    @Override
    public void init() throws Exception {
        pubmedWebTarget = Mockito.mock(WebTarget.class);
        // remembers the argument given to path(...); it is read back only when a body is requested
        ArgumentCaptor<String> pathCaptor = ArgumentCaptor.forClass(String.class);

        // builder-style calls on the target keep handing back the very same mock
        Answer<WebTarget> sameTarget = (InvocationOnMock invocation) -> pubmedWebTarget;
        when(pubmedWebTarget.queryParam(ArgumentMatchers.any(), ArgumentMatchers.any()))
            .thenAnswer(sameTarget);
        when(pubmedWebTarget.path(pathCaptor.capture())).thenAnswer(sameTarget);

        // each request(...) produces a fresh builder whose get() produces a fresh response
        when(pubmedWebTarget.request(ArgumentMatchers.any(MediaType.class)))
            .thenAnswer((InvocationOnMock requestCall) -> {
                Invocation.Builder builder = Mockito.mock(Invocation.Builder.class);
                when(builder.get()).thenAnswer((InvocationOnMock getCall) -> {
                    Response response = Mockito.mock(Response.class);
                    when(response.readEntity(ArgumentMatchers.eq(String.class)))
                        .then((InvocationOnMock readCall) -> loadFixture(pathCaptor.getValue()));
                    return response;
                });
                return builder;
            });
    }

    /**
     * Reads the test resource {@code pubmed-<path>.xml}, resolved relative to this class,
     * as a UTF-8 string.
     *
     * @param path the path segment last passed to the mocked web target
     * @return the full content of the resource
     */
    private static String loadFixture(String path) {
        String resourceName = "pubmed-" + path + ".xml";
        InputStream resource =
            MockPubmedImportMetadataSourceServiceImpl.class.getResourceAsStream(resourceName);
        try (Reader reader = new InputStreamReader(resource, StandardCharsets.UTF_8)) {
            return FileCopyUtils.copyToString(reader);
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
}

View File

@@ -42,4 +42,9 @@ epo.authUrl = https://ops.epo.org/3.2/auth/accesstoken
epo.url = https://ops.epo.org/rest-services/published-data/publication/$(doctype)/$(id)/biblio
# this url will be used to perform basic searching
epo.searchUrl = https://ops.epo.org/rest-services/published-data/search
#################################################################
#---------------------- PubMed -----------------------------#
#---------------------------------------------------------------#
pubmed.url.search = https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi
pubmed.url.fetch = https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi
#################################################################