[CST-5997] refactored pubmed live import service

This commit is contained in:
Mykhaylo
2022-06-06 16:26:50 +02:00
parent 60f7bf0559
commit dc0aeb847a
3 changed files with 92 additions and 110 deletions

View File

@@ -14,23 +14,23 @@ import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.Callable;
import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.client.Invocation;
import javax.ws.rs.client.WebTarget;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import com.google.common.io.CharStreams;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.client.utils.URIBuilder;
import org.dspace.content.Item;
import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.datamodel.Query;
import org.dspace.importer.external.exception.FileMultipleOccurencesException;
import org.dspace.importer.external.exception.FileSourceException;
import org.dspace.importer.external.exception.MetadataSourceException;
import org.dspace.importer.external.liveimportclient.service.LiveImportClient;
import org.dspace.importer.external.service.AbstractImportMetadataSourceService;
import org.dspace.importer.external.service.components.FileSource;
import org.dspace.importer.external.service.components.QuerySource;
@@ -41,6 +41,7 @@ import org.jdom2.filter.Filters;
import org.jdom2.input.SAXBuilder;
import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory;
import org.springframework.beans.factory.annotation.Autowired;
/**
* Implements a data source for querying PubMed Central
@@ -51,13 +52,14 @@ import org.jdom2.xpath.XPathFactory;
public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService<Element>
implements QuerySource, FileSource {
private String baseAddress;
// it is protected so that subclass can mock it for testing
protected WebTarget pubmedWebTarget;
private String urlFetch;
private String urlSearch;
private List<String> supportedExtensions;
@Autowired
private LiveImportClient liveImportClient;
/**
* Set the file extensions supported by this metadata service
*
@@ -187,29 +189,7 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
* @throws Exception on generic exception
*/
@Override
public void init() throws Exception {
Client client = ClientBuilder.newClient();
WebTarget webTarget = client.target(baseAddress);
pubmedWebTarget = webTarget.queryParam("db", "pubmed");
}
/**
* Return the baseAddress set to this object
*
* @return The String object that represents the baseAddress of this object
*/
public String getBaseAddress() {
return baseAddress;
}
/**
* Set the baseAddress to this object
*
* @param baseAddress The String object that represents the baseAddress of this object
*/
public void setBaseAddress(String baseAddress) {
this.baseAddress = baseAddress;
}
public void init() throws Exception {}
private class GetNbRecords implements Callable<Integer> {
@@ -226,24 +206,15 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
@Override
public Integer call() throws Exception {
WebTarget getRecordIdsTarget = pubmedWebTarget
.queryParam("term", query.getParameterAsClass("query", String.class));
getRecordIdsTarget = getRecordIdsTarget.path("esearch.fcgi");
Invocation.Builder invocationBuilder = getRecordIdsTarget.request(MediaType.TEXT_PLAIN_TYPE);
Response response = invocationBuilder.get();
String responseString = response.readEntity(String.class);
String count = getSingleElementValue(responseString, "Count");
return Integer.parseInt(count);
URIBuilder uriBuilder = new URIBuilder(urlSearch);
uriBuilder.addParameter("db", "pubmed");
uriBuilder.addParameter("term", query.getParameterAsClass("query", String.class));
Map<String,String> params = new HashMap<String, String>();
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
return StringUtils.isNotBlank(response) ? Integer.parseInt(getSingleElementValue(response, "Count")) : 0;
}
}
private String getSingleElementValue(String src, String elementName) {
String value = null;
@@ -286,41 +257,39 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
Integer start = query.getParameterAsClass("start", Integer.class);
Integer count = query.getParameterAsClass("count", Integer.class);
if (count == null || count < 0) {
if (Objects.isNull(count) || count < 0) {
count = 10;
}
if (start == null || start < 0) {
if (Objects.isNull(start) || start < 0) {
start = 0;
}
List<ImportRecord> records = new LinkedList<ImportRecord>();
WebTarget getRecordIdsTarget = pubmedWebTarget.queryParam("term", queryString);
getRecordIdsTarget = getRecordIdsTarget.queryParam("retstart", start);
getRecordIdsTarget = getRecordIdsTarget.queryParam("retmax", count);
getRecordIdsTarget = getRecordIdsTarget.queryParam("usehistory", "y");
getRecordIdsTarget = getRecordIdsTarget.path("esearch.fcgi");
URIBuilder uriBuilder = new URIBuilder(urlSearch);
uriBuilder.addParameter("db", "pubmed");
uriBuilder.addParameter("retstart", start.toString());
uriBuilder.addParameter("retmax", count.toString());
uriBuilder.addParameter("usehistory", "y");
uriBuilder.addParameter("term", queryString);
Map<String,String> params = new HashMap<String, String>();
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
Invocation.Builder invocationBuilder = getRecordIdsTarget.request(MediaType.TEXT_PLAIN_TYPE);
String queryKey = getSingleElementValue(response, "QueryKey");
String webEnv = getSingleElementValue(response, "WebEnv");
Response response = invocationBuilder.get();
String responseString = response.readEntity(String.class);
URIBuilder uriBuilder2 = new URIBuilder(urlFetch);
uriBuilder2.addParameter("db", "pubmed");
uriBuilder2.addParameter("retstart", start.toString());
uriBuilder2.addParameter("retmax", count.toString());
uriBuilder2.addParameter("WebEnv", webEnv);
uriBuilder2.addParameter("query_key", queryKey);
uriBuilder2.addParameter("retmode", "xml");
Map<String,String> params2 = new HashMap<String, String>();
response = liveImportClient.executeHttpGetRequest(1000, uriBuilder2.toString(), params2);
String queryKey = getSingleElementValue(responseString, "QueryKey");
String webEnv = getSingleElementValue(responseString, "WebEnv");
WebTarget getRecordsTarget = pubmedWebTarget.queryParam("WebEnv", webEnv);
getRecordsTarget = getRecordsTarget.queryParam("query_key", queryKey);
getRecordsTarget = getRecordsTarget.queryParam("retmode", "xml");
getRecordsTarget = getRecordsTarget.path("efetch.fcgi");
getRecordsTarget = getRecordsTarget.queryParam("retmax", count);
getRecordsTarget = getRecordsTarget.queryParam("retstart", start);
invocationBuilder = getRecordsTarget.request(MediaType.TEXT_PLAIN_TYPE);
response = invocationBuilder.get();
List<Element> elements = splitToRecords(response.readEntity(String.class));
List<Element> elements = splitToRecords(response);
for (Element record : elements) {
records.add(transformSourceRecords(record));
@@ -361,23 +330,18 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
@Override
public ImportRecord call() throws Exception {
String id = query.getParameterAsClass("id", String.class);
WebTarget getRecordTarget = pubmedWebTarget.queryParam("id", id);
getRecordTarget = getRecordTarget.queryParam("retmode", "xml");
getRecordTarget = getRecordTarget.path("efetch.fcgi");
URIBuilder uriBuilder = new URIBuilder(urlFetch);
uriBuilder.addParameter("db", "pubmed");
uriBuilder.addParameter("retmode", "xml");
uriBuilder.addParameter("id", query.getParameterAsClass("id", String.class));
Invocation.Builder invocationBuilder = getRecordTarget.request(MediaType.TEXT_PLAIN_TYPE);
Map<String,String> params = new HashMap<String, String>();
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
Response response = invocationBuilder.get();
List<Element> elements = splitToRecords(response);
List<Element> elements = splitToRecords(response.readEntity(String.class));
if (elements.isEmpty()) {
return null;
}
return transformSourceRecords(elements.get(0));
return elements.isEmpty() ? null : transformSourceRecords(elements.get(0));
}
}
@@ -396,40 +360,35 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
@Override
public Collection<ImportRecord> call() throws Exception {
WebTarget getRecordIdsTarget = pubmedWebTarget
.queryParam("term", query.getParameterAsClass("term", String.class));
getRecordIdsTarget = getRecordIdsTarget
.queryParam("field", query.getParameterAsClass("field", String.class));
getRecordIdsTarget = getRecordIdsTarget.queryParam("usehistory", "y");
getRecordIdsTarget = getRecordIdsTarget.path("esearch.fcgi");
URIBuilder uriBuilder = new URIBuilder(urlSearch);
uriBuilder.addParameter("db", "pubmed");
uriBuilder.addParameter("usehistory", "y");
uriBuilder.addParameter("term", query.getParameterAsClass("term", String.class));
uriBuilder.addParameter("field", query.getParameterAsClass("field", String.class));
Invocation.Builder invocationBuilder = getRecordIdsTarget.request(MediaType.TEXT_PLAIN_TYPE);
Map<String,String> params = new HashMap<String, String>();
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
Response response = invocationBuilder.get();
String responseString = response.readEntity(String.class);
String webEnv = getSingleElementValue(response, "WebEnv");
String queryKey = getSingleElementValue(response, "QueryKey");
String queryKey = getSingleElementValue(responseString, "QueryKey");
String webEnv = getSingleElementValue(responseString, "WebEnv");
URIBuilder uriBuilder2 = new URIBuilder(urlFetch);
uriBuilder2.addParameter("db", "pubmed");
uriBuilder2.addParameter("retmode", "xml");
uriBuilder2.addParameter("WebEnv", webEnv);
uriBuilder2.addParameter("query_key", queryKey);
WebTarget getRecordsTarget = pubmedWebTarget.queryParam("WebEnv", webEnv);
getRecordsTarget = getRecordsTarget.queryParam("query_key", queryKey);
getRecordsTarget = getRecordsTarget.queryParam("retmode", "xml");
getRecordsTarget = getRecordsTarget.path("efetch.fcgi");
Map<String,String> params2 = new HashMap<String, String>();
response = liveImportClient.executeHttpGetRequest(1000, uriBuilder2.toString(), params2);
invocationBuilder = getRecordsTarget.request(MediaType.TEXT_PLAIN_TYPE);
response = invocationBuilder.get();
String xml = response.readEntity(String.class);
return parseXMLString(xml);
return parseXMLString(response);
}
}
@Override
public List<ImportRecord> getRecords(InputStream inputStream) throws FileSourceException {
String xml = null;
try (Reader reader = new InputStreamReader(inputStream, "UTF-8")) {
xml = CharStreams.toString(reader);
String xml = CharStreams.toString(reader);
return parseXMLString(xml);
} catch (IOException e) {
throw new FileSourceException ("Cannot read XML from InputStream", e);
@@ -456,4 +415,21 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
}
return records;
}
}
public String getUrlFetch() {
return urlFetch;
}
public void setUrlFetch(String urlFetch) {
this.urlFetch = urlFetch;
}
public String getUrlSearch() {
return urlSearch;
}
public void setUrlSearch(String urlSearch) {
this.urlSearch = urlSearch;
}
}

View File

@@ -47,7 +47,8 @@
<bean id="pubmedImportService"
class="org.dspace.importer.external.pubmed.service.PubmedImportMetadataSourceServiceImpl">
<property name="metadataFieldMapping" ref="pubmedMetadataFieldMapping"/>
<property name="baseAddress" value="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"/>
<property name="urlFetch" value="${pabmed.url.fetch}"/>
<property name="urlSearch" value="${pabmed.url.search}"/>
<property name="generateQueryForItem" ref="pubmedService"></property>
<property name="supportedExtensions">
<list>

View File

@@ -25,4 +25,9 @@ vufind.url.search = https://vufind.org/advanced_demo/api/v1/search
scielo.url = https://search.scielo.org/?output=ris&q=
#################################################################
#---------------------- Pubmed -----------------------------#
#---------------------------------------------------------------#
pabmed.url.search = https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi
pabmed.url.fetch = https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi
#################################################################