[CST-5997] refactored pubmed live import service

This commit is contained in:
Mykhaylo
2022-06-06 16:26:50 +02:00
parent 60f7bf0559
commit dc0aeb847a
3 changed files with 92 additions and 110 deletions

View File

@@ -14,23 +14,23 @@ import java.io.InputStreamReader;
import java.io.Reader; import java.io.Reader;
import java.io.StringReader; import java.io.StringReader;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.client.Invocation;
import javax.ws.rs.client.WebTarget;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import com.google.common.io.CharStreams; import com.google.common.io.CharStreams;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.client.utils.URIBuilder;
import org.dspace.content.Item; import org.dspace.content.Item;
import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.datamodel.Query; import org.dspace.importer.external.datamodel.Query;
import org.dspace.importer.external.exception.FileMultipleOccurencesException; import org.dspace.importer.external.exception.FileMultipleOccurencesException;
import org.dspace.importer.external.exception.FileSourceException; import org.dspace.importer.external.exception.FileSourceException;
import org.dspace.importer.external.exception.MetadataSourceException; import org.dspace.importer.external.exception.MetadataSourceException;
import org.dspace.importer.external.liveimportclient.service.LiveImportClient;
import org.dspace.importer.external.service.AbstractImportMetadataSourceService; import org.dspace.importer.external.service.AbstractImportMetadataSourceService;
import org.dspace.importer.external.service.components.FileSource; import org.dspace.importer.external.service.components.FileSource;
import org.dspace.importer.external.service.components.QuerySource; import org.dspace.importer.external.service.components.QuerySource;
@@ -41,6 +41,7 @@ import org.jdom2.filter.Filters;
import org.jdom2.input.SAXBuilder; import org.jdom2.input.SAXBuilder;
import org.jdom2.xpath.XPathExpression; import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory; import org.jdom2.xpath.XPathFactory;
import org.springframework.beans.factory.annotation.Autowired;
/** /**
* Implements a data source for querying PubMed Central * Implements a data source for querying PubMed Central
@@ -51,13 +52,14 @@ import org.jdom2.xpath.XPathFactory;
public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService<Element> public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService<Element>
implements QuerySource, FileSource { implements QuerySource, FileSource {
private String baseAddress; private String urlFetch;
private String urlSearch;
// it is protected so that subclass can mock it for testing
protected WebTarget pubmedWebTarget;
private List<String> supportedExtensions; private List<String> supportedExtensions;
@Autowired
private LiveImportClient liveImportClient;
/** /**
* Set the file extensions supported by this metadata service * Set the file extensions supported by this metadata service
* *
@@ -187,29 +189,7 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
* @throws Exception on generic exception * @throws Exception on generic exception
*/ */
@Override @Override
public void init() throws Exception { public void init() throws Exception {}
Client client = ClientBuilder.newClient();
WebTarget webTarget = client.target(baseAddress);
pubmedWebTarget = webTarget.queryParam("db", "pubmed");
}
/**
* Return the baseAddress set to this object
*
* @return The String object that represents the baseAddress of this object
*/
public String getBaseAddress() {
return baseAddress;
}
/**
* Set the baseAddress to this object
*
* @param baseAddress The String object that represents the baseAddress of this object
*/
public void setBaseAddress(String baseAddress) {
this.baseAddress = baseAddress;
}
private class GetNbRecords implements Callable<Integer> { private class GetNbRecords implements Callable<Integer> {
@@ -226,24 +206,15 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
@Override @Override
public Integer call() throws Exception { public Integer call() throws Exception {
WebTarget getRecordIdsTarget = pubmedWebTarget URIBuilder uriBuilder = new URIBuilder(urlSearch);
.queryParam("term", query.getParameterAsClass("query", String.class)); uriBuilder.addParameter("db", "pubmed");
uriBuilder.addParameter("term", query.getParameterAsClass("query", String.class));
getRecordIdsTarget = getRecordIdsTarget.path("esearch.fcgi"); Map<String,String> params = new HashMap<String, String>();
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
Invocation.Builder invocationBuilder = getRecordIdsTarget.request(MediaType.TEXT_PLAIN_TYPE); return StringUtils.isNotBlank(response) ? Integer.parseInt(getSingleElementValue(response, "Count")) : 0;
Response response = invocationBuilder.get();
String responseString = response.readEntity(String.class);
String count = getSingleElementValue(responseString, "Count");
return Integer.parseInt(count);
} }
} }
private String getSingleElementValue(String src, String elementName) { private String getSingleElementValue(String src, String elementName) {
String value = null; String value = null;
@@ -286,41 +257,39 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
Integer start = query.getParameterAsClass("start", Integer.class); Integer start = query.getParameterAsClass("start", Integer.class);
Integer count = query.getParameterAsClass("count", Integer.class); Integer count = query.getParameterAsClass("count", Integer.class);
if (count == null || count < 0) { if (Objects.isNull(count) || count < 0) {
count = 10; count = 10;
} }
if (start == null || start < 0) { if (Objects.isNull(start) || start < 0) {
start = 0; start = 0;
} }
List<ImportRecord> records = new LinkedList<ImportRecord>(); List<ImportRecord> records = new LinkedList<ImportRecord>();
WebTarget getRecordIdsTarget = pubmedWebTarget.queryParam("term", queryString); URIBuilder uriBuilder = new URIBuilder(urlSearch);
getRecordIdsTarget = getRecordIdsTarget.queryParam("retstart", start); uriBuilder.addParameter("db", "pubmed");
getRecordIdsTarget = getRecordIdsTarget.queryParam("retmax", count); uriBuilder.addParameter("retstart", start.toString());
getRecordIdsTarget = getRecordIdsTarget.queryParam("usehistory", "y"); uriBuilder.addParameter("retmax", count.toString());
getRecordIdsTarget = getRecordIdsTarget.path("esearch.fcgi"); uriBuilder.addParameter("usehistory", "y");
uriBuilder.addParameter("term", queryString);
Map<String,String> params = new HashMap<String, String>();
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
Invocation.Builder invocationBuilder = getRecordIdsTarget.request(MediaType.TEXT_PLAIN_TYPE); String queryKey = getSingleElementValue(response, "QueryKey");
String webEnv = getSingleElementValue(response, "WebEnv");
Response response = invocationBuilder.get(); URIBuilder uriBuilder2 = new URIBuilder(urlFetch);
String responseString = response.readEntity(String.class); uriBuilder2.addParameter("db", "pubmed");
uriBuilder2.addParameter("retstart", start.toString());
uriBuilder2.addParameter("retmax", count.toString());
uriBuilder2.addParameter("WebEnv", webEnv);
uriBuilder2.addParameter("query_key", queryKey);
uriBuilder2.addParameter("retmode", "xml");
Map<String,String> params2 = new HashMap<String, String>();
response = liveImportClient.executeHttpGetRequest(1000, uriBuilder2.toString(), params2);
String queryKey = getSingleElementValue(responseString, "QueryKey"); List<Element> elements = splitToRecords(response);
String webEnv = getSingleElementValue(responseString, "WebEnv");
WebTarget getRecordsTarget = pubmedWebTarget.queryParam("WebEnv", webEnv);
getRecordsTarget = getRecordsTarget.queryParam("query_key", queryKey);
getRecordsTarget = getRecordsTarget.queryParam("retmode", "xml");
getRecordsTarget = getRecordsTarget.path("efetch.fcgi");
getRecordsTarget = getRecordsTarget.queryParam("retmax", count);
getRecordsTarget = getRecordsTarget.queryParam("retstart", start);
invocationBuilder = getRecordsTarget.request(MediaType.TEXT_PLAIN_TYPE);
response = invocationBuilder.get();
List<Element> elements = splitToRecords(response.readEntity(String.class));
for (Element record : elements) { for (Element record : elements) {
records.add(transformSourceRecords(record)); records.add(transformSourceRecords(record));
@@ -361,23 +330,18 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
@Override @Override
public ImportRecord call() throws Exception { public ImportRecord call() throws Exception {
String id = query.getParameterAsClass("id", String.class);
WebTarget getRecordTarget = pubmedWebTarget.queryParam("id", id); URIBuilder uriBuilder = new URIBuilder(urlFetch);
getRecordTarget = getRecordTarget.queryParam("retmode", "xml"); uriBuilder.addParameter("db", "pubmed");
getRecordTarget = getRecordTarget.path("efetch.fcgi"); uriBuilder.addParameter("retmode", "xml");
uriBuilder.addParameter("id", query.getParameterAsClass("id", String.class));
Invocation.Builder invocationBuilder = getRecordTarget.request(MediaType.TEXT_PLAIN_TYPE); Map<String,String> params = new HashMap<String, String>();
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
Response response = invocationBuilder.get(); List<Element> elements = splitToRecords(response);
List<Element> elements = splitToRecords(response.readEntity(String.class)); return elements.isEmpty() ? null : transformSourceRecords(elements.get(0));
if (elements.isEmpty()) {
return null;
}
return transformSourceRecords(elements.get(0));
} }
} }
@@ -396,40 +360,35 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
@Override @Override
public Collection<ImportRecord> call() throws Exception { public Collection<ImportRecord> call() throws Exception {
WebTarget getRecordIdsTarget = pubmedWebTarget URIBuilder uriBuilder = new URIBuilder(urlSearch);
.queryParam("term", query.getParameterAsClass("term", String.class)); uriBuilder.addParameter("db", "pubmed");
getRecordIdsTarget = getRecordIdsTarget uriBuilder.addParameter("usehistory", "y");
.queryParam("field", query.getParameterAsClass("field", String.class)); uriBuilder.addParameter("term", query.getParameterAsClass("term", String.class));
getRecordIdsTarget = getRecordIdsTarget.queryParam("usehistory", "y"); uriBuilder.addParameter("field", query.getParameterAsClass("field", String.class));
getRecordIdsTarget = getRecordIdsTarget.path("esearch.fcgi");
Invocation.Builder invocationBuilder = getRecordIdsTarget.request(MediaType.TEXT_PLAIN_TYPE); Map<String,String> params = new HashMap<String, String>();
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
Response response = invocationBuilder.get(); String webEnv = getSingleElementValue(response, "WebEnv");
String responseString = response.readEntity(String.class); String queryKey = getSingleElementValue(response, "QueryKey");
String queryKey = getSingleElementValue(responseString, "QueryKey"); URIBuilder uriBuilder2 = new URIBuilder(urlFetch);
String webEnv = getSingleElementValue(responseString, "WebEnv"); uriBuilder2.addParameter("db", "pubmed");
uriBuilder2.addParameter("retmode", "xml");
uriBuilder2.addParameter("WebEnv", webEnv);
uriBuilder2.addParameter("query_key", queryKey);
WebTarget getRecordsTarget = pubmedWebTarget.queryParam("WebEnv", webEnv); Map<String,String> params2 = new HashMap<String, String>();
getRecordsTarget = getRecordsTarget.queryParam("query_key", queryKey); response = liveImportClient.executeHttpGetRequest(1000, uriBuilder2.toString(), params2);
getRecordsTarget = getRecordsTarget.queryParam("retmode", "xml");
getRecordsTarget = getRecordsTarget.path("efetch.fcgi");
invocationBuilder = getRecordsTarget.request(MediaType.TEXT_PLAIN_TYPE); return parseXMLString(response);
response = invocationBuilder.get();
String xml = response.readEntity(String.class);
return parseXMLString(xml);
} }
} }
@Override @Override
public List<ImportRecord> getRecords(InputStream inputStream) throws FileSourceException { public List<ImportRecord> getRecords(InputStream inputStream) throws FileSourceException {
String xml = null;
try (Reader reader = new InputStreamReader(inputStream, "UTF-8")) { try (Reader reader = new InputStreamReader(inputStream, "UTF-8")) {
xml = CharStreams.toString(reader); String xml = CharStreams.toString(reader);
return parseXMLString(xml); return parseXMLString(xml);
} catch (IOException e) { } catch (IOException e) {
throw new FileSourceException ("Cannot read XML from InputStream", e); throw new FileSourceException ("Cannot read XML from InputStream", e);
@@ -456,4 +415,21 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
} }
return records; return records;
} }
}
/**
 * Return the urlFetch set to this object
 *
 * @return The String object held in the urlFetch field; within this class it is handed to
 *         URIBuilder as the starting address of the record-fetch requests
 */
public String getUrlFetch() {
return urlFetch;
}
/**
 * Set the urlFetch to this object
 *
 * @param urlFetch The String object to store in the urlFetch field; within this class it is
 *                 handed to URIBuilder as the starting address of the record-fetch requests
 */
public void setUrlFetch(String urlFetch) {
this.urlFetch = urlFetch;
}
/**
 * Return the urlSearch set to this object
 *
 * @return The String object held in the urlSearch field; within this class it is handed to
 *         URIBuilder as the starting address of the search (term) requests
 */
public String getUrlSearch() {
return urlSearch;
}
/**
 * Set the urlSearch to this object
 *
 * @param urlSearch The String object to store in the urlSearch field; within this class it is
 *                  handed to URIBuilder as the starting address of the search (term) requests
 */
public void setUrlSearch(String urlSearch) {
this.urlSearch = urlSearch;
}
}

View File

@@ -47,7 +47,8 @@
<bean id="pubmedImportService" <bean id="pubmedImportService"
class="org.dspace.importer.external.pubmed.service.PubmedImportMetadataSourceServiceImpl"> class="org.dspace.importer.external.pubmed.service.PubmedImportMetadataSourceServiceImpl">
<property name="metadataFieldMapping" ref="pubmedMetadataFieldMapping"/> <property name="metadataFieldMapping" ref="pubmedMetadataFieldMapping"/>
<property name="baseAddress" value="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"/> <property name="urlFetch" value="${pabmed.url.fetch}"/>
<property name="urlSearch" value="${pabmed.url.search}"/>
<property name="generateQueryForItem" ref="pubmedService"></property> <property name="generateQueryForItem" ref="pubmedService"></property>
<property name="supportedExtensions"> <property name="supportedExtensions">
<list> <list>

View File

@@ -25,4 +25,9 @@ vufind.url.search = https://vufind.org/advanced_demo/api/v1/search
scielo.url = https://search.scielo.org/?output=ris&q= scielo.url = https://search.scielo.org/?output=ris&q=
#################################################################
#---------------------- Pubmed -----------------------------#
#---------------------------------------------------------------#
pabmed.url.search = https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi
pabmed.url.fetch = https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi
################################################################# #################################################################