mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-17 15:03:18 +00:00
[CST-5997] refactored pubmed live import service
This commit is contained in:
@@ -14,23 +14,23 @@ import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.Callable;
|
||||
import javax.ws.rs.client.Client;
|
||||
import javax.ws.rs.client.ClientBuilder;
|
||||
import javax.ws.rs.client.Invocation;
|
||||
import javax.ws.rs.client.WebTarget;
|
||||
import javax.ws.rs.core.MediaType;
|
||||
import javax.ws.rs.core.Response;
|
||||
|
||||
import com.google.common.io.CharStreams;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.http.client.utils.URIBuilder;
|
||||
import org.dspace.content.Item;
|
||||
import org.dspace.importer.external.datamodel.ImportRecord;
|
||||
import org.dspace.importer.external.datamodel.Query;
|
||||
import org.dspace.importer.external.exception.FileMultipleOccurencesException;
|
||||
import org.dspace.importer.external.exception.FileSourceException;
|
||||
import org.dspace.importer.external.exception.MetadataSourceException;
|
||||
import org.dspace.importer.external.liveimportclient.service.LiveImportClient;
|
||||
import org.dspace.importer.external.service.AbstractImportMetadataSourceService;
|
||||
import org.dspace.importer.external.service.components.FileSource;
|
||||
import org.dspace.importer.external.service.components.QuerySource;
|
||||
@@ -41,6 +41,7 @@ import org.jdom2.filter.Filters;
|
||||
import org.jdom2.input.SAXBuilder;
|
||||
import org.jdom2.xpath.XPathExpression;
|
||||
import org.jdom2.xpath.XPathFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
|
||||
/**
|
||||
* Implements a data source for querying PubMed Central
|
||||
@@ -51,13 +52,14 @@ import org.jdom2.xpath.XPathFactory;
|
||||
public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService<Element>
|
||||
implements QuerySource, FileSource {
|
||||
|
||||
private String baseAddress;
|
||||
|
||||
// it is protected so that subclass can mock it for testing
|
||||
protected WebTarget pubmedWebTarget;
|
||||
private String urlFetch;
|
||||
private String urlSearch;
|
||||
|
||||
private List<String> supportedExtensions;
|
||||
|
||||
@Autowired
|
||||
private LiveImportClient liveImportClient;
|
||||
|
||||
/**
|
||||
* Set the file extensions supported by this metadata service
|
||||
*
|
||||
@@ -187,29 +189,7 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
|
||||
* @throws Exception on generic exception
|
||||
*/
|
||||
@Override
|
||||
public void init() throws Exception {
|
||||
Client client = ClientBuilder.newClient();
|
||||
WebTarget webTarget = client.target(baseAddress);
|
||||
pubmedWebTarget = webTarget.queryParam("db", "pubmed");
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the baseAddress set to this object
|
||||
*
|
||||
* @return The String object that represents the baseAddress of this object
|
||||
*/
|
||||
public String getBaseAddress() {
|
||||
return baseAddress;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the baseAddress to this object
|
||||
*
|
||||
* @param baseAddress The String object that represents the baseAddress of this object
|
||||
*/
|
||||
public void setBaseAddress(String baseAddress) {
|
||||
this.baseAddress = baseAddress;
|
||||
}
|
||||
public void init() throws Exception {}
|
||||
|
||||
private class GetNbRecords implements Callable<Integer> {
|
||||
|
||||
@@ -226,24 +206,15 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
|
||||
|
||||
@Override
|
||||
public Integer call() throws Exception {
|
||||
WebTarget getRecordIdsTarget = pubmedWebTarget
|
||||
.queryParam("term", query.getParameterAsClass("query", String.class));
|
||||
|
||||
getRecordIdsTarget = getRecordIdsTarget.path("esearch.fcgi");
|
||||
|
||||
Invocation.Builder invocationBuilder = getRecordIdsTarget.request(MediaType.TEXT_PLAIN_TYPE);
|
||||
|
||||
Response response = invocationBuilder.get();
|
||||
|
||||
String responseString = response.readEntity(String.class);
|
||||
|
||||
String count = getSingleElementValue(responseString, "Count");
|
||||
|
||||
return Integer.parseInt(count);
|
||||
URIBuilder uriBuilder = new URIBuilder(urlSearch);
|
||||
uriBuilder.addParameter("db", "pubmed");
|
||||
uriBuilder.addParameter("term", query.getParameterAsClass("query", String.class));
|
||||
Map<String,String> params = new HashMap<String, String>();
|
||||
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
|
||||
return StringUtils.isNotBlank(response) ? Integer.parseInt(getSingleElementValue(response, "Count")) : 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private String getSingleElementValue(String src, String elementName) {
|
||||
String value = null;
|
||||
|
||||
@@ -286,41 +257,39 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
|
||||
Integer start = query.getParameterAsClass("start", Integer.class);
|
||||
Integer count = query.getParameterAsClass("count", Integer.class);
|
||||
|
||||
if (count == null || count < 0) {
|
||||
if (Objects.isNull(count) || count < 0) {
|
||||
count = 10;
|
||||
}
|
||||
|
||||
if (start == null || start < 0) {
|
||||
if (Objects.isNull(start) || start < 0) {
|
||||
start = 0;
|
||||
}
|
||||
|
||||
List<ImportRecord> records = new LinkedList<ImportRecord>();
|
||||
|
||||
WebTarget getRecordIdsTarget = pubmedWebTarget.queryParam("term", queryString);
|
||||
getRecordIdsTarget = getRecordIdsTarget.queryParam("retstart", start);
|
||||
getRecordIdsTarget = getRecordIdsTarget.queryParam("retmax", count);
|
||||
getRecordIdsTarget = getRecordIdsTarget.queryParam("usehistory", "y");
|
||||
getRecordIdsTarget = getRecordIdsTarget.path("esearch.fcgi");
|
||||
URIBuilder uriBuilder = new URIBuilder(urlSearch);
|
||||
uriBuilder.addParameter("db", "pubmed");
|
||||
uriBuilder.addParameter("retstart", start.toString());
|
||||
uriBuilder.addParameter("retmax", count.toString());
|
||||
uriBuilder.addParameter("usehistory", "y");
|
||||
uriBuilder.addParameter("term", queryString);
|
||||
Map<String,String> params = new HashMap<String, String>();
|
||||
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
|
||||
|
||||
Invocation.Builder invocationBuilder = getRecordIdsTarget.request(MediaType.TEXT_PLAIN_TYPE);
|
||||
String queryKey = getSingleElementValue(response, "QueryKey");
|
||||
String webEnv = getSingleElementValue(response, "WebEnv");
|
||||
|
||||
Response response = invocationBuilder.get();
|
||||
String responseString = response.readEntity(String.class);
|
||||
URIBuilder uriBuilder2 = new URIBuilder(urlFetch);
|
||||
uriBuilder2.addParameter("db", "pubmed");
|
||||
uriBuilder2.addParameter("retstart", start.toString());
|
||||
uriBuilder2.addParameter("retmax", count.toString());
|
||||
uriBuilder2.addParameter("WebEnv", webEnv);
|
||||
uriBuilder2.addParameter("query_key", queryKey);
|
||||
uriBuilder2.addParameter("retmode", "xml");
|
||||
Map<String,String> params2 = new HashMap<String, String>();
|
||||
response = liveImportClient.executeHttpGetRequest(1000, uriBuilder2.toString(), params2);
|
||||
|
||||
String queryKey = getSingleElementValue(responseString, "QueryKey");
|
||||
String webEnv = getSingleElementValue(responseString, "WebEnv");
|
||||
|
||||
WebTarget getRecordsTarget = pubmedWebTarget.queryParam("WebEnv", webEnv);
|
||||
getRecordsTarget = getRecordsTarget.queryParam("query_key", queryKey);
|
||||
getRecordsTarget = getRecordsTarget.queryParam("retmode", "xml");
|
||||
getRecordsTarget = getRecordsTarget.path("efetch.fcgi");
|
||||
getRecordsTarget = getRecordsTarget.queryParam("retmax", count);
|
||||
getRecordsTarget = getRecordsTarget.queryParam("retstart", start);
|
||||
|
||||
invocationBuilder = getRecordsTarget.request(MediaType.TEXT_PLAIN_TYPE);
|
||||
response = invocationBuilder.get();
|
||||
|
||||
List<Element> elements = splitToRecords(response.readEntity(String.class));
|
||||
List<Element> elements = splitToRecords(response);
|
||||
|
||||
for (Element record : elements) {
|
||||
records.add(transformSourceRecords(record));
|
||||
@@ -361,23 +330,18 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
|
||||
|
||||
@Override
|
||||
public ImportRecord call() throws Exception {
|
||||
String id = query.getParameterAsClass("id", String.class);
|
||||
|
||||
WebTarget getRecordTarget = pubmedWebTarget.queryParam("id", id);
|
||||
getRecordTarget = getRecordTarget.queryParam("retmode", "xml");
|
||||
getRecordTarget = getRecordTarget.path("efetch.fcgi");
|
||||
URIBuilder uriBuilder = new URIBuilder(urlFetch);
|
||||
uriBuilder.addParameter("db", "pubmed");
|
||||
uriBuilder.addParameter("retmode", "xml");
|
||||
uriBuilder.addParameter("id", query.getParameterAsClass("id", String.class));
|
||||
|
||||
Invocation.Builder invocationBuilder = getRecordTarget.request(MediaType.TEXT_PLAIN_TYPE);
|
||||
Map<String,String> params = new HashMap<String, String>();
|
||||
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
|
||||
|
||||
Response response = invocationBuilder.get();
|
||||
List<Element> elements = splitToRecords(response);
|
||||
|
||||
List<Element> elements = splitToRecords(response.readEntity(String.class));
|
||||
|
||||
if (elements.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return transformSourceRecords(elements.get(0));
|
||||
return elements.isEmpty() ? null : transformSourceRecords(elements.get(0));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -396,40 +360,35 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
|
||||
@Override
|
||||
public Collection<ImportRecord> call() throws Exception {
|
||||
|
||||
WebTarget getRecordIdsTarget = pubmedWebTarget
|
||||
.queryParam("term", query.getParameterAsClass("term", String.class));
|
||||
getRecordIdsTarget = getRecordIdsTarget
|
||||
.queryParam("field", query.getParameterAsClass("field", String.class));
|
||||
getRecordIdsTarget = getRecordIdsTarget.queryParam("usehistory", "y");
|
||||
getRecordIdsTarget = getRecordIdsTarget.path("esearch.fcgi");
|
||||
URIBuilder uriBuilder = new URIBuilder(urlSearch);
|
||||
uriBuilder.addParameter("db", "pubmed");
|
||||
uriBuilder.addParameter("usehistory", "y");
|
||||
uriBuilder.addParameter("term", query.getParameterAsClass("term", String.class));
|
||||
uriBuilder.addParameter("field", query.getParameterAsClass("field", String.class));
|
||||
|
||||
Invocation.Builder invocationBuilder = getRecordIdsTarget.request(MediaType.TEXT_PLAIN_TYPE);
|
||||
Map<String,String> params = new HashMap<String, String>();
|
||||
String response = liveImportClient.executeHttpGetRequest(1000, uriBuilder.toString(), params);
|
||||
|
||||
Response response = invocationBuilder.get();
|
||||
String responseString = response.readEntity(String.class);
|
||||
String webEnv = getSingleElementValue(response, "WebEnv");
|
||||
String queryKey = getSingleElementValue(response, "QueryKey");
|
||||
|
||||
String queryKey = getSingleElementValue(responseString, "QueryKey");
|
||||
String webEnv = getSingleElementValue(responseString, "WebEnv");
|
||||
URIBuilder uriBuilder2 = new URIBuilder(urlFetch);
|
||||
uriBuilder2.addParameter("db", "pubmed");
|
||||
uriBuilder2.addParameter("retmode", "xml");
|
||||
uriBuilder2.addParameter("WebEnv", webEnv);
|
||||
uriBuilder2.addParameter("query_key", queryKey);
|
||||
|
||||
WebTarget getRecordsTarget = pubmedWebTarget.queryParam("WebEnv", webEnv);
|
||||
getRecordsTarget = getRecordsTarget.queryParam("query_key", queryKey);
|
||||
getRecordsTarget = getRecordsTarget.queryParam("retmode", "xml");
|
||||
getRecordsTarget = getRecordsTarget.path("efetch.fcgi");
|
||||
Map<String,String> params2 = new HashMap<String, String>();
|
||||
response = liveImportClient.executeHttpGetRequest(1000, uriBuilder2.toString(), params2);
|
||||
|
||||
invocationBuilder = getRecordsTarget.request(MediaType.TEXT_PLAIN_TYPE);
|
||||
response = invocationBuilder.get();
|
||||
|
||||
String xml = response.readEntity(String.class);
|
||||
return parseXMLString(xml);
|
||||
return parseXMLString(response);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<ImportRecord> getRecords(InputStream inputStream) throws FileSourceException {
|
||||
String xml = null;
|
||||
try (Reader reader = new InputStreamReader(inputStream, "UTF-8")) {
|
||||
xml = CharStreams.toString(reader);
|
||||
String xml = CharStreams.toString(reader);
|
||||
return parseXMLString(xml);
|
||||
} catch (IOException e) {
|
||||
throw new FileSourceException ("Cannot read XML from InputStream", e);
|
||||
@@ -456,4 +415,21 @@ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadat
|
||||
}
|
||||
return records;
|
||||
}
|
||||
}
|
||||
|
||||
public String getUrlFetch() {
|
||||
return urlFetch;
|
||||
}
|
||||
|
||||
public void setUrlFetch(String urlFetch) {
|
||||
this.urlFetch = urlFetch;
|
||||
}
|
||||
|
||||
public String getUrlSearch() {
|
||||
return urlSearch;
|
||||
}
|
||||
|
||||
public void setUrlSearch(String urlSearch) {
|
||||
this.urlSearch = urlSearch;
|
||||
}
|
||||
|
||||
}
|
@@ -47,7 +47,8 @@
|
||||
<bean id="pubmedImportService"
|
||||
class="org.dspace.importer.external.pubmed.service.PubmedImportMetadataSourceServiceImpl">
|
||||
<property name="metadataFieldMapping" ref="pubmedMetadataFieldMapping"/>
|
||||
<property name="baseAddress" value="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"/>
|
||||
<property name="urlFetch" value="${pabmed.url.fetch}"/>
|
||||
<property name="urlSearch" value="${pabmed.url.search}"/>
|
||||
<property name="generateQueryForItem" ref="pubmedService"></property>
|
||||
<property name="supportedExtensions">
|
||||
<list>
|
||||
|
@@ -25,4 +25,9 @@ vufind.url.search = https://vufind.org/advanced_demo/api/v1/search
|
||||
|
||||
scielo.url = https://search.scielo.org/?output=ris&q=
|
||||
|
||||
#################################################################
|
||||
#---------------------- Pubmed -----------------------------#
|
||||
#---------------------------------------------------------------#
|
||||
pabmed.url.search = https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi
|
||||
pabmed.url.fetch = https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi
|
||||
#################################################################
|
Reference in New Issue
Block a user