submit external source partial implementation

This commit is contained in:
Pasquale Cavallo
2020-07-01 10:36:43 +02:00
parent f3844e63a4
commit 808c4633f1
7 changed files with 547 additions and 7 deletions

View File

@@ -0,0 +1,23 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.importer.external.arxiv.metadatamapping;
import java.util.Map;
import javax.annotation.Resource;
import org.dspace.importer.external.metadatamapping.AbstractMetadataFieldMapping;
/**
 * Metadata field mapping used for records imported from arXiv.
 * All behaviour is inherited from {@link AbstractMetadataFieldMapping}; this class only declares
 * which named resource is injected as the field map.
 */
public class ArXivFieldMapping extends AbstractMetadataFieldMapping {
/**
 * Passes the given field map to the superclass.
 * The {@code @Resource} annotation requests injection of the resource named
 * "arxivMetadataFieldMap".
 *
 * @param metadataFieldMap the field map handed on to the superclass
 */
@Override
@Resource(name = "arxivMetadataFieldMap")
public void setMetadataFieldMap(Map metadataFieldMap) {
super.setMetadataFieldMap(metadataFieldMap);
}
}

View File

@@ -0,0 +1,53 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.importer.external.arxiv.metadatamapping.transform;
import java.util.List;
import org.dspace.content.Item;
import org.dspace.content.MetadataValue;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.ItemService;
import org.dspace.importer.external.datamodel.Query;
import org.dspace.importer.external.exception.MetadataSourceException;
import org.dspace.importer.external.metadatamapping.transform.GenerateQueryService;
/**
 * Builds the {@link Query} used to look up an existing item in the external source.
 */
public class GenerateArXivQueryService implements GenerateQueryService {

    /**
     * Derive a Query from the metadata of an item.
     * The first dc.identifier.doi value is preferred; when the item has none, the first dc.title
     * value is used instead. When the item has neither, {@code null} is returned.
     *
     * NOTE(review): the parameter names "term" and "field" written here differ from the "query"
     * and "id" parameters read by ArXivImportMetadataSourceServiceImpl - confirm they are what
     * the consumer of this query expects.
     *
     * @param item the Item to create a Query from
     * @return the populated Query, or {@code null} when the item has no DOI and no title
     */
    @Override
    public Query generateQueryForItem(Item item) throws MetadataSourceException {
        // The ItemService gives access to the metadata values stored on the item.
        ItemService items = ContentServiceFactory.getInstance().getItemService();

        List<MetadataValue> dois = items.getMetadata(item, "dc", "identifier", "doi", Item.ANY);
        if (!dois.isEmpty()) {
            return termQuery(dois.get(0).getValue(), "ELocationID");
        }

        List<MetadataValue> titles = items.getMetadata(item, "dc", "title", null, Item.ANY);
        if (!titles.isEmpty()) {
            return termQuery(titles.get(0).getValue(), "title");
        }

        return null;
    }

    /** Creates a Query carrying the search term and the name of the field it applies to. */
    private static Query termQuery(String term, String field) {
        Query built = new Query();
        built.addParameter("term", term);
        built.addParameter("field", field);
        return built;
    }
}

View File

@@ -0,0 +1,310 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.importer.external.arxiv.service;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.Callable;
import org.apache.axiom.om.OMElement;
import org.apache.axiom.om.OMXMLBuilderFactory;
import org.apache.axiom.om.OMXMLParserWrapper;
import org.apache.axiom.om.xpath.AXIOMXPath;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpException;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.StatusLine;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.params.HttpParams;
import org.dspace.content.Item;
import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.datamodel.Query;
import org.dspace.importer.external.exception.MetadataSourceException;
import org.dspace.importer.external.service.AbstractImportMetadataSourceService;
import org.jaxen.JaxenException;
/**
 * Import source that retrieves records from the arXiv query API and converts every Atom
 * {@code entry} element of the response into an {@link ImportRecord}.
 *
 * Matching of existing items ({@code findMatchingRecords}) is not implemented yet.
 */
public class ArXivImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService<OMElement> {

    /** Address of the arXiv query API that every lookup is sent to. */
    private static final String ARXIV_API_URL = "http://export.arxiv.org/api/query";

    /** URL prefixes that may precede an identifier and are stripped before the lookup. */
    private static final String[] ABS_URL_PREFIXES = {"http://arxiv.org/abs/", "https://arxiv.org/abs/"};

    /** Scheme-like prefix that may precede an identifier; matched ignoring case. */
    private static final String ID_SCHEME_PREFIX = "arxiv:";

    private int timeout = 1000;

    /**
     * How long to wait for a connection to be established.
     *
     * @param timeout milliseconds
     */
    public void setTimeout(int timeout) {
        this.timeout = timeout;
    }

    /**
     * Searches arXiv with a free-text query.
     *
     * @param query the search expression sent as {@code search_query}
     * @param start offset of the first result
     * @param count maximum number of results
     * @return the records built from the response entries
     * @throws MetadataSourceException if the underlying call fails
     */
    @Override
    public Collection<ImportRecord> getRecords(String query, int start, int count) throws MetadataSourceException {
        return retry(new SearchByQueryCallable(query, count, start));
    }

    /**
     * Searches arXiv using the "query", "start" and "count" parameters of the given Query.
     *
     * @param query the query parameters
     * @return the records built from the response entries
     * @throws MetadataSourceException if the underlying call fails
     */
    @Override
    public Collection<ImportRecord> getRecords(Query query) throws MetadataSourceException {
        return retry(new SearchByQueryCallable(query));
    }

    /**
     * Counts the records returned by a search for the given expression.
     * NOTE(review): this is the size of the list returned by a single search call, which is not
     * necessarily the total number of matches in arXiv - confirm which one callers expect.
     *
     * @param query the search expression
     * @return the number of records returned, 0 when there are none
     * @throws MetadataSourceException if the underlying call fails
     */
    @Override
    public int getNbRecords(String query) throws MetadataSourceException {
        List<ImportRecord> records = retry(new SearchByQueryCallable(query, null, null));
        return records != null ? records.size() : 0;
    }

    /**
     * Counts the records returned by a search using the given Query (see the String variant for
     * the meaning of the count).
     *
     * @param query the query parameters
     * @return the number of records returned, 0 when there are none
     * @throws MetadataSourceException if the underlying call fails
     */
    @Override
    public int getNbRecords(Query query) throws MetadataSourceException {
        List<ImportRecord> records = retry(new SearchByQueryCallable(query));
        return records != null ? records.size() : 0;
    }

    /**
     * Looks up a single record by its arXiv identifier.
     *
     * @param id the identifier, optionally prefixed with an arxiv.org/abs URL or "arXiv:"
     * @return the record, or {@code null} when nothing was found
     * @throws MetadataSourceException if more than one record is returned or the call fails
     */
    @Override
    public ImportRecord getRecord(String id) throws MetadataSourceException {
        return singleRecord(retry(new SearchByIdCallable(id)));
    }

    /**
     * Looks up a single record using the "id" parameter of the given Query.
     *
     * @param query the query parameters
     * @return the record, or {@code null} when nothing was found
     * @throws MetadataSourceException if more than one record is returned or the call fails
     */
    @Override
    public ImportRecord getRecord(Query query) throws MetadataSourceException {
        return singleRecord(retry(new SearchByIdCallable(query)));
    }

    /** Nothing to initialise. */
    @Override
    public void init() throws Exception {
    }

    /**
     * @return the name identifying this import source
     */
    @Override
    public String getImportSource() {
        return "arxiv";
    }

    /**
     * Not implemented yet.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Collection<ImportRecord> findMatchingRecords(Item item) throws MetadataSourceException {
        throw new UnsupportedOperationException("findMatchingRecords(Item) is not implemented for arXiv");
    }

    /**
     * Not implemented yet.
     *
     * @return always {@code null}
     */
    @Override
    public Collection<ImportRecord> findMatchingRecords(Query query) throws MetadataSourceException {
        return null;
    }

    /**
     * Reduces a lookup result to at most one record.
     * An empty result yields {@code null} instead of failing on {@code get(0)}.
     */
    private static ImportRecord singleRecord(List<ImportRecord> records) throws MetadataSourceException {
        if (records == null || records.isEmpty()) {
            return null;
        }
        if (records.size() > 1) {
            throw new MetadataSourceException("More than one result found");
        }
        return records.get(0);
    }

    /** Creates a URI builder for the arXiv API, reporting a malformed address as HttpException. */
    private static URIBuilder newApiUriBuilder() throws HttpException {
        try {
            return new URIBuilder(ARXIV_API_URL);
        } catch (URISyntaxException ex) {
            throw new HttpException(ex.getMessage(), ex);
        }
    }

    /** Trims an identifier and strips a leading arxiv.org/abs URL or "arXiv:" prefix. */
    private static String normalizeId(String rawId) {
        String id = rawId.trim();
        for (String prefix : ABS_URL_PREFIXES) {
            if (id.startsWith(prefix)) {
                return id.substring(prefix.length());
            }
        }
        // Case-insensitive match that does not depend on the default locale.
        if (StringUtils.startsWithIgnoreCase(id, ID_SCHEME_PREFIX)) {
            return id.substring(ID_SCHEME_PREFIX.length());
        }
        return id;
    }

    /**
     * Executes a GET request against the URI described by the builder and converts every entry
     * of the response into an ImportRecord.
     *
     * @param uriBuilder          the fully parameterised request URI
     * @param parseFailureMessage message of the exception raised when the body cannot be
     *                            read or converted
     * @return the converted records, possibly empty
     * @throws Exception HttpException for a malformed URI, RuntimeException for a non-200 status
     *                   or an unreadable body, or whatever the HTTP client raises
     */
    private List<ImportRecord> fetchRecords(URIBuilder uriBuilder, String parseFailureMessage) throws Exception {
        HttpClient client = new DefaultHttpClient();
        HttpGet method = null;
        try {
            HttpParams params = client.getParams();
            params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, timeout);
            try {
                method = new HttpGet(uriBuilder.build());
            } catch (URISyntaxException ex) {
                throw new HttpException(ex.getMessage(), ex);
            }

            // Execute the method.
            HttpResponse response = client.execute(method);
            StatusLine responseStatus = response.getStatusLine();
            int statusCode = responseStatus.getStatusCode();
            if (statusCode != HttpStatus.SC_OK) {
                if (statusCode == HttpStatus.SC_BAD_REQUEST) {
                    throw new RuntimeException("arXiv query is not valid");
                }
                throw new RuntimeException("Http call failed: " + responseStatus);
            }

            // Decode explicitly as UTF-8 instead of the platform default charset, and close the
            // reader when done.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(response.getEntity().getContent(), "UTF-8"))) {
                StringBuilder body = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    // Keep the line break: without it the words at the end and start of
                    // consecutive lines inside a text node would be fused together.
                    body.append(line).append('\n');
                }
                List<ImportRecord> results = new ArrayList<>();
                for (OMElement record : splitToRecords(body.toString())) {
                    results.add(transformSourceRecords(record));
                }
                return results;
            } catch (Exception e) {
                // Keep the cause so the real failure is not lost.
                throw new RuntimeException(parseFailureMessage, e);
            }
        } finally {
            if (method != null) {
                method.releaseConnection();
            }
            // The client is created per call, so its connection manager must be shut down here.
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Search call: reads "query", "start" and "count" from the Query and sends them as
     * search_query, start and max_results.
     */
    private class SearchByQueryCallable implements Callable<List<ImportRecord>> {
        private final Query query;

        private SearchByQueryCallable(String queryString, Integer maxResult, Integer start) {
            query = new Query();
            query.addParameter("query", queryString);
            query.addParameter("start", start);
            query.addParameter("count", maxResult);
        }

        private SearchByQueryCallable(Query query) {
            this.query = query;
        }

        @Override
        public List<ImportRecord> call() throws Exception {
            String queryString = query.getParameterAsClass("query", String.class);
            Integer start = query.getParameterAsClass("start", Integer.class);
            Integer maxResult = query.getParameterAsClass("count", Integer.class);

            URIBuilder uriBuilder = newApiUriBuilder();
            uriBuilder.addParameter("search_query", queryString);
            // Paging parameters are optional; when absent they are simply not sent.
            if (maxResult != null) {
                uriBuilder.addParameter("max_results", String.valueOf(maxResult));
            }
            if (start != null) {
                uriBuilder.addParameter("start", String.valueOf(start));
            }
            return fetchRecords(uriBuilder, "arXiv response could not be read or parsed");
        }
    }

    /**
     * Identifier lookup: reads "id" from the Query, normalises it and sends it as id_list.
     */
    private class SearchByIdCallable implements Callable<List<ImportRecord>> {
        private final Query query;

        private SearchByIdCallable(Query query) {
            this.query = query;
        }

        private SearchByIdCallable(String id) {
            this.query = new Query();
            query.addParameter("id", id);
        }

        @Override
        public List<ImportRecord> call() throws Exception {
            String arxivid = query.getParameterAsClass("id", String.class);
            // Without an identifier there is nothing to look up; report "no result" instead of
            // executing a request that was never built.
            if (StringUtils.isBlank(arxivid)) {
                return new ArrayList<>();
            }
            URIBuilder uriBuilder = newApiUriBuilder();
            uriBuilder.addParameter("id_list", normalizeId(arxivid));
            return fetchRecords(uriBuilder, "ArXiv identifier is not valid or not exist");
        }
    }

    /**
     * Placeholder for matching existing items against arXiv; currently yields no result and is
     * not used by the findMatchingRecords methods.
     */
    private class FindMatchingRecordCallable implements Callable<List<ImportRecord>> {
        private final Query query;

        private FindMatchingRecordCallable(Item item) throws MetadataSourceException {
            query = getGenerateQueryForItem().generateQueryForItem(item);
        }

        private FindMatchingRecordCallable(Query q) {
            query = q;
        }

        @Override
        public List<ImportRecord> call() throws Exception {
            return null;
        }
    }

    /**
     * Parses the response document and selects its {@code entry} children in the Atom namespace.
     *
     * @param recordsSrc the XML document as text
     * @return the entry elements, possibly empty
     * @throws JaxenException if the XPath expression cannot be evaluated
     */
    private static List<OMElement> splitToRecords(String recordsSrc) throws JaxenException {
        OMXMLParserWrapper records = OMXMLBuilderFactory.createOMBuilder(new StringReader(recordsSrc));
        OMElement element = records.getDocumentElement();
        AXIOMXPath xpath = new AXIOMXPath("ns:entry");
        xpath.addNamespace("ns", "http://www.w3.org/2005/Atom");
        // selectNodes returns an untyped list; the expression only selects elements.
        @SuppressWarnings("unchecked")
        List<OMElement> recordsList = xpath.selectNodes(element);
        return recordsList;
    }
}

View File

@@ -16,7 +16,6 @@ import org.dspace.importer.external.metadatamapping.contributor.MetadataContribu
import org.dspace.importer.external.metadatamapping.transform.GenerateQueryService;
import org.dspace.importer.external.service.components.AbstractRemoteMetadataSource;
import org.dspace.importer.external.service.components.MetadataSource;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Required;
/**
@@ -49,7 +48,6 @@ public abstract class AbstractImportMetadataSourceService<RecordType> extends Ab
*
* @param generateQueryForItem the query generator to be used.
*/
@Autowired
public void setGenerateQueryForItem(GenerateQueryService generateQueryForItem) {
this.generateQueryForItem = generateQueryForItem;
}

View File

@@ -19,10 +19,6 @@
<context:annotation-config/> <!-- allows us to use spring annotations in beans -->
<bean id="importService" class="org.dspace.importer.external.service.ImportService" scope="singleton"
lazy-init="false" autowire="byType" destroy-method="destroy">
</bean>
<!--If multiple importServices have been configured here but only one is to be used during the lookup step (StartSubmissionLookupStep),
this can be accomplished by specifying the property "publication-lookup.url" to the baseAddress of the required importService
@@ -32,10 +28,27 @@
Omitting this property will default to searching over all configured ImportService implementations
-->
<bean id="importService" class="org.dspace.importer.external.service.ImportService" scope="singleton"
lazy-init="false" autowire="byType" destroy-method="destroy">
</bean>
<bean id="ArXivImportService"
class="org.dspace.importer.external.arxiv.service.ArXivImportMetadataSourceServiceImpl" scope="singleton">
<property name="metadataFieldMapping" ref="ArXivMetadataFieldMapping"/>
<property name="generateQueryForItem" ref="arXivQueryService"></property>
<!-- <property name="baseAddress" value="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"/> -->
</bean>
<bean id="ArXivMetadataFieldMapping"
class="org.dspace.importer.external.arxiv.metadatamapping.ArXivFieldMapping">
</bean>
<bean id="PubmedImportService"
class="org.dspace.importer.external.pubmed.service.PubmedImportMetadataSourceServiceImpl" scope="singleton">
<property name="metadataFieldMapping" ref="PubmedMetadataFieldMapping"/>
<property name="baseAddress" value="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"/>
<property name="generateQueryForItem" ref="pubmedService"></property>
</bean>
<bean id="PubmedMetadataFieldMapping"

View File

@@ -0,0 +1,136 @@
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:context="http://www.springframework.org/schema/context"
       xmlns:util="http://www.springframework.org/schema/util"
       xsi:schemaLocation="http://www.springframework.org/schema/beans
           http://www.springframework.org/schema/beans/spring-beans-2.5.xsd
           http://www.springframework.org/schema/context
           http://www.springframework.org/schema/context/spring-context-2.5.xsd http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util.xsd"
       default-autowire-candidates="*Service,*DAO,javax.sql.DataSource">

    <context:annotation-config/>
    <!-- allows us to use spring annotations in beans -->

    <!-- Field map injected into ArXivFieldMapping: each entry pairs a target metadata field with the
         contributor that extracts its value from an arXiv record. -->
    <util:map id="arxivMetadataFieldMap" key-type="org.dspace.importer.external.metadatamapping.MetadataFieldConfig"
              value-type="org.dspace.importer.external.metadatamapping.contributor.MetadataContributor">
        <description>Defines which metadatum is mapped on which metadatum. Note that while the key must be unique it
            only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over
            what metadatafield is generated.
        </description>
        <entry key-ref="arxiv.category" value-ref="arxivCategoryContrib"/>
        <entry key-ref="arxiv.journal" value-ref="arxivJournalContrib"/>
        <entry key-ref="arxiv.doi" value-ref="arxivDoiContrib"/>
        <entry key-ref="arxiv.author" value-ref="arxivAuthorContrib"/>
        <entry key-ref="arxiv.published" value-ref="arxivPublishedContrib"/>
        <entry key-ref="arxiv.articleTitle" value-ref="arxivArticleTitleContrib"/>
        <entry key-ref="arxiv.summary" value-ref="arxivSummaryContrib"/>
        <entry key-ref="arxiv.id" value-ref="arxivIdContrib"/>
        <entry key-ref="arxiv.primaryCategory" value-ref="arxivPrimaryCategoryContrib"/>
    </util:map>

    <!-- Contributors: one XPath query per source element.
         NOTE(review): the queries below use unprefixed element names, while the record splitter selects
         "ns:entry" in the Atom namespace. Confirm that these queries resolve against the namespaced
         elements of an entry. -->
    <bean id="arxivCategoryContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleXpathMetadatumContributor">
        <property name="field" ref="arxiv.category"/>
        <property name="query" value="category"/>
        <property name="prefixToNamespaceMapping" ref="prefixToNamespaceMapping"/>
    </bean>
    <bean id="arxivJournalContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleXpathMetadatumContributor">
        <property name="field" ref="arxiv.journal"/>
        <property name="query" value="journal_ref"/>
        <property name="prefixToNamespaceMapping" ref="prefixToNamespaceMapping"/>
    </bean>
    <!-- NOTE(review): uses the same "id" query as arxivIdContrib; confirm which element carries the DOI. -->
    <bean id="arxivDoiContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleXpathMetadatumContributor">
        <property name="field" ref="arxiv.doi"/>
        <property name="query" value="id"/>
        <property name="prefixToNamespaceMapping" ref="prefixToNamespaceMapping"/>
    </bean>
    <bean id="arxivAuthorContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleXpathMetadatumContributor">
        <property name="field" ref="arxiv.author"/>
        <property name="query" value="author/name"/>
        <property name="prefixToNamespaceMapping" ref="prefixToNamespaceMapping"/>
    </bean>
    <bean id="arxivPublishedContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleXpathMetadatumContributor">
        <property name="field" ref="arxiv.published"/>
        <property name="query" value="published"/>
        <property name="prefixToNamespaceMapping" ref="prefixToNamespaceMapping"/>
    </bean>
    <bean id="arxivArticleTitleContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleXpathMetadatumContributor">
        <property name="field" ref="arxiv.articleTitle"/>
        <property name="query" value="title"/>
        <property name="prefixToNamespaceMapping" ref="prefixToNamespaceMapping"/>
    </bean>
    <bean id="arxivSummaryContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleXpathMetadatumContributor">
        <property name="field" ref="arxiv.summary"/>
        <property name="query" value="summary"/>
        <property name="prefixToNamespaceMapping" ref="prefixToNamespaceMapping"/>
    </bean>
    <bean id="arxivIdContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleXpathMetadatumContributor">
        <property name="field" ref="arxiv.id"/>
        <property name="query" value="id"/>
        <property name="prefixToNamespaceMapping" ref="prefixToNamespaceMapping"/>
    </bean>
    <bean id="arxivPrimaryCategoryContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleXpathMetadatumContributor">
        <property name="field" ref="arxiv.primaryCategory"/>
        <property name="query" value="primary_category"/>
        <property name="prefixToNamespaceMapping" ref="prefixToNamespaceMapping"/>
    </bean>

    <!-- Target metadata fields. -->
    <bean id="arxiv.category" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
        <constructor-arg value="dc.subject"/>
    </bean>
    <bean id="arxiv.journal" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
        <constructor-arg value="dc.source"/>
    </bean>
    <bean id="arxiv.doi" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
        <constructor-arg value="dc.identifier"/>
    </bean>
    <bean id="arxiv.author" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
        <constructor-arg value="dc.contributor.author"/>
    </bean>
    <!-- The publication date belongs in a date field, not in dc.publisher. -->
    <bean id="arxiv.published" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
        <constructor-arg value="dc.date.issued"/>
    </bean>
    <bean id="arxiv.articleTitle" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
        <constructor-arg value="dc.title"/>
    </bean>
    <bean id="arxiv.summary" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
        <constructor-arg value="dc.description.abstract"/>
    </bean>
    <!-- The record id goes to dc.identifier.other, the field the arXiv LiveImportDataProvider is
         configured to read the record id from (recordIdMetadata); it must not share the abstract field. -->
    <bean id="arxiv.id" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
        <constructor-arg value="dc.identifier.other"/>
    </bean>
    <bean id="arxiv.primaryCategory" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
        <constructor-arg value="dc.subject"/>
    </bean>

    <!-- NOTE(review): confirm that the key/value orientation of this map (prefix as key, namespace URI as
         value) is the one SimpleXpathMetadatumContributor expects. -->
    <util:map id="prefixToNamespaceMapping">
        <entry key="ns" value="http://www.w3.org/2005/Atom" />
    </util:map>

    <bean class="java.lang.Integer" id="maxRetry">
        <constructor-arg value="3"/>
    </bean>

    <bean class="org.dspace.importer.external.arxiv.metadatamapping.transform.GenerateArXivQueryService" id="arXivQueryService">
        <description>Defines how an org.dspace.content.Item is mapped to a query in arXiv. Please note that exactly one of
            these must be present. If multiple are present the result is undefined.
        </description>
    </bean>
</beans>

View File

@@ -31,10 +31,17 @@
<property name="sourceIdentifier" value="lcname"/>
</bean>
<bean class="org.dspace.external.provider.impl.LiveImportDataProvider">
<bean id="pubmedLiveImportDataProvider" class="org.dspace.external.provider.impl.LiveImportDataProvider">
<property name="metadataSource" ref="PubmedImportService"/>
<property name="sourceIdentifier" value="pubmed"/>
<property name="recordIdMetadata" value="dc.identifier.other"/>
</bean>
<bean id="arxivLiveImportDataProvider" class="org.dspace.external.provider.impl.LiveImportDataProvider">
<property name="metadataSource" ref="ArXivImportService"/>
<property name="sourceIdentifier" value="arxiv"/>
<property name="recordIdMetadata" value="dc.identifier.other"/>
</bean>
</beans>