diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/ArXivFieldMapping.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/ArXivFieldMapping.java
new file mode 100644
index 0000000000..c4f6996a27
--- /dev/null
+++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/ArXivFieldMapping.java
@@ -0,0 +1,33 @@
+/**
+ * The contents of this file are subject to the license and copyright
+ * detailed in the LICENSE and NOTICE files at the root of the source
+ * tree and available online at
+ *
+ * http://www.dspace.org/license/
+ */
+package org.dspace.importer.external.arxiv.metadatamapping;
+
+import java.util.Map;
+import javax.annotation.Resource;
+
+import org.dspace.importer.external.metadatamapping.AbstractMetadataFieldMapping;
+
+/**
+ * Metadata field mapping of the arXiv import source.
+ * It only overrides the setter so that the map is injected from the
+ * resource named "arxivMetadataFieldMap".
+ */
+public class ArXivFieldMapping extends AbstractMetadataFieldMapping {
+
+    /**
+     * Passes the injected field map on to the parent class.
+     *
+     * @param metadataFieldMap the map injected from the "arxivMetadataFieldMap" resource
+     */
+    @Override
+    @Resource(name = "arxivMetadataFieldMap")
+    public void setMetadataFieldMap(Map metadataFieldMap) {
+        super.setMetadataFieldMap(metadataFieldMap);
+    }
+
+}
diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/transform/GenerateArXivQueryService.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/transform/GenerateArXivQueryService.java
new file mode 100644
index 0000000000..7f5e08cb5a
--- /dev/null
+++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/transform/GenerateArXivQueryService.java
@@ -0,0 +1,53 @@
+/**
+ * The contents of this file are subject to the license and copyright
+ * detailed in the LICENSE and NOTICE files at the root of the source
+ * tree and available online at
+ *
+ * http://www.dspace.org/license/
+ */
+package org.dspace.importer.external.arxiv.metadatamapping.transform;
+
+import java.util.List;
+
+import org.dspace.content.Item;
+import org.dspace.content.MetadataValue;
+import org.dspace.content.factory.ContentServiceFactory;
+import 
org.dspace.content.service.ItemService;
+import org.dspace.importer.external.datamodel.Query;
+import org.dspace.importer.external.exception.MetadataSourceException;
+import org.dspace.importer.external.metadatamapping.transform.GenerateQueryService;
+
+public class GenerateArXivQueryService implements GenerateQueryService {
+
+    /**
+     * Create a Query object based on a given item.
+     * If the item has at least 1 value for dc.identifier.doi, the first one will be used.
+     * If no DOI is found, the first value of dc.title will be used.
+     * NOTE(review): the "term"/"field" parameters set here are not among the parameters
+     * ("query", "start", "count", "id") read by the arXiv import service - confirm they are intended.
+     *
+     * @param item the Item to create a Query from
+     * @return the Query, or null when the item has neither a DOI nor a title
+     */
+    @Override
+    public Query generateQueryForItem(Item item) throws MetadataSourceException {
+        Query query = new Query();
+        // Retrieve an instance of the ItemService to access business calls on an item.
+        ItemService itemService = ContentServiceFactory.getInstance().getItemService();
+
+        List<MetadataValue> doi = itemService.getMetadata(item, "dc", "identifier", "doi", Item.ANY);
+        if (!doi.isEmpty()) {
+            query.addParameter("term", doi.get(0).getValue());
+            query.addParameter("field", "ELocationID");
+            return query;
+        }
+
+        List<MetadataValue> title = itemService.getMetadata(item, "dc", "title", null, Item.ANY);
+        if (!title.isEmpty()) {
+            query.addParameter("term", title.get(0).getValue());
+            query.addParameter("field", "title");
+            return query;
+        }
+        return null;
+    }
+}
diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java
new file mode 100644
index 0000000000..567cce1b9a
--- /dev/null
+++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java
@@ -0,0 +1,310 @@
+/**
+ * The contents of this file are subject to the license and copyright
+ * detailed in the LICENSE and NOTICE files at the root of the source
+ * tree and 
available online at
+ *
+ * http://www.dspace.org/license/
+ */
+package org.dspace.importer.external.arxiv.service;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.io.StringReader;
+import java.net.URISyntaxException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.Callable;
+
+import org.apache.axiom.om.OMElement;
+import org.apache.axiom.om.OMXMLBuilderFactory;
+import org.apache.axiom.om.OMXMLParserWrapper;
+import org.apache.axiom.om.xpath.AXIOMXPath;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.http.HttpException;
+import org.apache.http.HttpResponse;
+import org.apache.http.HttpStatus;
+import org.apache.http.StatusLine;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.utils.URIBuilder;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.params.CoreConnectionPNames;
+import org.apache.http.params.HttpParams;
+import org.dspace.content.Item;
+import org.dspace.importer.external.datamodel.ImportRecord;
+import org.dspace.importer.external.datamodel.Query;
+import org.dspace.importer.external.exception.MetadataSourceException;
+import org.dspace.importer.external.service.AbstractImportMetadataSourceService;
+import org.jaxen.JaxenException;
+
+/** Metadata source that turns the entries of arXiv Atom API responses into import records. */
+public class ArXivImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService<OMElement> {
+    /** Endpoint used for both searches and identifier lookups. */
+    private static final String ARXIV_API_URL = "http://export.arxiv.org/api/query";
+
+    private int timeout = 1000;
+
+    /**
+     * How long to wait for a connection to be established.
+     *
+     * @param timeout milliseconds
+     */
+    public void setTimeout(int timeout) {
+        this.timeout = timeout;
+    }
+
+    /**
+     * Runs a search and returns one page of its results.
+     *
+     * @param query the arXiv search expression
+     * @param start offset of the first result, sent as the "start" request parameter
+     * @param count number of results, sent as the "max_results" request parameter
+     * @return the records built from the returned entries
+     * @throws MetadataSourceException propagated from {@code retry}
+     */
+    @Override
+    public Collection<ImportRecord> getRecords(String query, int start, int count)
+        throws MetadataSourceException {
+        return retry(new SearchByQueryCallable(query, count, start));
+    }
+
+    /** Runs the search described by the "query", "start" and "count" parameters of the given Query. */
+    @Override
+    public Collection<ImportRecord> getRecords(Query query) throws MetadataSourceException {
+        return retry(new SearchByQueryCallable(query));
+    }
+
+    /**
+     * Counts the records returned for a search expression.
+     * NOTE(review): the count is the size of a single response requested without
+     * "max_results", so it may be limited by the server's page size - confirm.
+     */
+    @Override
+    public int getNbRecords(String query) throws MetadataSourceException {
+        List<ImportRecord> records = retry(new SearchByQueryCallable(query, null, null));
+        return records != null ? records.size() : 0;
+    }
+
+    /** Counts the records returned for the search described by the given Query. */
+    @Override
+    public int getNbRecords(Query query) throws MetadataSourceException {
+        List<ImportRecord> records = retry(new SearchByQueryCallable(query));
+        return records != null ? records.size() : 0;
+    }
+
+    /**
+     * Looks up one record by arXiv identifier.
+     *
+     * @param id the identifier; an "http(s)://arxiv.org/abs/" or "arxiv:" prefix is accepted
+     * @return the record, or null when nothing was found
+     * @throws MetadataSourceException when more than one record is returned
+     */
+    @Override
+    public ImportRecord getRecord(String id) throws MetadataSourceException {
+        return singleRecord(retry(new SearchByIdCallable(id)));
+    }
+
+    /** Looks up one record by the "id" parameter of the given Query; null when nothing was found. */
+    @Override
+    public ImportRecord getRecord(Query query) throws MetadataSourceException {
+        return singleRecord(retry(new SearchByIdCallable(query)));
+    }
+
+    /** Returns the only element of the list, or null when the list is null or empty. */
+    private static ImportRecord singleRecord(List<ImportRecord> records) throws MetadataSourceException {
+        if (records == null || records.isEmpty()) {
+            return null;
+        }
+        if (records.size() > 1) {
+            throw new MetadataSourceException("More than one result found");
+        }
+        return records.get(0);
+    }
+
+    /** Nothing to initialise. */
+    @Override
+    public void init() throws Exception {
+    }
+
+    @Override
+    public String getImportSource() {
+        return "arxiv";
+    }
+
+    /** Matching by item is not implemented for this source. */
+    @Override
+    public Collection<ImportRecord> findMatchingRecords(Item item) throws MetadataSourceException {
+        throw new UnsupportedOperationException("findMatchingRecords(Item) is not implemented for arXiv");
+    }
+
+    /** Matching by query is not implemented for this source; no records are returned. */
+    @Override
+    public Collection<ImportRecord> findMatchingRecords(Query query) throws MetadataSourceException {
+        return new ArrayList<ImportRecord>();
+    }
+
+    /** Creates a builder for the arXiv endpoint, reporting a malformed URL as an HttpException. */
+    private static URIBuilder newApiUriBuilder() throws HttpException {
+        try {
+            return new URIBuilder(ARXIV_API_URL);
+        } catch (URISyntaxException ex) {
+            throw new HttpException(ex.getMessage(), ex);
+        }
+    }
+
+    /**
+     * Sends a GET request for the given URI and transforms every Atom entry of the
+     * response into an {@link ImportRecord}.
+     *
+     * @param uriBuilder the fully parameterised request URI
+     * @return the transformed records, empty when the response has no entry
+     * @throws Exception when the request cannot be built or executed, or the response is not usable
+     */
+    private List<ImportRecord> fetchRecords(URIBuilder uriBuilder) throws Exception {
+        List<ImportRecord> results = new ArrayList<ImportRecord>();
+        HttpClient client = new DefaultHttpClient();
+        HttpGet method = null;
+        try {
+            HttpParams params = client.getParams();
+            params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, timeout);
+            try {
+                method = new HttpGet(uriBuilder.build());
+            } catch (URISyntaxException ex) {
+                throw new HttpException(ex.getMessage(), ex);
+            }
+            HttpResponse response = client.execute(method);
+            StatusLine responseStatus = response.getStatusLine();
+            int statusCode = responseStatus.getStatusCode();
+            if (statusCode == HttpStatus.SC_BAD_REQUEST) {
+                throw new RuntimeException("arXiv query is not valid");
+            } else if (statusCode != HttpStatus.SC_OK) {
+                throw new RuntimeException("Http call failed: " + responseStatus);
+            }
+            try {
+                // Decode as UTF-8 instead of the platform charset and keep the line breaks:
+                // readLine() strips them, and gluing lines together would merge adjacent words.
+                BufferedReader reader = new BufferedReader(
+                    new InputStreamReader(response.getEntity().getContent(), StandardCharsets.UTF_8));
+                StringBuilder sb = new StringBuilder();
+                String line;
+                while ((line = reader.readLine()) != null) {
+                    sb.append(line).append('\n');
+                }
+                for (OMElement record : splitToRecords(sb.toString())) {
+                    results.add(transformSourceRecords(record));
+                }
+            } catch (Exception e) {
+                // Keep the cause so parsing and I/O failures remain diagnosable.
+                throw new RuntimeException("ArXiv identifier is not valid or not exist", e);
+            }
+        } finally {
+            if (method != null) {
+                method.releaseConnection();
+            }
+            // The client is created per call, so its connection manager is shut down here.
+            client.getConnectionManager().shutdown();
+        }
+        return results;
+    }
+
+    /** Searches arXiv with the "query", "start" and "count" parameters of a Query. */
+    private class SearchByQueryCallable implements Callable<List<ImportRecord>> {
+        private final Query query;
+
+        private SearchByQueryCallable(String queryString, Integer maxResult, Integer start) {
+            query = new Query();
+            query.addParameter("query", queryString);
+            query.addParameter("start", start);
+            query.addParameter("count", maxResult);
+        }
+
+        private SearchByQueryCallable(Query query) {
+            this.query = query;
+        }
+
+        @Override
+        public List<ImportRecord> call() throws Exception {
+            String queryString = query.getParameterAsClass("query", String.class);
+            Integer start = query.getParameterAsClass("start", Integer.class);
+            Integer maxResult = query.getParameterAsClass("count", Integer.class);
+
+            URIBuilder uriBuilder = newApiUriBuilder();
+            uriBuilder.addParameter("search_query", queryString);
+            if (maxResult != null) {
+                uriBuilder.addParameter("max_results", String.valueOf(maxResult));
+            }
+            if (start != null) {
+                uriBuilder.addParameter("start", String.valueOf(start));
+            }
+            return fetchRecords(uriBuilder);
+        }
+    }
+
+    /** Fetches the entry identified by the "id" parameter of a Query. */
+    private class SearchByIdCallable implements Callable<List<ImportRecord>> {
+        private final Query query;
+
+        private SearchByIdCallable(Query query) {
+            this.query = query;
+        }
+
+        private SearchByIdCallable(String id) {
+            this.query = new Query();
+            query.addParameter("id", id);
+        }
+
+        @Override
+        public List<ImportRecord> call() throws Exception {
+            String arxivid = query.getParameterAsClass("id", String.class);
+            if (StringUtils.isBlank(arxivid)) {
+                // Nothing to look up; a null request must never be handed to the HTTP client.
+                return new ArrayList<ImportRecord>();
+            }
+            arxivid = arxivid.trim();
+            if (arxivid.startsWith("http://arxiv.org/abs/")) {
+                arxivid = arxivid.substring("http://arxiv.org/abs/".length());
+            } else if (arxivid.startsWith("https://arxiv.org/abs/")) {
+                arxivid = arxivid.substring("https://arxiv.org/abs/".length());
+            } else if (StringUtils.startsWithIgnoreCase(arxivid, "arxiv:")) {
+                arxivid = arxivid.substring("arxiv:".length());
+            }
+            URIBuilder uriBuilder = newApiUriBuilder();
+            uriBuilder.addParameter("id_list", arxivid);
+            return fetchRecords(uriBuilder);
+        }
+    }
+
+    /** Placeholder for item matching; NOTE(review): not referenced by the code visible in this file. */
+    private class FindMatchingRecordCallable implements Callable<List<ImportRecord>> {
+        private Query query;
+
+        private FindMatchingRecordCallable(Item item) throws MetadataSourceException {
+            query = getGenerateQueryForItem().generateQueryForItem(item);
+        }
+
+        public FindMatchingRecordCallable(Query q) {
+            query = q;
+        }
+
+        @Override
+        public List<ImportRecord> call() throws Exception {
+            return null;
+        }
+    }
+
+    /** Parses the Atom document and returns its entry elements. */
+    @SuppressWarnings("unchecked")
+    private static List<OMElement> splitToRecords(String recordsSrc) {
+        OMXMLParserWrapper records = OMXMLBuilderFactory.createOMBuilder(new StringReader(recordsSrc));
+        OMElement element = records.getDocumentElement();
+        try {
+            AXIOMXPath xpath = new AXIOMXPath("ns:entry");
+            xpath.addNamespace("ns", "http://www.w3.org/2005/Atom");
+            return xpath.selectNodes(element);
+        } catch (JaxenException e) {
+            throw new IllegalStateException("Unable to select the Atom entries of the arXiv response", e);
+        }
+    }
+}
diff --git a/dspace-api/src/main/java/org/dspace/importer/external/service/AbstractImportMetadataSourceService.java b/dspace-api/src/main/java/org/dspace/importer/external/service/AbstractImportMetadataSourceService.java index a803958a9d..3bf76438cd 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/service/AbstractImportMetadataSourceService.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/service/AbstractImportMetadataSourceService.java @@ -16,7 +16,6 @@ import org.dspace.importer.external.metadatamapping.contributor.MetadataContribu import org.dspace.importer.external.metadatamapping.transform.GenerateQueryService; import org.dspace.importer.external.service.components.AbstractRemoteMetadataSource; import org.dspace.importer.external.service.components.MetadataSource; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Required; /** @@ -49,7 +48,6 @@ public abstract class AbstractImportMetadataSourceService extends Ab * * @param generateQueryForItem the query generator to be used. */ - @Autowired public void setGenerateQueryForItem(GenerateQueryService generateQueryForItem) { this.generateQueryForItem = generateQueryForItem; } diff --git a/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml b/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml index bbdf085619..a351280b98 100644 --- a/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml +++ b/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml @@ -19,10 +19,6 @@ - - - + + + + + + + + + + + + + + + + + + + + Defines which metadatum is mapped on which metadatum. 
Note that while the key must be unique it + only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over + what metadatafield is generated. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Defines how an org.dspace.content.Item is mapped to a query in scopus. Please note that exactly one of + these must be present. If multiple are present the result is undefined. + + + + \ No newline at end of file diff --git a/dspace/config/spring/api/external-services.xml b/dspace/config/spring/api/external-services.xml index 098b53c2ca..af24e41980 100644 --- a/dspace/config/spring/api/external-services.xml +++ b/dspace/config/spring/api/external-services.xml @@ -31,10 +31,17 @@ - + + + + + + + +