mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-07 10:04:21 +00:00
DataCite plugin for import
Plugin for import of external metadata from DataCite for publications. Code by @johannastaudinger, @floriangantner and @philipprumpf.
This commit is contained in:

committed by
Philipp Rumpf

parent
ec0853ddad
commit
a9cee40a15
38
dspace-api/src/main/java/org/dspace/importer/external/datacite/DataCiteFieldMapping.java
vendored
Normal file
38
dspace-api/src/main/java/org/dspace/importer/external/datacite/DataCiteFieldMapping.java
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
/**
|
||||
* The contents of this file are subject to the license and copyright
|
||||
* detailed in the LICENSE and NOTICE files at the root of the source
|
||||
* tree and available online at
|
||||
*
|
||||
* http://www.dspace.org/license/
|
||||
*/
|
||||
package org.dspace.importer.external.datacite;
|
||||
|
||||
import java.util.Map;
|
||||
import javax.annotation.Resource;
|
||||
|
||||
import org.dspace.importer.external.metadatamapping.AbstractMetadataFieldMapping;
|
||||
|
||||
/**
|
||||
* An implementation of {@link AbstractMetadataFieldMapping}
|
||||
* Responsible for defining the mapping of the datacite metadatum fields on the DSpace metadatum fields
|
||||
*
|
||||
* @author Pasquale Cavallo (pasquale.cavallo at 4science dot it)
|
||||
* @author Florian Gantner (florian.gantner@uni-bamberg.de)
|
||||
*/
|
||||
public class DataCiteFieldMapping extends AbstractMetadataFieldMapping {
|
||||
|
||||
/**
|
||||
* Defines which metadatum is mapped on which metadatum. Note that while the key must be unique it
|
||||
* only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over
|
||||
* what metadatafield is generated.
|
||||
*
|
||||
* @param metadataFieldMap The map containing the link between retrieve metadata and metadata that will be set to
|
||||
* the item.
|
||||
*/
|
||||
@Override
|
||||
@Resource(name = "dataciteMetadataFieldMap")
|
||||
public void setMetadataFieldMap(Map metadataFieldMap) {
|
||||
super.setMetadataFieldMap(metadataFieldMap);
|
||||
}
|
||||
|
||||
}
|
@@ -0,0 +1,379 @@
|
||||
/**
|
||||
* The contents of this file are subject to the license and copyright
|
||||
* detailed in the LICENSE and NOTICE files at the root of the source
|
||||
* tree and available online at
|
||||
*
|
||||
* http://www.dspace.org/license/
|
||||
*/
|
||||
package org.dspace.importer.external.datacite;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.Callable;
|
||||
import javax.el.MethodNotFoundException;
|
||||
import javax.ws.rs.client.Client;
|
||||
import javax.ws.rs.client.ClientBuilder;
|
||||
import javax.ws.rs.client.Invocation;
|
||||
import javax.ws.rs.client.WebTarget;
|
||||
import javax.ws.rs.core.Response;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.jayway.jsonpath.JsonPath;
|
||||
import com.jayway.jsonpath.ReadContext;
|
||||
import net.minidev.json.JSONArray;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.dspace.content.Item;
|
||||
import org.dspace.importer.external.datamodel.ImportRecord;
|
||||
import org.dspace.importer.external.datamodel.Query;
|
||||
import org.dspace.importer.external.exception.MetadataSourceException;
|
||||
import org.dspace.importer.external.service.AbstractImportMetadataSourceService;
|
||||
import org.dspace.importer.external.service.DoiCheck;
|
||||
import org.dspace.importer.external.service.components.QuerySource;
|
||||
|
||||
/**
|
||||
* Implements a data source for querying Datacite
|
||||
* Mainly copied from CrossRefImportMetadataSourceServiceImpl.
|
||||
*
|
||||
* optional Affiliation informations are not part of the API request.
|
||||
* https://support.datacite.org/docs/can-i-see-more-detailed-affiliation-information-in-the-rest-api
|
||||
*
|
||||
* @author Pasquale Cavallo (pasquale.cavallo at 4science dot it)
|
||||
* @author Florian Gantner (florian.gantner@uni-bamberg.de)
|
||||
*
|
||||
*/
|
||||
public class DataCiteImportMetadataSourceServiceImpl
|
||||
extends AbstractImportMetadataSourceService<String> implements QuerySource {
|
||||
|
||||
private WebTarget webTarget;
|
||||
|
||||
@Override
|
||||
public String getImportSource() {
|
||||
return "datacite";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init() throws Exception {
|
||||
Client client = ClientBuilder.newClient();
|
||||
webTarget = client.target("https://api.datacite.org/dois/");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ImportRecord getRecord(String recordId) throws MetadataSourceException {
|
||||
List<ImportRecord> records = null;
|
||||
String id = getID(recordId);
|
||||
if (StringUtils.isNotBlank(id)) {
|
||||
records = retry(new SearchByIdCallable(id));
|
||||
} else {
|
||||
records = retry(new SearchByIdCallable(recordId));
|
||||
}
|
||||
return records == null || records.isEmpty() ? null : records.get(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getRecordsCount(String query) throws MetadataSourceException {
|
||||
String id = getID(query);
|
||||
if (StringUtils.isNotBlank(id)) {
|
||||
return retry(new DoiCheckCallable(id));
|
||||
}
|
||||
return retry(new CountByQueryCallable(query));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getRecordsCount(Query query) throws MetadataSourceException {
|
||||
String id = getID(query.toString());
|
||||
if (StringUtils.isNotBlank(id)) {
|
||||
return retry(new DoiCheckCallable(id));
|
||||
}
|
||||
return retry(new CountByQueryCallable(query));
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Collection<ImportRecord> getRecords(String query, int start, int count) throws MetadataSourceException {
|
||||
String id = getID(query.toString());
|
||||
if (StringUtils.isNotBlank(id)) {
|
||||
return retry(new SearchByIdCallable(id));
|
||||
}
|
||||
return retry(new SearchByQueryCallable(query, count, start));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<ImportRecord> getRecords(Query query) throws MetadataSourceException {
|
||||
String id = getID(query.toString());
|
||||
if (StringUtils.isNotBlank(id)) {
|
||||
return retry(new SearchByIdCallable(id));
|
||||
}
|
||||
return retry(new SearchByQueryCallable(query));
|
||||
}
|
||||
|
||||
@Override
|
||||
public ImportRecord getRecord(Query query) throws MetadataSourceException {
|
||||
List<ImportRecord> records = null;
|
||||
String id = getID(query.toString());
|
||||
if (StringUtils.isNotBlank(id)) {
|
||||
records = retry(new SearchByIdCallable(id));
|
||||
} else {
|
||||
records = retry(new SearchByIdCallable(query));
|
||||
}
|
||||
return records == null || records.isEmpty() ? null : records.get(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<ImportRecord> findMatchingRecords(Query query) throws MetadataSourceException {
|
||||
String id = getID(query.toString());
|
||||
if (StringUtils.isNotBlank(id)) {
|
||||
return retry(new SearchByIdCallable(id));
|
||||
}
|
||||
return retry(new FindMatchingRecordCallable(query));
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Collection<ImportRecord> findMatchingRecords(Item item) throws MetadataSourceException {
|
||||
throw new MethodNotFoundException("This method is not implemented for Datacite");
|
||||
}
|
||||
|
||||
public String getID(String query) {
|
||||
if (DoiCheck.isDoi(query)) {
|
||||
return query;
|
||||
}
|
||||
//Workaround for encoded slashes.
|
||||
if (query.contains("%252F")) {
|
||||
query = query.replace("%252F", "/");
|
||||
}
|
||||
if (DoiCheck.isDoi(query)) {
|
||||
return query;
|
||||
}
|
||||
return StringUtils.EMPTY;
|
||||
}
|
||||
|
||||
private class SearchByQueryCallable implements Callable<List<ImportRecord>> {
|
||||
|
||||
private Query query;
|
||||
|
||||
private SearchByQueryCallable(String queryString, Integer maxResult, Integer start) {
|
||||
query = new Query();
|
||||
query.addParameter("query", queryString);
|
||||
query.addParameter("count", maxResult);
|
||||
query.addParameter("start", start);
|
||||
}
|
||||
|
||||
private SearchByQueryCallable(Query query) {
|
||||
this.query = query;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ImportRecord> call() throws Exception {
|
||||
List<ImportRecord> results = new ArrayList<>();
|
||||
HttpGet method = null;
|
||||
try {
|
||||
Integer count = query.getParameterAsClass("count", Integer.class);
|
||||
Integer start = query.getParameterAsClass("start", Integer.class);
|
||||
WebTarget local = webTarget.queryParam("query", query.getParameterAsClass("query", String.class));
|
||||
if (count != null) {
|
||||
local = local.queryParam("rows", count);
|
||||
}
|
||||
if (start != null) {
|
||||
local = local.queryParam("offset", start);
|
||||
}
|
||||
Invocation.Builder invocationBuilder = local.request();
|
||||
Response response = invocationBuilder.get();
|
||||
if (response.getStatus() != 200) {
|
||||
return null;
|
||||
}
|
||||
String responseString = response.readEntity(String.class);
|
||||
ReadContext ctx = JsonPath.parse(responseString);
|
||||
Object o = ctx.read("$.data.attributes");
|
||||
if (o.getClass().isAssignableFrom(JSONArray.class)) {
|
||||
JSONArray array = (JSONArray)o;
|
||||
int size = array.size();
|
||||
for (int index = 0; index < size; index++) {
|
||||
Gson gson = new Gson();
|
||||
String innerJson = gson.toJson(array.get(index), LinkedHashMap.class);
|
||||
results.add(transformSourceRecords(innerJson));
|
||||
}
|
||||
} else {
|
||||
results.add(transformSourceRecords(o.toString()));
|
||||
}
|
||||
return results;
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e.getMessage(), e);
|
||||
} finally {
|
||||
if (method != null) {
|
||||
method.releaseConnection();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private class SearchByIdCallable implements Callable<List<ImportRecord>> {
|
||||
private Query query;
|
||||
|
||||
private SearchByIdCallable(Query query) {
|
||||
this.query = query;
|
||||
}
|
||||
|
||||
private SearchByIdCallable(String id) {
|
||||
this.query = new Query();
|
||||
query.addParameter("id", id);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ImportRecord> call() throws Exception {
|
||||
List<ImportRecord> results = new ArrayList<>();
|
||||
HttpGet method = null;
|
||||
try {
|
||||
WebTarget local = webTarget.path(query.getParameterAsClass("id", String.class));
|
||||
Invocation.Builder invocationBuilder = local.request();
|
||||
Response response = invocationBuilder.get();
|
||||
if (response.getStatus() != 200) {
|
||||
return null;
|
||||
}
|
||||
String responseString = response.readEntity(String.class);
|
||||
ReadContext ctx = JsonPath.parse(responseString);
|
||||
Object o = ctx.read("$.data.attributes");
|
||||
if (o.getClass().isAssignableFrom(JSONArray.class)) {
|
||||
JSONArray array = (JSONArray)o;
|
||||
int size = array.size();
|
||||
for (int index = 0; index < size; index++) {
|
||||
Gson gson = new Gson();
|
||||
String innerJson = gson.toJson(array.get(index), LinkedHashMap.class);
|
||||
results.add(transformSourceRecords(innerJson));
|
||||
}
|
||||
} else {
|
||||
Gson gson = new Gson();
|
||||
results.add(transformSourceRecords(gson.toJson(o, Object.class)));
|
||||
}
|
||||
return results;
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e.getMessage(), e);
|
||||
} finally {
|
||||
if (method != null) {
|
||||
method.releaseConnection();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class FindMatchingRecordCallable implements Callable<List<ImportRecord>> {
|
||||
|
||||
private Query query;
|
||||
|
||||
private FindMatchingRecordCallable(Query q) {
|
||||
query = q;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ImportRecord> call() throws Exception {
|
||||
String queryValue = query.getParameterAsClass("query", String.class);
|
||||
Integer count = query.getParameterAsClass("count", Integer.class);
|
||||
Integer start = query.getParameterAsClass("start", Integer.class);
|
||||
List<ImportRecord> results = new ArrayList<>();
|
||||
HttpGet method = null;
|
||||
try {
|
||||
WebTarget local = webTarget;
|
||||
if (queryValue != null) {
|
||||
local = local.queryParam("query", queryValue);
|
||||
}
|
||||
if (count != null) {
|
||||
local = local.queryParam("page[size]", count);
|
||||
}
|
||||
if (start != null) {
|
||||
local = local.queryParam("page[number]", start);
|
||||
}
|
||||
Invocation.Builder invocationBuilder = local.request();
|
||||
Response response = invocationBuilder.get();
|
||||
if (response.getStatus() != 200) {
|
||||
return null;
|
||||
}
|
||||
String responseString = response.readEntity(String.class);
|
||||
ReadContext ctx = JsonPath.parse(responseString);
|
||||
Object o = ctx.read("$.data.attributes");
|
||||
if (o.getClass().isAssignableFrom(JSONArray.class)) {
|
||||
JSONArray array = (JSONArray)o;
|
||||
int size = array.size();
|
||||
for (int index = 0; index < size; index++) {
|
||||
Gson gson = new Gson();
|
||||
String innerJson = gson.toJson(array.get(index), LinkedHashMap.class);
|
||||
results.add(transformSourceRecords(innerJson));
|
||||
}
|
||||
} else {
|
||||
results.add(transformSourceRecords(o.toString()));
|
||||
}
|
||||
return results;
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e.getMessage(), e);
|
||||
} finally {
|
||||
if (method != null) {
|
||||
method.releaseConnection();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private class CountByQueryCallable implements Callable<Integer> {
|
||||
private Query query;
|
||||
|
||||
|
||||
private CountByQueryCallable(String queryString) {
|
||||
query = new Query();
|
||||
query.addParameter("query", queryString);
|
||||
}
|
||||
|
||||
private CountByQueryCallable(Query query) {
|
||||
this.query = query;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Integer call() throws Exception {
|
||||
HttpGet method = null;
|
||||
try {
|
||||
WebTarget local = webTarget.queryParam("query", query.getParameterAsClass("query", String.class));
|
||||
Invocation.Builder invocationBuilder = local.request();
|
||||
Response response = invocationBuilder.get();
|
||||
if (response.getStatus() != 200) {
|
||||
return null;
|
||||
}
|
||||
String responseString = response.readEntity(String.class);
|
||||
ReadContext ctx = JsonPath.parse(responseString);
|
||||
return ctx.read("$.meta.total");
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e.getMessage(), e);
|
||||
} finally {
|
||||
if (method != null) {
|
||||
method.releaseConnection();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class DoiCheckCallable implements Callable<Integer> {
|
||||
|
||||
private final Query query;
|
||||
|
||||
private DoiCheckCallable(final String id) {
|
||||
final Query query = new Query();
|
||||
query.addParameter("id", id);
|
||||
this.query = query;
|
||||
}
|
||||
|
||||
private DoiCheckCallable(final Query query) {
|
||||
this.query = query;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer call() throws Exception {
|
||||
WebTarget local = webTarget.path(query.getParameterAsClass("id", String.class));
|
||||
Invocation.Builder invocationBuilder = local.request();
|
||||
Response response = invocationBuilder.head();
|
||||
return response.getStatus() == 200 ? 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@@ -34,6 +34,14 @@
|
||||
</property>
|
||||
</bean>
|
||||
|
||||
<bean id="DataCiteImportService"
|
||||
class="org.dspace.importer.external.datacite.DataCiteImportMetadataSourceServiceImpl" scope="singleton">
|
||||
<property name="metadataFieldMapping" ref="DataCiteMetadataFieldMapping"/>
|
||||
</bean>
|
||||
<bean id="DataCiteMetadataFieldMapping"
|
||||
class="org.dspace.importer.external.datacite.DataCiteFieldMapping">
|
||||
</bean>
|
||||
|
||||
<bean id="ArXivImportService"
|
||||
class="org.dspace.importer.external.arxiv.service.ArXivImportMetadataSourceServiceImpl" scope="singleton">
|
||||
<property name="metadataFieldMapping" ref="ArXivMetadataFieldMapping"/>
|
||||
|
@@ -91,4 +91,15 @@
|
||||
</property>
|
||||
</bean>
|
||||
|
||||
</beans>
|
||||
<bean id="dataciteLiveImportDataProvider" class="org.dspace.external.provider.impl.LiveImportDataProvider">
|
||||
<property name="metadataSource" ref="DataCiteImportService"/>
|
||||
<property name="sourceIdentifier" value="datacite"/>
|
||||
<property name="recordIdMetadata" value="dc.identifier.doi"/>
|
||||
<property name="supportedEntityTypes">
|
||||
<list>
|
||||
<value>Publication</value>
|
||||
<value>none</value>
|
||||
</list>
|
||||
</property>
|
||||
</bean>
|
||||
</beans>
|
||||
|
@@ -53,7 +53,7 @@ public class ExternalSourcesRestControllerIT extends AbstractControllerIntegrati
|
||||
ExternalSourceMatcher.matchExternalSource(
|
||||
"openAIREFunding", "openAIREFunding", false)
|
||||
)))
|
||||
.andExpect(jsonPath("$.page.totalElements", Matchers.is(9)));
|
||||
.andExpect(jsonPath("$.page.totalElements", Matchers.is(10)));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
43
dspace/config/spring/api/datacite-integration.xml
Normal file
43
dspace/config/spring/api/datacite-integration.xml
Normal file
@@ -0,0 +1,43 @@
|
||||
<beans xmlns="http://www.springframework.org/schema/beans"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns:context="http://www.springframework.org/schema/context"
|
||||
xmlns:util="http://www.springframework.org/schema/util"
|
||||
xsi:schemaLocation="http://www.springframework.org/schema/beans
|
||||
http://www.springframework.org/schema/beans/spring-beans-2.5.xsd
|
||||
http://www.springframework.org/schema/context
|
||||
http://www.springframework.org/schema/context/spring-context-2.5.xsd http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util.xsd"
|
||||
default-autowire-candidates="*Service,*DAO,javax.sql.DataSource">
|
||||
|
||||
|
||||
<context:annotation-config/>
|
||||
<!-- allows us to use spring annotations in beans -->
|
||||
|
||||
<util:map id="dataciteMetadataFieldMap" key-type="org.dspace.importer.external.metadatamapping.MetadataFieldConfig"
|
||||
value-type="org.dspace.importer.external.metadatamapping.contributor.MetadataContributor">
|
||||
<description>Defines which metadatum is mapped on which metadatum. Note that while the key must be unique it
|
||||
only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over
|
||||
what metadatafield is generated.
|
||||
</description>
|
||||
<entry key-ref="datacite.title" value-ref="dataciteTitleContrib"/>
|
||||
<entry key-ref="datacite.id" value-ref="dataciteIDContrib"/>
|
||||
<!-- TODO: Add further mappings here. Queries are applied to the data.attributes object, which contains the record's metadata. -->
|
||||
</util:map>
|
||||
|
||||
<bean id="dataciteTitleContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleJsonPathMetadataContributor">
|
||||
<property name="field" ref="datacite.title"/>
|
||||
<property name="query" value="/titles"/>
|
||||
</bean>
|
||||
<bean id="datacite.title" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
|
||||
<constructor-arg value="dc.title"/>
|
||||
</bean>
|
||||
|
||||
<!-- must be present to be imported, since it's used as the recordId-->
|
||||
<bean id="dataciteIDContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleJsonPathMetadataContributor">
|
||||
<property name="field" ref="datacite.id"/>
|
||||
<property name="query" value="/doi"/>
|
||||
</bean>
|
||||
<bean id="datacite.id" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
|
||||
<constructor-arg value="dc.identifier.doi"/>
|
||||
</bean>
|
||||
|
||||
</beans>
|
Reference in New Issue
Block a user