DataCite plugin for import

Plugin for import of external metadata from DataCite for publications.
Code by @johannastaudinger, @floriangantner and @philipprumpf.
This commit is contained in:
Johanna Staudinger
2022-11-16 14:14:25 +01:00
committed by Philipp Rumpf
parent ec0853ddad
commit a9cee40a15
6 changed files with 481 additions and 2 deletions

View File

@@ -0,0 +1,38 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.importer.external.datacite;
import java.util.Map;
import javax.annotation.Resource;
import org.dspace.importer.external.metadatamapping.AbstractMetadataFieldMapping;
/**
 * {@link AbstractMetadataFieldMapping} implementation for the DataCite import source.
 * It only wires the Spring-managed field map named {@code dataciteMetadataFieldMap} into the
 * parent class; the mapping logic itself lives in {@link AbstractMetadataFieldMapping}.
 *
 * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it)
 * @author Florian Gantner (florian.gantner@uni-bamberg.de)
 */
public class DataCiteFieldMapping extends AbstractMetadataFieldMapping {

    /**
     * Receives the map linking retrieved DataCite metadata to the metadata that will be set on the
     * item, and hands it unchanged to the parent class. The map is injected by name via
     * {@code @Resource}. While each key must be unique, it only matters for postprocessing of the
     * value: the mapped MetadatumContributor has full control over which metadata field is generated.
     *
     * @param metadataFieldMap the map containing the link between retrieved metadata and the
     *                         metadata that will be set on the item
     */
    @Override
    @Resource(name = "dataciteMetadataFieldMap")
    public void setMetadataFieldMap(Map metadataFieldMap) {
        super.setMetadataFieldMap(metadataFieldMap);
    }

}

View File

@@ -0,0 +1,379 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.importer.external.datacite;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.concurrent.Callable;
import javax.el.MethodNotFoundException;
import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.client.Invocation;
import javax.ws.rs.client.WebTarget;
import javax.ws.rs.core.Response;
import com.google.gson.Gson;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.ReadContext;
import net.minidev.json.JSONArray;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.client.methods.HttpGet;
import org.dspace.content.Item;
import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.datamodel.Query;
import org.dspace.importer.external.exception.MetadataSourceException;
import org.dspace.importer.external.service.AbstractImportMetadataSourceService;
import org.dspace.importer.external.service.DoiCheck;
import org.dspace.importer.external.service.components.QuerySource;
/**
 * Implements a data source for querying DataCite.
 * Mainly copied from CrossRefImportMetadataSourceServiceImpl.
 *
 * Optional affiliation information is not part of the API request.
 * https://support.datacite.org/docs/can-i-see-more-detailed-affiliation-information-in-the-rest-api
 *
 * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it)
 * @author Florian Gantner (florian.gantner@uni-bamberg.de)
 *
 */
public class DataCiteImportMetadataSourceServiceImpl
    extends AbstractImportMetadataSourceService<String> implements QuerySource {

    /** Base target of the DataCite DOI endpoint; assigned in {@link #init()}. */
    private WebTarget webTarget;

    @Override
    public String getImportSource() {
        return "datacite";
    }

    /**
     * Creates the JAX-RS client and points it at the DataCite DOI endpoint.
     */
    @Override
    public void init() throws Exception {
        Client client = ClientBuilder.newClient();
        webTarget = client.target("https://api.datacite.org/dois/");
    }

    /**
     * Looks up a single record by identifier.
     *
     * @param recordId a DOI (possibly with double-encoded slashes) or any other identifier
     * @return the first matching record, or {@code null} when nothing was found
     * @throws MetadataSourceException if the lookup ultimately fails
     */
    @Override
    public ImportRecord getRecord(String recordId) throws MetadataSourceException {
        String id = getID(recordId);
        String lookup = StringUtils.isNotBlank(id) ? id : recordId;
        List<ImportRecord> records = retry(new SearchByIdCallable(lookup));
        return firstOrNull(records);
    }

    /**
     * Counts the records matching the query. A query that is a DOI is answered with an existence
     * check (1 or 0); anything else is sent as a search and the reported total is returned.
     *
     * @param query a DOI or a free-text query
     * @return the number of matching records; 0 when the service does not answer with HTTP 200
     * @throws MetadataSourceException if the request ultimately fails
     */
    @Override
    public int getRecordsCount(String query) throws MetadataSourceException {
        String id = getID(query);
        if (StringUtils.isNotBlank(id)) {
            return retry(new DoiCheckCallable(id));
        }
        return retry(new CountByQueryCallable(query));
    }

    /**
     * Counts the records matching the query object; see {@link #getRecordsCount(String)}.
     *
     * @param query the query object; its "query" parameter is used for the search
     * @return the number of matching records; 0 when the service does not answer with HTTP 200
     * @throws MetadataSourceException if the request ultimately fails
     */
    @Override
    public int getRecordsCount(Query query) throws MetadataSourceException {
        // NOTE(review): the DOI shortcut relies on Query#toString() yielding the bare DOI - confirm.
        String id = getID(query.toString());
        if (StringUtils.isNotBlank(id)) {
            return retry(new DoiCheckCallable(id));
        }
        return retry(new CountByQueryCallable(query));
    }

    /**
     * Retrieves records for a DOI or a free-text query.
     *
     * @param query a DOI or a free-text query
     * @param start value forwarded as the "offset" request parameter
     * @param count value forwarded as the "rows" request parameter
     * @return the matching records; empty when the service does not answer with HTTP 200
     * @throws MetadataSourceException if the request ultimately fails
     */
    @Override
    public Collection<ImportRecord> getRecords(String query, int start, int count) throws MetadataSourceException {
        String id = getID(query);
        if (StringUtils.isNotBlank(id)) {
            return retry(new SearchByIdCallable(id));
        }
        return retry(new SearchByQueryCallable(query, count, start));
    }

    /**
     * Retrieves records for the given query object.
     *
     * @param query the query object ("query", "count" and "start" parameters are read)
     * @return the matching records; empty when the service does not answer with HTTP 200
     * @throws MetadataSourceException if the request ultimately fails
     */
    @Override
    public Collection<ImportRecord> getRecords(Query query) throws MetadataSourceException {
        String id = getID(query.toString());
        if (StringUtils.isNotBlank(id)) {
            return retry(new SearchByIdCallable(id));
        }
        return retry(new SearchByQueryCallable(query));
    }

    /**
     * Retrieves a single record for the given query object, using its "id" parameter unless the
     * query itself is recognised as a DOI.
     *
     * @param query the query object
     * @return the first matching record, or {@code null} when nothing was found
     * @throws MetadataSourceException if the request ultimately fails
     */
    @Override
    public ImportRecord getRecord(Query query) throws MetadataSourceException {
        String id = getID(query.toString());
        List<ImportRecord> records;
        if (StringUtils.isNotBlank(id)) {
            records = retry(new SearchByIdCallable(id));
        } else {
            records = retry(new SearchByIdCallable(query));
        }
        return firstOrNull(records);
    }

    /**
     * Finds records matching the given query object.
     *
     * @param query the query object ("query", "count" and "start" parameters are read)
     * @return the matching records; empty when the service does not answer with HTTP 200
     * @throws MetadataSourceException if the request ultimately fails
     */
    @Override
    public Collection<ImportRecord> findMatchingRecords(Query query) throws MetadataSourceException {
        String id = getID(query.toString());
        if (StringUtils.isNotBlank(id)) {
            return retry(new SearchByIdCallable(id));
        }
        return retry(new FindMatchingRecordCallable(query));
    }

    /**
     * Not supported for DataCite.
     *
     * @throws MethodNotFoundException always
     */
    @Override
    public Collection<ImportRecord> findMatchingRecords(Item item) throws MetadataSourceException {
        throw new MethodNotFoundException("This method is not implemented for Datacite");
    }

    /**
     * Extracts a DOI from the given query.
     *
     * @param query the raw query, possibly a DOI whose slashes arrive double-encoded as "%252F"
     * @return the DOI (with "%252F" replaced by "/" where that was needed to recognise it), or an
     *         empty string when the query is {@code null} or not a DOI
     */
    public String getID(String query) {
        if (query == null) {
            return StringUtils.EMPTY;
        }
        if (DoiCheck.isDoi(query)) {
            return query;
        }
        // Workaround for encoded slashes.
        String decoded = query.replace("%252F", "/");
        return DoiCheck.isDoi(decoded) ? decoded : StringUtils.EMPTY;
    }

    /** Returns the first element of the list, or {@code null} for a null or empty list. */
    private ImportRecord firstOrNull(List<ImportRecord> records) {
        return records == null || records.isEmpty() ? null : records.get(0);
    }

    /**
     * Executes a GET on the target and returns the response body.
     * The response is always closed so the underlying connection is released.
     *
     * @return the body as a string, or {@code null} when the status is not 200
     */
    private String fetchBody(WebTarget target) {
        Response response = target.request().get();
        try {
            if (response.getStatus() != 200) {
                return null;
            }
            return response.readEntity(String.class);
        } finally {
            response.close();
        }
    }

    /**
     * Executes a GET on the target and converts every "attributes" object found under "data"
     * into an {@link ImportRecord}. Both response shapes are handled: "data" as a single object
     * and "data" as an array of objects.
     *
     * @return the converted records; empty when the status is not 200
     */
    private List<ImportRecord> fetchRecords(WebTarget target) {
        List<ImportRecord> results = new ArrayList<>();
        String body = fetchBody(target);
        if (body == null) {
            return results;
        }
        ReadContext ctx = JsonPath.parse(body);
        Gson gson = new Gson();
        Object data = ctx.read("$.data");
        if (data instanceof List) {
            // "$.data.attributes" cannot be resolved against an array, so collect per element.
            List<Object> attributesList = ctx.read("$.data[*].attributes");
            for (Object attributes : attributesList) {
                addRecord(results, gson, attributes);
            }
        } else {
            Object attributes = ctx.read("$.data.attributes");
            addRecord(results, gson, attributes);
        }
        return results;
    }

    /** Serialises one attributes object to JSON and appends the transformed record; skips null. */
    private void addRecord(List<ImportRecord> results, Gson gson, Object attributes) {
        if (attributes == null) {
            return;
        }
        // The parsed object's toString() is not guaranteed to be JSON, so serialise it explicitly.
        results.add(transformSourceRecords(gson.toJson(attributes)));
    }

    /**
     * Searches by free-text query with optional paging.
     */
    private class SearchByQueryCallable implements Callable<List<ImportRecord>> {

        private final Query query;

        private SearchByQueryCallable(String queryString, Integer maxResult, Integer start) {
            query = new Query();
            query.addParameter("query", queryString);
            query.addParameter("count", maxResult);
            query.addParameter("start", start);
        }

        private SearchByQueryCallable(Query query) {
            this.query = query;
        }

        @Override
        public List<ImportRecord> call() throws Exception {
            try {
                Integer count = query.getParameterAsClass("count", Integer.class);
                Integer start = query.getParameterAsClass("start", Integer.class);
                WebTarget local = webTarget.queryParam("query", query.getParameterAsClass("query", String.class));
                // NOTE(review): FindMatchingRecordCallable sends "page[size]"/"page[number]" for the
                // same values - confirm which parameter names the DataCite API honours.
                if (count != null) {
                    local = local.queryParam("rows", count);
                }
                if (start != null) {
                    local = local.queryParam("offset", start);
                }
                return fetchRecords(local);
            } catch (Exception e) {
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    }

    /**
     * Fetches a single DOI by appending the "id" parameter to the endpoint path.
     */
    private class SearchByIdCallable implements Callable<List<ImportRecord>> {

        private final Query query;

        private SearchByIdCallable(Query query) {
            this.query = query;
        }

        private SearchByIdCallable(String id) {
            this.query = new Query();
            query.addParameter("id", id);
        }

        @Override
        public List<ImportRecord> call() throws Exception {
            try {
                WebTarget local = webTarget.path(query.getParameterAsClass("id", String.class));
                return fetchRecords(local);
            } catch (Exception e) {
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    }

    /**
     * Searches using the "query", "count" and "start" parameters of a query object.
     */
    private class FindMatchingRecordCallable implements Callable<List<ImportRecord>> {

        private final Query query;

        private FindMatchingRecordCallable(Query q) {
            query = q;
        }

        @Override
        public List<ImportRecord> call() throws Exception {
            String queryValue = query.getParameterAsClass("query", String.class);
            Integer count = query.getParameterAsClass("count", Integer.class);
            Integer start = query.getParameterAsClass("start", Integer.class);
            try {
                WebTarget local = webTarget;
                if (queryValue != null) {
                    local = local.queryParam("query", queryValue);
                }
                if (count != null) {
                    local = local.queryParam("page[size]", count);
                }
                // NOTE(review): "start" is forwarded as a page number here but as "offset" in
                // SearchByQueryCallable - confirm the intended semantics.
                if (start != null) {
                    local = local.queryParam("page[number]", start);
                }
                return fetchRecords(local);
            } catch (Exception e) {
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    }

    /**
     * Reads the total hit count ("$.meta.total") of a free-text search.
     */
    private class CountByQueryCallable implements Callable<Integer> {

        private final Query query;

        private CountByQueryCallable(String queryString) {
            query = new Query();
            query.addParameter("query", queryString);
        }

        private CountByQueryCallable(Query query) {
            this.query = query;
        }

        @Override
        public Integer call() throws Exception {
            try {
                WebTarget local = webTarget.queryParam("query", query.getParameterAsClass("query", String.class));
                String body = fetchBody(local);
                if (body == null) {
                    // Callers unbox the result to int, so never hand back null.
                    return 0;
                }
                Integer total = JsonPath.parse(body).read("$.meta.total");
                return total == null ? 0 : total;
            } catch (Exception e) {
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    }

    /**
     * Checks with a HEAD request whether a DOI exists: yields 1 for HTTP 200 and 0 otherwise.
     */
    private class DoiCheckCallable implements Callable<Integer> {

        private final Query query;

        private DoiCheckCallable(final String id) {
            final Query query = new Query();
            query.addParameter("id", id);
            this.query = query;
        }

        private DoiCheckCallable(final Query query) {
            this.query = query;
        }

        @Override
        public Integer call() throws Exception {
            WebTarget local = webTarget.path(query.getParameterAsClass("id", String.class));
            Response response = local.request().head();
            try {
                return response.getStatus() == 200 ? 1 : 0;
            } finally {
                response.close();
            }
        }
    }

}

View File

@@ -34,6 +34,14 @@
</property>
</bean>
<!-- DataCite import service; its field mapping is the DataCiteMetadataFieldMapping bean defined below -->
<bean id="DataCiteImportService"
class="org.dspace.importer.external.datacite.DataCiteImportMetadataSourceServiceImpl" scope="singleton">
<property name="metadataFieldMapping" ref="DataCiteMetadataFieldMapping"/>
</bean>
<!-- No properties are set here: DataCiteFieldMapping declares a @Resource setter for the map
     named "dataciteMetadataFieldMap" -->
<bean id="DataCiteMetadataFieldMapping"
class="org.dspace.importer.external.datacite.DataCiteFieldMapping">
</bean>
<bean id="ArXivImportService"
class="org.dspace.importer.external.arxiv.service.ArXivImportMetadataSourceServiceImpl" scope="singleton">
<property name="metadataFieldMapping" ref="ArXivMetadataFieldMapping"/>

View File

@@ -91,4 +91,15 @@
</property>
</bean>
<!-- External data provider backed by the DataCiteImportService bean, registered under the
     source identifier "datacite"; dc.identifier.doi is configured as the record id metadata -->
<bean id="dataciteLiveImportDataProvider" class="org.dspace.external.provider.impl.LiveImportDataProvider">
<property name="metadataSource" ref="DataCiteImportService"/>
<property name="sourceIdentifier" value="datacite"/>
<property name="recordIdMetadata" value="dc.identifier.doi"/>
<property name="supportedEntityTypes">
<list>
<value>Publication</value>
<value>none</value>
</list>
</property>
</bean>
</beans>

View File

@@ -53,7 +53,7 @@ public class ExternalSourcesRestControllerIT extends AbstractControllerIntegrati
ExternalSourceMatcher.matchExternalSource(
"openAIREFunding", "openAIREFunding", false)
)))
.andExpect(jsonPath("$.page.totalElements", Matchers.is(9)));
.andExpect(jsonPath("$.page.totalElements", Matchers.is(10)));
}
@Test

View File

@@ -0,0 +1,43 @@
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:context="http://www.springframework.org/schema/context"
xmlns:util="http://www.springframework.org/schema/util"
xsi:schemaLocation="http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-2.5.xsd
http://www.springframework.org/schema/context
http://www.springframework.org/schema/context/spring-context-2.5.xsd http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util.xsd"
default-autowire-candidates="*Service,*DAO,javax.sql.DataSource">
<!-- Field mapping configuration for the DataCite import source: each entry of the
     dataciteMetadataFieldMap pairs a target metadata field with the contributor that fills it. -->
<context:annotation-config/>
<!-- allows us to use spring annotations in beans -->
<util:map id="dataciteMetadataFieldMap" key-type="org.dspace.importer.external.metadatamapping.MetadataFieldConfig"
value-type="org.dspace.importer.external.metadatamapping.contributor.MetadataContributor">
<description>Defines which metadatum is mapped on which metadatum. Note that while the key must be unique it
only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over
what metadatafield is generated.
</description>
<entry key-ref="datacite.title" value-ref="dataciteTitleContrib"/>
<entry key-ref="datacite.id" value-ref="dataciteIDContrib"/>
<!-- TODO: Add further mappings here. Queries are applied to the data.attributes object, which contains the
     record information. -->
</util:map>
<!-- Title: maps the "/titles" query result to dc.title.
     NOTE(review): "titles" is presumably an array of objects in the DataCite response; confirm that this
     query yields the title text rather than the whole array. -->
<bean id="dataciteTitleContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleJsonPathMetadataContributor">
<property name="field" ref="datacite.title"/>
<property name="query" value="/titles"/>
</bean>
<bean id="datacite.title" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
<constructor-arg value="dc.title"/>
</bean>
<!-- DOI: maps the "/doi" query result to dc.identifier.doi.
     Must be present to be imported, since it's used as the recordId. -->
<bean id="dataciteIDContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleJsonPathMetadataContributor">
<property name="field" ref="datacite.id"/>
<property name="query" value="/doi"/>
</bean>
<bean id="datacite.id" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
<constructor-arg value="dc.identifier.doi"/>
</bean>
</beans>