From fd47e47f61f075cc7ae8973547d37e9c9df56f4c Mon Sep 17 00:00:00 2001 From: Adamo Date: Thu, 6 Feb 2025 00:25:19 +0100 Subject: [PATCH] [CST-18015] Added base OpenAlex Publication integration. [CST-18015] Added base OpenAlex Publication integration. --- .../AbstractJsonPathMetadataProcessor.java | 62 ++++ .../contributor/InvertedIndexProcessor.java | 55 ++++ .../JsonPathMetadataProcessor.java | 2 +- .../OpenAlexDateMetadataProcessor.java | 57 ++++ .../OpenAlexIdMetadataProcessor.java | 61 ++++ .../OpenAlexPublicationFieldMapping.java | 26 ++ .../OpenAlexImportMetadataSourceService.java | 16 + ...enAlexImportMetadataSourceServiceImpl.java | 287 ++++++++++++++++++ .../spring-dspace-addon-import-services.xml | 13 +- .../config/spring/api/external-services.xml | 22 ++ .../spring/api/openalex-integration.xml | 138 +++++++++ 11 files changed, 736 insertions(+), 3 deletions(-) create mode 100644 dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/AbstractJsonPathMetadataProcessor.java create mode 100644 dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/InvertedIndexProcessor.java create mode 100644 dspace-api/src/main/java/org/dspace/importer/external/openalex/metadatamapping/OpenAlexDateMetadataProcessor.java create mode 100644 dspace-api/src/main/java/org/dspace/importer/external/openalex/metadatamapping/OpenAlexIdMetadataProcessor.java create mode 100644 dspace-api/src/main/java/org/dspace/importer/external/openalex/metadatamapping/OpenAlexPublicationFieldMapping.java create mode 100644 dspace-api/src/main/java/org/dspace/importer/external/openalex/service/OpenAlexImportMetadataSourceService.java create mode 100644 dspace-api/src/main/java/org/dspace/importer/external/openalex/service/OpenAlexImportMetadataSourceServiceImpl.java create mode 100644 dspace/config/spring/api/openalex-integration.xml diff --git a/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/AbstractJsonPathMetadataProcessor.java b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/AbstractJsonPathMetadataProcessor.java new file mode 100644 index 0000000000..9cf87f7d89 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/AbstractJsonPathMetadataProcessor.java @@ -0,0 +1,62 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.metadatamapping.contributor; + +import java.util.ArrayList; +import java.util.Collection; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.Logger; + + +/** + * @author adamo.fapohunda at 4science.com + **/ +public abstract class AbstractJsonPathMetadataProcessor implements JsonPathMetadataProcessor { + + @Override + public Collection processMetadata(String json) { + Collection values = new ArrayList<>(); + JsonNode jsonNode = convertStringJsonToJsonNode(json); + JsonNode node = jsonNode.at(getPath()); + if (node.isArray()) { + for (JsonNode value : node) { + String nodeValue = getStringValue(value); + if (StringUtils.isNotBlank(nodeValue)) { + values.add(nodeValue); + } + } + } else if (!node.isNull() && StringUtils.isNotBlank(node.toString())) { + String nodeValue = getStringValue(node); + if (StringUtils.isNotBlank(nodeValue)) { + values.add(nodeValue); + } + } + return values; + } + + protected abstract String getStringValue(JsonNode node); + + protected abstract Logger getLogger(); + + protected abstract String getPath(); + + private JsonNode convertStringJsonToJsonNode(String json) { + ObjectMapper mapper = new ObjectMapper(); + JsonNode body = null; + try { + body = mapper.readTree(json); + } catch (JsonProcessingException e) { + getLogger().error("Unable to process json response.", e); + } + return body; + } +} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/InvertedIndexProcessor.java b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/InvertedIndexProcessor.java new file mode 100644 index 0000000000..180e29e592 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/InvertedIndexProcessor.java @@ -0,0 +1,55 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.metadatamapping.contributor; + +import java.util.SortedMap; +import java.util.TreeMap; + +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + + +/** + * @author adamo.fapohunda at 4science.com + **/ +public class InvertedIndexProcessor extends AbstractJsonPathMetadataProcessor { + + private static final Logger log = LogManager.getLogger(InvertedIndexProcessor.class); + + private String path; + + @Override + protected String getStringValue(JsonNode node) { + if (node == null || node.isEmpty()) { + return ""; + } + + SortedMap positionMap = new TreeMap<>(); + node.at(path).fields().forEachRemaining(entry -> entry.getValue() + .forEach(position -> + positionMap + .put(position.asInt(), entry.getKey()))); + + return String.join(" ", positionMap.values()); + } + + @Override + protected Logger getLogger() { + return log; + } + + @Override + protected String getPath() { + return ""; + } + + public void setPath(String path) { + this.path = path; + } +} \ No newline at end of file diff --git a/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/JsonPathMetadataProcessor.java b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/JsonPathMetadataProcessor.java index 2de0c6a0bb..eb1be7d411 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/JsonPathMetadataProcessor.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/JsonPathMetadataProcessor.java @@ -18,6 +18,6 @@ import java.util.Collection; */ public interface JsonPathMetadataProcessor { - public Collection processMetadata(String json); + Collection processMetadata(String json); } \ No newline at end of file diff --git a/dspace-api/src/main/java/org/dspace/importer/external/openalex/metadatamapping/OpenAlexDateMetadataProcessor.java b/dspace-api/src/main/java/org/dspace/importer/external/openalex/metadatamapping/OpenAlexDateMetadataProcessor.java new file mode 100644 index 0000000000..6556bf5692 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/openalex/metadatamapping/OpenAlexDateMetadataProcessor.java @@ -0,0 +1,57 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.openalex.metadatamapping; + +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; + +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.dspace.importer.external.metadatamapping.contributor.AbstractJsonPathMetadataProcessor; + +/** + * @author adamo.fapohunda at 4science.com + **/ +public class OpenAlexDateMetadataProcessor extends AbstractJsonPathMetadataProcessor { + + private static final Logger log = LogManager.getLogger(OpenAlexDateMetadataProcessor.class); + + private String path; + + @Override + protected String getStringValue(JsonNode node) { + + if (node == null || !node.isTextual()) { + throw new IllegalArgumentException("Input must be a non-null JsonNode containing a text value"); + } + + try { + String dateStr = node.asText(); + LocalDate date = LocalDate.parse(dateStr, DateTimeFormatter.ISO_DATE); + return date.toString(); + } catch (DateTimeParseException e) { + throw new IllegalArgumentException("Invalid ISO 8601 date format: " + e.getMessage(), e); + } + } + + @Override + protected Logger getLogger() { + return log; + } + + @Override + protected String getPath() { + return path; + } + + public void setPath(String path) { + this.path = path; + } +} \ No newline at end of file diff --git a/dspace-api/src/main/java/org/dspace/importer/external/openalex/metadatamapping/OpenAlexIdMetadataProcessor.java b/dspace-api/src/main/java/org/dspace/importer/external/openalex/metadatamapping/OpenAlexIdMetadataProcessor.java new file mode 100644 index 0000000000..e53cbcecae --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/openalex/metadatamapping/OpenAlexIdMetadataProcessor.java @@ -0,0 +1,61 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.openalex.metadatamapping; + +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.dspace.importer.external.metadatamapping.contributor.AbstractJsonPathMetadataProcessor; + +/** + * @author adamo.fapohunda at 4science.com + **/ +public class OpenAlexIdMetadataProcessor extends AbstractJsonPathMetadataProcessor { + + private static final Logger log = LogManager.getLogger(OpenAlexIdMetadataProcessor.class); + + private String path; + + private String toBeReplaced; + + private String replacement; + + @Override + protected String getStringValue(JsonNode node) { + if (node == null || !node.isTextual()) { + throw new IllegalArgumentException("Input must be a non-null JsonNode containing a text value"); + } + String idStr = node.asText(); + if (toBeReplaced == null || toBeReplaced.isEmpty() || replacement == null) { + return idStr; + } + return idStr.replaceAll(toBeReplaced, replacement); + } + + @Override + protected Logger getLogger() { + return log; + } + + @Override + protected String getPath() { + return path; + } + + public void setPath(String path) { + this.path = path; + } + + public void setToBeReplaced(String toBeReplaced) { + this.toBeReplaced = toBeReplaced; + } + + public void setReplacement(String replacement) { + this.replacement = replacement; + } +} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/openalex/metadatamapping/OpenAlexPublicationFieldMapping.java b/dspace-api/src/main/java/org/dspace/importer/external/openalex/metadatamapping/OpenAlexPublicationFieldMapping.java new file mode 100644 index 0000000000..f1d9cf6512 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/openalex/metadatamapping/OpenAlexPublicationFieldMapping.java @@ -0,0 +1,26 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.openalex.metadatamapping; + +import java.util.Map; + +import jakarta.annotation.Resource; +import org.dspace.importer.external.metadatamapping.AbstractMetadataFieldMapping; + + +/** + * @author adamo.fapohunda at 4science.com + **/ +public class OpenAlexPublicationFieldMapping extends AbstractMetadataFieldMapping { + + @Override + @Resource(name = "openalexPublicationsMetadataFieldMap") + public void setMetadataFieldMap(Map metadataFieldMap) { + super.setMetadataFieldMap(metadataFieldMap); + } +} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/openalex/service/OpenAlexImportMetadataSourceService.java b/dspace-api/src/main/java/org/dspace/importer/external/openalex/service/OpenAlexImportMetadataSourceService.java new file mode 100644 index 0000000000..01111de029 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/openalex/service/OpenAlexImportMetadataSourceService.java @@ -0,0 +1,16 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.openalex.service; + +import org.dspace.importer.external.service.components.QuerySource; + +/** + * @author adamo.fapohunda at 4science.com + **/ +public interface OpenAlexImportMetadataSourceService extends QuerySource { +} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/openalex/service/OpenAlexImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/openalex/service/OpenAlexImportMetadataSourceServiceImpl.java new file mode 100644 index 0000000000..7ec23aa744 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/openalex/service/OpenAlexImportMetadataSourceServiceImpl.java @@ -0,0 +1,287 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.openalex.service; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Callable; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import jakarta.el.MethodNotFoundException; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.dspace.content.Item; +import org.dspace.importer.external.datamodel.ImportRecord; +import org.dspace.importer.external.datamodel.Query; +import org.dspace.importer.external.exception.MetadataSourceException; +import org.dspace.importer.external.liveimportclient.service.LiveImportClient; +import org.dspace.importer.external.liveimportclient.service.LiveImportClientImpl; +import org.dspace.importer.external.service.AbstractImportMetadataSourceService; +import org.springframework.beans.factory.annotation.Autowired; + + +/** + * @author adamo.fapohunda at 4science.com + **/ +public class OpenAlexImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService + implements OpenAlexImportMetadataSourceService { + + private final static Logger log = LogManager.getLogger(); + private final int timeout = 1000; + private String url; + + @Autowired + private LiveImportClient liveImportClient; + + @Override + public String getImportSource() { + return "openalex"; + } + + @Override + public ImportRecord getRecord(String id) throws MetadataSourceException { + if (id == null) { + throw new MetadataSourceException("ID cannot be null"); + } + List records = retry(new SearchByIdCallable(id)); + return CollectionUtils.isEmpty(records) ? null : records.get(0); + } + + @Override + public int getRecordsCount(String query) throws MetadataSourceException { + if (query == null) { + throw new MetadataSourceException("Query cannot be null"); + } + return retry(new CountByQueryCallable(query)); + } + + @Override + public int getRecordsCount(Query query) throws MetadataSourceException { + if (query == null) { + throw new MetadataSourceException("Query cannot be null"); + } + return retry(new CountByQueryCallable(query)); + } + + @Override + public Collection getRecords(String query, int start, int count) throws MetadataSourceException { + if (query == null) { + throw new MetadataSourceException("Query cannot be null"); + } + return retry(new SearchByQueryCallable(query, start, count)); + } + + @Override + public Collection getRecords(Query query) throws MetadataSourceException { + throw new MethodNotFoundException("This method is not implemented for OpenAlex"); + } + + @Override + public ImportRecord getRecord(Query query) throws MetadataSourceException { + if (query == null) { + throw new MetadataSourceException("Query cannot be null"); + } + List records = retry(new SearchByIdCallable(query)); + return CollectionUtils.isEmpty(records) ? null : records.get(0); + } + + @Override + public Collection findMatchingRecords(Query query) throws MetadataSourceException { + throw new MethodNotFoundException("This method is not implemented for OpenAlex"); + } + + @Override + public Collection findMatchingRecords(Item item) throws MetadataSourceException { + throw new MethodNotFoundException("This method is not implemented for OpenAlex"); + } + + @Override + public void init() throws Exception { + if (liveImportClient == null) { + throw new IllegalStateException("LiveImportClient not properly initialized"); + } + if (StringUtils.isBlank(url)) { + throw new IllegalStateException("URL not properly configured"); + } + } + + public Integer count(String query) throws MetadataSourceException { + if (query == null) { + throw new MetadataSourceException("Query cannot be null"); + } + Map> params = new HashMap<>(); + Map uriParams = new HashMap<>(); + params.put(LiveImportClientImpl.URI_PARAMETERS, uriParams); + try { + uriParams.put("search", query); + String resp = liveImportClient.executeHttpGetRequest(timeout, this.url, params); + if (StringUtils.isEmpty(resp)) { + log.error("Got an empty response from LiveImportClient for query: {}", query); + return 0; + } + JsonNode jsonNode = convertStringJsonToJsonNode(resp); + if (jsonNode != null && jsonNode.hasNonNull("meta") + && jsonNode.at("/meta/count").isNumber()) { + return jsonNode.at("/meta/count").asInt(); + } + } catch (Exception e) { + log.error("Error executing count query", e); + } + return 0; + } + + private List searchById(String id) { + List results = new ArrayList<>(); + try { + String resp = liveImportClient.executeHttpGetRequest(timeout, this.url + "/" + id, new HashMap<>()); + if (StringUtils.isEmpty(resp)) { + return results; + } + JsonNode jsonNode = convertStringJsonToJsonNode(resp); + if (jsonNode != null) { + ImportRecord record = transformSourceRecords(jsonNode.toString()); + if (record != null) { + results.add(record); + } + } + } catch (Exception e) { + log.error("Error searching by ID: {}", id, e); + } + return results; + } + + private List search(String query, Integer page, Integer pageSize) { + List results = new ArrayList<>(); + Map> params = new HashMap<>(); + Map uriParams = new HashMap<>(); + params.put(LiveImportClientImpl.URI_PARAMETERS, uriParams); + + try { + uriParams.put("search", query); + if (page != null) { + uriParams.put("page", String.valueOf(page + 1)); + } + if (pageSize != null) { + uriParams.put("per_page", String.valueOf(pageSize)); + } + + String resp = liveImportClient.executeHttpGetRequest(timeout, this.url, params); + if (StringUtils.isEmpty(resp)) { + return results; + } + + JsonNode jsonNode = convertStringJsonToJsonNode(resp); + if (jsonNode != null) { + JsonNode docs = jsonNode.at("/results"); + if (docs != null && docs.isArray()) { + for (JsonNode node : docs) { + if (node != null) { + ImportRecord record = transformSourceRecords(node.toString()); + if (record != null) { + results.add(record); + } + } + } + } + } + } catch (Exception e) { + log.error("Error executing search query", e); + } + return results; + } + + + private JsonNode convertStringJsonToJsonNode(String json) { + if (StringUtils.isEmpty(json)) { + return null; + } + try { + return new ObjectMapper().readTree(json); + } catch (JsonProcessingException e) { + log.error("Unable to process JSON response", e); + return null; + } + } + + public void setUrl(String url) { + this.url = StringUtils.trimToNull(url); + } + + private class SearchByQueryCallable implements Callable> { + private final Query query; + + private SearchByQueryCallable(String queryString, int start, int count) { + query = new Query(); + query.addParameter("query", queryString); + query.addParameter("page", start / count); + query.addParameter("count", count); + } + + @Override + public List call() throws Exception { + String queryString = query.getParameterAsClass("query", String.class); + if (queryString == null) { + throw new MetadataSourceException("Query cannot be null"); + } + return search(queryString, + query.getParameterAsClass("page", Integer.class), + query.getParameterAsClass("count", Integer.class)); + } + } + + private class SearchByIdCallable implements Callable> { + private final Query query; + + private SearchByIdCallable(String id) { + this.query = new Query(); + query.addParameter("id", id); + } + + private SearchByIdCallable(Query query) { + this.query = query; + } + + @Override + public List call() throws Exception { + String id = query.getParameterAsClass("id", String.class); + if (id == null) { + throw new MetadataSourceException("Id cannot be null"); + } + return searchById(id); + } + } + + private class CountByQueryCallable implements Callable { + private final Query query; + + private CountByQueryCallable(String queryString) { + query = new Query(); + query.addParameter("query", queryString); + } + + private CountByQueryCallable(Query query) { + this.query = query; + } + + @Override + public Integer call() throws Exception { + String queryString = query.getParameterAsClass("query", String.class); + if (queryString == null) { + throw new MetadataSourceException("Query cannot be null"); + } + return count(queryString); + } + } +} \ No newline at end of file diff --git a/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml b/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml index a4b7e2e457..723ae09860 100644 --- a/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml +++ b/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml @@ -160,7 +160,7 @@ - + @@ -188,7 +188,7 @@ - + @@ -244,4 +244,13 @@ + + + + + + + + diff --git a/dspace/config/spring/api/external-services.xml b/dspace/config/spring/api/external-services.xml index 295ad9af0b..70d8bc125e 100644 --- a/dspace/config/spring/api/external-services.xml +++ b/dspace/config/spring/api/external-services.xml @@ -1,4 +1,13 @@ + @@ -282,4 +291,17 @@ + + + + + + + + Publication + none + + + + diff --git a/dspace/config/spring/api/openalex-integration.xml b/dspace/config/spring/api/openalex-integration.xml new file mode 100644 index 0000000000..8afbd70f20 --- /dev/null +++ b/dspace/config/spring/api/openalex-integration.xml @@ -0,0 +1,138 @@ + + + + + + + + + Defines which metadatum is mapped on which metadatum. Note that while the key must be unique it + only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over + what metadatafield is generated. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +