diff --git a/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/SimpleRisToMetadataConcatContributor.java b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/SimpleRisToMetadataConcatContributor.java new file mode 100644 index 0000000000..5dd354c6f1 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/SimpleRisToMetadataConcatContributor.java @@ -0,0 +1,59 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.metadatamapping.contributor; + +import java.util.Collection; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; + +import org.dspace.importer.external.metadatamapping.MetadataFieldConfig; +import org.dspace.importer.external.metadatamapping.MetadatumDTO; + +/** + * This contributor extends SimpleRisToMetadataContributor: rather than one metadatum per value, + * it joins all values stored under the configured tag with a single space into one metadatum + * + * @author Mykhaylo Boychuk (mykhaylo.boychuk at 4science.it) + */ +public class SimpleRisToMetadataConcatContributor extends SimpleRisToMetadataContributor { + + private String tag; + + private MetadataFieldConfig metadata; + + @Override + public Collection contributeMetadata(Map> record) { + List values = new LinkedList<>(); + List fieldValues = record.get(this.tag); + Optional.ofNullable(fieldValues) + .map(fv -> fv.stream()) + .map(s -> s.collect(Collectors.joining(" "))) + .ifPresent(t -> values.add(this.metadataFieldMapping.toDCValue(this.metadata, t))); + return values; + } + + public String getTag() { + return tag; + } + + public void setTag(String tag) { + this.tag = tag; + } + + public MetadataFieldConfig getMetadata() { + return metadata; + } + + 
public void setMetadata(MetadataFieldConfig metadata) { + this.metadata = metadata; + } + +} \ No newline at end of file diff --git a/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/SimpleRisToMetadataContributor.java b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/SimpleRisToMetadataContributor.java new file mode 100644 index 0000000000..36ea0dd478 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/SimpleRisToMetadataContributor.java @@ -0,0 +1,71 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.metadatamapping.contributor; + +import java.util.Collection; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import org.dspace.importer.external.metadatamapping.MetadataFieldConfig; +import org.dspace.importer.external.metadatamapping.MetadataFieldMapping; +import org.dspace.importer.external.metadatamapping.MetadatumDTO; + +/** + * Metadata contributor that takes a record defined as a map from field name to its list of values + * and turns every value of each field configured in fieldToMetadata into a metadatum + * + * @author Mykhaylo Boychuk (mykhaylo.boychuk at 4science.it) + */ +public class SimpleRisToMetadataContributor implements MetadataContributor>> { + + protected Map fieldToMetadata; + + protected MetadataFieldMapping>, + MetadataContributor>>> metadataFieldMapping; + + public SimpleRisToMetadataContributor() {} + + public SimpleRisToMetadataContributor(Map fieldToMetadata) { + this.fieldToMetadata = fieldToMetadata; + } + + @Override + public Collection contributeMetadata(Map> record) { + List values = new LinkedList<>(); + for (String field : fieldToMetadata.keySet()) { + List fieldValues = record.get(field); + if 
(Objects.nonNull(fieldValues)) { + for (String value : fieldValues) { + values.add(metadataFieldMapping.toDCValue(fieldToMetadata.get(field), value)); + } + } + } + return values; + } + + public Map getFieldToMetadata() { + return fieldToMetadata; + } + + public void setFieldToMetadata(Map fieldToMetadata) { + this.fieldToMetadata = fieldToMetadata; + } + + public MetadataFieldMapping>, + MetadataContributor>>> getMetadataFieldMapping() { + return metadataFieldMapping; + } + + public void setMetadataFieldMapping(MetadataFieldMapping>, + MetadataContributor>>> metadataFieldMapping) { + this.metadataFieldMapping = metadataFieldMapping; + } + +} \ No newline at end of file diff --git a/dspace-api/src/main/java/org/dspace/importer/external/scielo/service/ScieloFieldMapping.java b/dspace-api/src/main/java/org/dspace/importer/external/scielo/service/ScieloFieldMapping.java new file mode 100644 index 0000000000..0d7183a1f0 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/scielo/service/ScieloFieldMapping.java @@ -0,0 +1,37 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.scielo.service; +import java.util.Map; +import javax.annotation.Resource; + +import org.dspace.importer.external.metadatamapping.AbstractMetadataFieldMapping; + +/** + * An implementation of {@link AbstractMetadataFieldMapping}. + * Responsible for defining the mapping of the Scielo metadatum fields on the DSpace metadatum fields. + * + * @author Boychuk Mykhaylo (boychuk.mykhaylo at 4science dot it) + */ +@SuppressWarnings("rawtypes") +public class ScieloFieldMapping extends AbstractMetadataFieldMapping { + + /** + * Defines which metadatum is mapped on which metadatum. Note that while the key must be unique it + * only matters here for postprocessing of the value. 
The mapped MetadatumContributor has full control over + * what metadatafield is generated. The setter is annotated to receive the resource named scieloMetadataFieldMap. + * + * @param metadataFieldMap The map containing the link between retrieved metadata and + * metadata that will be set to the item. + */ + @Override + @SuppressWarnings("unchecked") + @Resource(name = "scieloMetadataFieldMap") + public void setMetadataFieldMap(Map metadataFieldMap) { + super.setMetadataFieldMap(metadataFieldMap); + } +} \ No newline at end of file diff --git a/dspace-api/src/main/java/org/dspace/importer/external/scielo/service/ScieloImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/scielo/service/ScieloImportMetadataSourceServiceImpl.java new file mode 100644 index 0000000000..2ebe520fde --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/scielo/service/ScieloImportMetadataSourceServiceImpl.java @@ -0,0 +1,230 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.scielo.service; + +import java.io.BufferedReader; +import java.io.StringReader; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.Callable; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import javax.el.MethodNotFoundException; +import javax.ws.rs.BadRequestException; + +import org.apache.http.client.utils.URIBuilder; +import org.dspace.content.Item; +import org.dspace.importer.external.datamodel.ImportRecord; +import org.dspace.importer.external.datamodel.Query; +import org.dspace.importer.external.exception.FileSourceException; +import org.dspace.importer.external.exception.MetadataSourceException; 
+import org.dspace.importer.external.scopus.service.LiveImportClient; +import org.dspace.importer.external.service.AbstractImportMetadataSourceService; +import org.dspace.importer.external.service.components.QuerySource; +import org.springframework.beans.factory.annotation.Autowired; + +/** + * Implements a data source for querying Scielo + * + * @author Boychuk Mykhaylo (boychuk.mykhaylo at 4Science dot it) + */ +public class ScieloImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService>> + implements QuerySource { + + private static final String ENDPOINT_SEARCH_SCIELO = "https://search.scielo.org/?output=ris&q="; + + private static final String PATTERN = "^([A-Z][A-Z0-9]) - (.*)$"; + + private static final String ID_PATTERN = "^(.....)-(.*)-(...)$"; + + private int timeout = 1000; + + @Autowired + private LiveImportClient liveImportClient; + + @Override + public void init() throws Exception {} + + @Override + public String getImportSource() { + return "scielo"; + } + + @Override + public Collection getRecords(String query, int start, int count) throws MetadataSourceException { + return retry(new SearchByQueryCallable(query, count, start)); + } + + @Override + public Collection getRecords(Query query) throws MetadataSourceException { + return retry(new SearchByQueryCallable(query)); + } + + + @Override + public ImportRecord getRecord(Query query) throws MetadataSourceException { + List records = retry(new SearchByQueryCallable(query)); + return records == null || records.isEmpty() ? null : records.get(0); + } + + @Override + public ImportRecord getRecord(String id) throws MetadataSourceException { + List records = retry(new FindByIdCallable(id)); + return records == null || records.isEmpty() ? 
null : records.get(0); + } + + @Override + public int getRecordsCount(String query) throws MetadataSourceException { + return retry(new SearchNBByQueryCallable(query)); + } + + @Override + public int getRecordsCount(Query query) throws MetadataSourceException { + throw new MethodNotFoundException("This method is not implemented for Scielo"); + } + + @Override + public Collection findMatchingRecords(Item item) throws MetadataSourceException { + throw new MethodNotFoundException("This method is not implemented for Scielo"); + } + + @Override + public Collection findMatchingRecords(Query query) throws MetadataSourceException { + throw new MethodNotFoundException("This method is not implemented for Scielo"); + } + + private class SearchNBByQueryCallable implements Callable { + + private String query; + + private SearchNBByQueryCallable(String queryString) { + this.query = queryString; + } + + private SearchNBByQueryCallable(Query query) { + this.query = query.getParameterAsClass("query", String.class); + } + + @Override + public Integer call() throws Exception { + String url = ENDPOINT_SEARCH_SCIELO + URLEncoder.encode(query, StandardCharsets.UTF_8); + String resp = liveImportClient.executeHttpGetRequest(timeout, url, new HashMap()); + Map>> records = getRecords(resp); + return Objects.nonNull(records.size()) ? 
records.size() : 0; + } + } + + private class FindByIdCallable implements Callable> { + + private String id; + + private FindByIdCallable(String id) { + this.id = id; + } + + @Override + public List call() throws Exception { + List results = new ArrayList<>(); + String scieloId = id.trim(); + Pattern risPattern = Pattern.compile(ID_PATTERN); + Matcher risMatcher = risPattern.matcher(scieloId); + if (risMatcher.matches()) { + String url = ENDPOINT_SEARCH_SCIELO + URLEncoder.encode(scieloId, StandardCharsets.UTF_8); + String resp = liveImportClient.executeHttpGetRequest(timeout, url, new HashMap()); + Map>> records = getRecords(resp); + if (Objects.nonNull(records) & !records.isEmpty()) { + results.add(transformSourceRecords(records.get(1))); + } + } else { + throw new BadRequestException("id provided : " + scieloId + " is not an ScieloID"); + } + return results; + } + } + + private class SearchByQueryCallable implements Callable> { + + private Query query; + + private SearchByQueryCallable(String queryString, Integer maxResult, Integer start) { + query = new Query(); + query.addParameter("query", queryString); + query.addParameter("start", start); + query.addParameter("count", maxResult); + } + + private SearchByQueryCallable(Query query) { + this.query = query; + } + + @Override + public List call() throws Exception { + List results = new ArrayList<>(); + String q = query.getParameterAsClass("query", String.class); + Integer count = query.getParameterAsClass("count", Integer.class); + Integer start = query.getParameterAsClass("start", Integer.class); + URIBuilder uriBuilder = new URIBuilder( + ENDPOINT_SEARCH_SCIELO + URLEncoder.encode(q, StandardCharsets.UTF_8)); + uriBuilder.addParameter("start", start.toString()); + uriBuilder.addParameter("count", count.toString()); + String resp = liveImportClient.executeHttpGetRequest(timeout, uriBuilder.toString(), + new HashMap()); + Map>> records = getRecords(resp); + for (int record : records.keySet()) { + 
results.add(transformSourceRecords(records.get(record))); + } + return results; + } + } + + private Map>> getRecords(String resp) throws FileSourceException { + Map>> records = new HashMap>>(); + BufferedReader reader; + int countRecord = 0; + try { + reader = new BufferedReader(new StringReader(resp)); + String line; + while ((line = reader.readLine()) != null) { + if (line.isEmpty() || line.equals("") || line.matches("^\\s*$")) { + continue; + } + line = line.replaceAll("\\uFEFF", "").trim(); + Pattern risPattern = Pattern.compile(PATTERN); + Matcher risMatcher = risPattern.matcher(line); + if (risMatcher.matches()) { + if (risMatcher.group(1).equals("TY") & risMatcher.group(2).equals("JOUR")) { + countRecord ++; + Map> newMap = new HashMap>(); + records.put(countRecord, newMap); + } else { + Map> tag2values = records.get(countRecord); + List values = tag2values.get(risMatcher.group(1)); + if (Objects.isNull(values)) { + List newValues = new ArrayList(); + newValues.add(risMatcher.group(2)); + tag2values.put(risMatcher.group(1), newValues); + } else { + values.add(risMatcher.group(2)); + tag2values.put(risMatcher.group(1), values); + } + } + } + } + } catch (Exception e) { + throw new FileSourceException("Cannot parse RIS file", e); + } + return records; + } + +} \ No newline at end of file diff --git a/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml b/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml index 5e69ee9c42..7686986ae8 100644 --- a/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml +++ b/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml @@ -116,6 +116,11 @@ + + + + + diff --git a/dspace/config/spring/api/external-services.xml b/dspace/config/spring/api/external-services.xml index 7f1295f839..2259ccb9f9 100644 --- a/dspace/config/spring/api/external-services.xml +++ b/dspace/config/spring/api/external-services.xml @@ -94,5 +94,15 @@ - + + + + + + 
+ Publication + + + + \ No newline at end of file