mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-07 18:14:26 +00:00
[CST-5303] porting of Scielo live import service
This commit is contained in:
@@ -0,0 +1,59 @@
|
|||||||
|
/**
|
||||||
|
* The contents of this file are subject to the license and copyright
|
||||||
|
* detailed in the LICENSE and NOTICE files at the root of the source
|
||||||
|
* tree and available online at
|
||||||
|
*
|
||||||
|
* http://www.dspace.org/license/
|
||||||
|
*/
|
||||||
|
package org.dspace.importer.external.metadatamapping.contributor;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.dspace.importer.external.metadatamapping.MetadataFieldConfig;
|
||||||
|
import org.dspace.importer.external.metadatamapping.MetadatumDTO;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This contributor extends SimpleRisToMetadataContributor,
|
||||||
|
* in particular, this one is able to chain multi values into a single one
|
||||||
|
*
|
||||||
|
* @author Mykhaylo Boychuk (mykhaylo.boychuk at 4science.it)
|
||||||
|
*/
|
||||||
|
public class SimpleRisToMetadataConcatContributor extends SimpleRisToMetadataContributor {
|
||||||
|
|
||||||
|
private String tag;
|
||||||
|
|
||||||
|
private MetadataFieldConfig metadata;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<MetadatumDTO> contributeMetadata(Map<String, List<String>> record) {
|
||||||
|
List<MetadatumDTO> values = new LinkedList<>();
|
||||||
|
List<String> fieldValues = record.get(this.tag);
|
||||||
|
Optional.ofNullable(fieldValues)
|
||||||
|
.map(fv -> fv.stream())
|
||||||
|
.map(s -> s.collect(Collectors.joining(" ")))
|
||||||
|
.ifPresent(t -> values.add(this.metadataFieldMapping.toDCValue(this.metadata, t)));
|
||||||
|
return values;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTag() {
|
||||||
|
return tag;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTag(String tag) {
|
||||||
|
this.tag = tag;
|
||||||
|
}
|
||||||
|
|
||||||
|
public MetadataFieldConfig getMetadata() {
|
||||||
|
return metadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMetadata(MetadataFieldConfig metadata) {
|
||||||
|
this.metadata = metadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@@ -0,0 +1,71 @@
|
|||||||
|
/**
|
||||||
|
* The contents of this file are subject to the license and copyright
|
||||||
|
* detailed in the LICENSE and NOTICE files at the root of the source
|
||||||
|
* tree and available online at
|
||||||
|
*
|
||||||
|
* http://www.dspace.org/license/
|
||||||
|
*/
|
||||||
|
package org.dspace.importer.external.metadatamapping.contributor;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import org.dspace.importer.external.metadatamapping.MetadataFieldConfig;
|
||||||
|
import org.dspace.importer.external.metadatamapping.MetadataFieldMapping;
|
||||||
|
import org.dspace.importer.external.metadatamapping.MetadatumDTO;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Metadata contributor that takes a record defined as Map<String,List<String>>
|
||||||
|
* and turns it into metadatums configured in fieldToMetadata
|
||||||
|
*
|
||||||
|
* @author Mykhaylo Boychuk (mykhaylo.boychuk at 4science.it)
|
||||||
|
*/
|
||||||
|
public class SimpleRisToMetadataContributor implements MetadataContributor<Map<String,List<String>>> {
|
||||||
|
|
||||||
|
protected Map<String, MetadataFieldConfig> fieldToMetadata;
|
||||||
|
|
||||||
|
protected MetadataFieldMapping<Map<String,List<String>>,
|
||||||
|
MetadataContributor<Map<String,List<String>>>> metadataFieldMapping;
|
||||||
|
|
||||||
|
public SimpleRisToMetadataContributor() {}
|
||||||
|
|
||||||
|
public SimpleRisToMetadataContributor(Map<String, MetadataFieldConfig> fieldToMetadata) {
|
||||||
|
this.fieldToMetadata = fieldToMetadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<MetadatumDTO> contributeMetadata(Map<String, List<String>> record) {
|
||||||
|
List<MetadatumDTO> values = new LinkedList<>();
|
||||||
|
for (String field : fieldToMetadata.keySet()) {
|
||||||
|
List<String> fieldValues = record.get(field);
|
||||||
|
if (Objects.nonNull(fieldValues)) {
|
||||||
|
for (String value : fieldValues) {
|
||||||
|
values.add(metadataFieldMapping.toDCValue(fieldToMetadata.get(field), value));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return values;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String, MetadataFieldConfig> getFieldToMetadata() {
|
||||||
|
return fieldToMetadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFieldToMetadata(Map<String, MetadataFieldConfig> fieldToMetadata) {
|
||||||
|
this.fieldToMetadata = fieldToMetadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
public MetadataFieldMapping<Map<String, List<String>>,
|
||||||
|
MetadataContributor<Map<String, List<String>>>> getMetadataFieldMapping() {
|
||||||
|
return metadataFieldMapping;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMetadataFieldMapping(MetadataFieldMapping<Map<String, List<String>>,
|
||||||
|
MetadataContributor<Map<String, List<String>>>> metadataFieldMapping) {
|
||||||
|
this.metadataFieldMapping = metadataFieldMapping;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
37
dspace-api/src/main/java/org/dspace/importer/external/scielo/service/ScieloFieldMapping.java
vendored
Normal file
37
dspace-api/src/main/java/org/dspace/importer/external/scielo/service/ScieloFieldMapping.java
vendored
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
/**
|
||||||
|
* The contents of this file are subject to the license and copyright
|
||||||
|
* detailed in the LICENSE and NOTICE files at the root of the source
|
||||||
|
* tree and available online at
|
||||||
|
*
|
||||||
|
* http://www.dspace.org/license/
|
||||||
|
*/
|
||||||
|
package org.dspace.importer.external.scielo.service;
|
||||||
|
import java.util.Map;
|
||||||
|
import javax.annotation.Resource;
|
||||||
|
|
||||||
|
import org.dspace.importer.external.metadatamapping.AbstractMetadataFieldMapping;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An implementation of {@link AbstractMetadataFieldMapping}
|
||||||
|
* Responsible for defining the mapping of the Scielo metadatum fields on the DSpace metadatum fields
|
||||||
|
*
|
||||||
|
* @author Boychuk Mykhaylo (boychuk.mykhaylo at 4science dot it)
|
||||||
|
*/
|
||||||
|
@SuppressWarnings("rawtypes")
|
||||||
|
public class ScieloFieldMapping extends AbstractMetadataFieldMapping {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Defines which metadatum is mapped on which metadatum. Note that while the key must be unique it
|
||||||
|
* only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over
|
||||||
|
* what metadatafield is generated.
|
||||||
|
*
|
||||||
|
* @param metadataFieldMap The map containing the link between retrieve metadata and
|
||||||
|
* metadata that will be set to the item.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
@Resource(name = "scieloMetadataFieldMap")
|
||||||
|
public void setMetadataFieldMap(Map metadataFieldMap) {
|
||||||
|
super.setMetadataFieldMap(metadataFieldMap);
|
||||||
|
}
|
||||||
|
}
|
@@ -0,0 +1,230 @@
|
|||||||
|
/**
|
||||||
|
* The contents of this file are subject to the license and copyright
|
||||||
|
* detailed in the LICENSE and NOTICE files at the root of the source
|
||||||
|
* tree and available online at
|
||||||
|
*
|
||||||
|
* http://www.dspace.org/license/
|
||||||
|
*/
|
||||||
|
package org.dspace.importer.external.scielo.service;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.net.URLEncoder;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import javax.el.MethodNotFoundException;
|
||||||
|
import javax.ws.rs.BadRequestException;
|
||||||
|
|
||||||
|
import org.apache.http.client.utils.URIBuilder;
|
||||||
|
import org.dspace.content.Item;
|
||||||
|
import org.dspace.importer.external.datamodel.ImportRecord;
|
||||||
|
import org.dspace.importer.external.datamodel.Query;
|
||||||
|
import org.dspace.importer.external.exception.FileSourceException;
|
||||||
|
import org.dspace.importer.external.exception.MetadataSourceException;
|
||||||
|
import org.dspace.importer.external.scopus.service.LiveImportClient;
|
||||||
|
import org.dspace.importer.external.service.AbstractImportMetadataSourceService;
|
||||||
|
import org.dspace.importer.external.service.components.QuerySource;
|
||||||
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implements a data source for querying Scielo
|
||||||
|
*
|
||||||
|
* @author Boychuk Mykhaylo (boychuk.mykhaylo at 4Science dot it)
|
||||||
|
*/
|
||||||
|
public class ScieloImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService<Map<String,List<String>>>
|
||||||
|
implements QuerySource {
|
||||||
|
|
||||||
|
private static final String ENDPOINT_SEARCH_SCIELO = "https://search.scielo.org/?output=ris&q=";
|
||||||
|
|
||||||
|
private static final String PATTERN = "^([A-Z][A-Z0-9]) - (.*)$";
|
||||||
|
|
||||||
|
private static final String ID_PATTERN = "^(.....)-(.*)-(...)$";
|
||||||
|
|
||||||
|
private int timeout = 1000;
|
||||||
|
|
||||||
|
@Autowired
|
||||||
|
private LiveImportClient liveImportClient;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void init() throws Exception {}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getImportSource() {
|
||||||
|
return "scielo";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<ImportRecord> getRecords(String query, int start, int count) throws MetadataSourceException {
|
||||||
|
return retry(new SearchByQueryCallable(query, count, start));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<ImportRecord> getRecords(Query query) throws MetadataSourceException {
|
||||||
|
return retry(new SearchByQueryCallable(query));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ImportRecord getRecord(Query query) throws MetadataSourceException {
|
||||||
|
List<ImportRecord> records = retry(new SearchByQueryCallable(query));
|
||||||
|
return records == null || records.isEmpty() ? null : records.get(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ImportRecord getRecord(String id) throws MetadataSourceException {
|
||||||
|
List<ImportRecord> records = retry(new FindByIdCallable(id));
|
||||||
|
return records == null || records.isEmpty() ? null : records.get(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getRecordsCount(String query) throws MetadataSourceException {
|
||||||
|
return retry(new SearchNBByQueryCallable(query));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getRecordsCount(Query query) throws MetadataSourceException {
|
||||||
|
throw new MethodNotFoundException("This method is not implemented for Scielo");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<ImportRecord> findMatchingRecords(Item item) throws MetadataSourceException {
|
||||||
|
throw new MethodNotFoundException("This method is not implemented for Scielo");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<ImportRecord> findMatchingRecords(Query query) throws MetadataSourceException {
|
||||||
|
throw new MethodNotFoundException("This method is not implemented for Scielo");
|
||||||
|
}
|
||||||
|
|
||||||
|
private class SearchNBByQueryCallable implements Callable<Integer> {
|
||||||
|
|
||||||
|
private String query;
|
||||||
|
|
||||||
|
private SearchNBByQueryCallable(String queryString) {
|
||||||
|
this.query = queryString;
|
||||||
|
}
|
||||||
|
|
||||||
|
private SearchNBByQueryCallable(Query query) {
|
||||||
|
this.query = query.getParameterAsClass("query", String.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Integer call() throws Exception {
|
||||||
|
String url = ENDPOINT_SEARCH_SCIELO + URLEncoder.encode(query, StandardCharsets.UTF_8);
|
||||||
|
String resp = liveImportClient.executeHttpGetRequest(timeout, url, new HashMap<String, String>());
|
||||||
|
Map<Integer, Map<String, List<String>>> records = getRecords(resp);
|
||||||
|
return Objects.nonNull(records.size()) ? records.size() : 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private class FindByIdCallable implements Callable<List<ImportRecord>> {
|
||||||
|
|
||||||
|
private String id;
|
||||||
|
|
||||||
|
private FindByIdCallable(String id) {
|
||||||
|
this.id = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<ImportRecord> call() throws Exception {
|
||||||
|
List<ImportRecord> results = new ArrayList<>();
|
||||||
|
String scieloId = id.trim();
|
||||||
|
Pattern risPattern = Pattern.compile(ID_PATTERN);
|
||||||
|
Matcher risMatcher = risPattern.matcher(scieloId);
|
||||||
|
if (risMatcher.matches()) {
|
||||||
|
String url = ENDPOINT_SEARCH_SCIELO + URLEncoder.encode(scieloId, StandardCharsets.UTF_8);
|
||||||
|
String resp = liveImportClient.executeHttpGetRequest(timeout, url, new HashMap<String, String>());
|
||||||
|
Map<Integer, Map<String, List<String>>> records = getRecords(resp);
|
||||||
|
if (Objects.nonNull(records) & !records.isEmpty()) {
|
||||||
|
results.add(transformSourceRecords(records.get(1)));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw new BadRequestException("id provided : " + scieloId + " is not an ScieloID");
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private class SearchByQueryCallable implements Callable<List<ImportRecord>> {
|
||||||
|
|
||||||
|
private Query query;
|
||||||
|
|
||||||
|
private SearchByQueryCallable(String queryString, Integer maxResult, Integer start) {
|
||||||
|
query = new Query();
|
||||||
|
query.addParameter("query", queryString);
|
||||||
|
query.addParameter("start", start);
|
||||||
|
query.addParameter("count", maxResult);
|
||||||
|
}
|
||||||
|
|
||||||
|
private SearchByQueryCallable(Query query) {
|
||||||
|
this.query = query;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<ImportRecord> call() throws Exception {
|
||||||
|
List<ImportRecord> results = new ArrayList<>();
|
||||||
|
String q = query.getParameterAsClass("query", String.class);
|
||||||
|
Integer count = query.getParameterAsClass("count", Integer.class);
|
||||||
|
Integer start = query.getParameterAsClass("start", Integer.class);
|
||||||
|
URIBuilder uriBuilder = new URIBuilder(
|
||||||
|
ENDPOINT_SEARCH_SCIELO + URLEncoder.encode(q, StandardCharsets.UTF_8));
|
||||||
|
uriBuilder.addParameter("start", start.toString());
|
||||||
|
uriBuilder.addParameter("count", count.toString());
|
||||||
|
String resp = liveImportClient.executeHttpGetRequest(timeout, uriBuilder.toString(),
|
||||||
|
new HashMap<String, String>());
|
||||||
|
Map<Integer, Map<String, List<String>>> records = getRecords(resp);
|
||||||
|
for (int record : records.keySet()) {
|
||||||
|
results.add(transformSourceRecords(records.get(record)));
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<Integer, Map<String,List<String>>> getRecords(String resp) throws FileSourceException {
|
||||||
|
Map<Integer, Map<String, List<String>>> records = new HashMap<Integer, Map<String,List<String>>>();
|
||||||
|
BufferedReader reader;
|
||||||
|
int countRecord = 0;
|
||||||
|
try {
|
||||||
|
reader = new BufferedReader(new StringReader(resp));
|
||||||
|
String line;
|
||||||
|
while ((line = reader.readLine()) != null) {
|
||||||
|
if (line.isEmpty() || line.equals("") || line.matches("^\\s*$")) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
line = line.replaceAll("\\uFEFF", "").trim();
|
||||||
|
Pattern risPattern = Pattern.compile(PATTERN);
|
||||||
|
Matcher risMatcher = risPattern.matcher(line);
|
||||||
|
if (risMatcher.matches()) {
|
||||||
|
if (risMatcher.group(1).equals("TY") & risMatcher.group(2).equals("JOUR")) {
|
||||||
|
countRecord ++;
|
||||||
|
Map<String,List<String>> newMap = new HashMap<String, List<String>>();
|
||||||
|
records.put(countRecord, newMap);
|
||||||
|
} else {
|
||||||
|
Map<String, List<String>> tag2values = records.get(countRecord);
|
||||||
|
List<String> values = tag2values.get(risMatcher.group(1));
|
||||||
|
if (Objects.isNull(values)) {
|
||||||
|
List<String> newValues = new ArrayList<String>();
|
||||||
|
newValues.add(risMatcher.group(2));
|
||||||
|
tag2values.put(risMatcher.group(1), newValues);
|
||||||
|
} else {
|
||||||
|
values.add(risMatcher.group(2));
|
||||||
|
tag2values.put(risMatcher.group(1), values);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new FileSourceException("Cannot parse RIS file", e);
|
||||||
|
}
|
||||||
|
return records;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@@ -116,6 +116,11 @@
|
|||||||
</bean>
|
</bean>
|
||||||
|
|
||||||
|
|
||||||
|
<bean id="ScieloImportService" class="org.dspace.importer.external.scielo.service.ScieloImportMetadataSourceServiceImpl" scope="singleton">
|
||||||
|
<property name="metadataFieldMapping" ref="scieloMetadataFieldMapping"/>
|
||||||
|
</bean>
|
||||||
|
<bean id="scieloMetadataFieldMapping" class="org.dspace.importer.external.scielo.service.ScieloFieldMapping"/>
|
||||||
|
|
||||||
<!-- Metadatafield used to check against if it's already imported or not during the JSONLookupSearcher-->
|
<!-- Metadatafield used to check against if it's already imported or not during the JSONLookupSearcher-->
|
||||||
<bean id="lookupID" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
|
<bean id="lookupID" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
|
||||||
<constructor-arg value="dc.identifier.other"/>
|
<constructor-arg value="dc.identifier.other"/>
|
||||||
|
@@ -94,5 +94,15 @@
|
|||||||
</property>
|
</property>
|
||||||
</bean>
|
</bean>
|
||||||
|
|
||||||
</beans>
|
<bean id="scieloLiveImportDataProvider" class="org.dspace.external.provider.impl.LiveImportDataProvider">
|
||||||
|
<property name="metadataSource" ref="ScieloImportService"/>
|
||||||
|
<property name="sourceIdentifier" value="scielo"/>
|
||||||
|
<property name="recordIdMetadata" value="dc.identifier.other"/>
|
||||||
|
<property name="supportedEntityTypes">
|
||||||
|
<list>
|
||||||
|
<value>Publication</value>
|
||||||
|
</list>
|
||||||
|
</property>
|
||||||
|
</bean>
|
||||||
|
|
||||||
|
</beans>
|
Reference in New Issue
Block a user