[CST-5303] porting of Scielo live import service

This commit is contained in:
Mykhaylo
2022-04-04 15:02:29 +02:00
parent 1050d02a97
commit e0913ccc5c
6 changed files with 413 additions and 1 deletions

View File

@@ -0,0 +1,59 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.importer.external.metadatamapping.contributor;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import org.dspace.importer.external.metadatamapping.MetadataFieldConfig;
import org.dspace.importer.external.metadatamapping.MetadatumDTO;
/**
 * Variant of {@link SimpleRisToMetadataContributor} that collapses all the values found
 * for one configured tag into a single metadatum, joining them with a single space.
 *
 * @author Mykhaylo Boychuk (mykhaylo.boychuk at 4science.it)
 */
public class SimpleRisToMetadataConcatContributor extends SimpleRisToMetadataContributor {

    /** Key of the record entry whose values are concatenated. */
    private String tag;

    /** Metadata field that receives the concatenated value. */
    private MetadataFieldConfig metadata;

    /**
     * Reads the values stored in the record under the configured tag and, when such an
     * entry exists, produces exactly one metadatum holding the space-separated values.
     *
     * @param record the record to read, keyed by tag
     * @return a list with the single concatenated metadatum, or an empty list when the
     *         record has no (or a null) entry for the configured tag
     */
    @Override
    public Collection<MetadatumDTO> contributeMetadata(Map<String, List<String>> record) {
        List<MetadatumDTO> result = new LinkedList<>();
        Optional<String> joined = Optional.ofNullable(record.get(this.tag))
                .map(tagValues -> tagValues.stream().collect(Collectors.joining(" ")));
        if (joined.isPresent()) {
            result.add(this.metadataFieldMapping.toDCValue(this.metadata, joined.get()));
        }
        return result;
    }

    /**
     * @return the tag whose values are concatenated
     */
    public String getTag() {
        return tag;
    }

    /**
     * @param tag the tag whose values are concatenated
     */
    public void setTag(String tag) {
        this.tag = tag;
    }

    /**
     * @return the metadata field that receives the concatenated value
     */
    public MetadataFieldConfig getMetadata() {
        return metadata;
    }

    /**
     * @param metadata the metadata field that receives the concatenated value
     */
    public void setMetadata(MetadataFieldConfig metadata) {
        this.metadata = metadata;
    }
}

View File

@@ -0,0 +1,71 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.importer.external.metadatamapping.contributor;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.dspace.importer.external.metadatamapping.MetadataFieldConfig;
import org.dspace.importer.external.metadatamapping.MetadataFieldMapping;
import org.dspace.importer.external.metadatamapping.MetadatumDTO;
/**
 * Metadata contributor working on records shaped as {@code Map<String, List<String>>}
 * (tag to values). Every value of every tag listed in {@code fieldToMetadata} is turned
 * into its own metadatum.
 *
 * @author Mykhaylo Boychuk (mykhaylo.boychuk at 4science.it)
 */
public class SimpleRisToMetadataContributor implements MetadataContributor<Map<String,List<String>>> {

    /** Associates a record tag with the metadata field its values are written to. */
    protected Map<String, MetadataFieldConfig> fieldToMetadata;

    /** Mapping used to build the metadatum for a given field and value. */
    protected MetadataFieldMapping<Map<String,List<String>>,
        MetadataContributor<Map<String,List<String>>>> metadataFieldMapping;

    /**
     * Creates a contributor without any configuration; the tag-to-field map and the
     * field mapping are expected to be provided through the setters.
     */
    public SimpleRisToMetadataContributor() {}

    /**
     * Creates a contributor with the given tag-to-field configuration.
     *
     * @param fieldToMetadata map from record tag to target metadata field
     */
    public SimpleRisToMetadataContributor(Map<String, MetadataFieldConfig> fieldToMetadata) {
        this.fieldToMetadata = fieldToMetadata;
    }

    /**
     * Builds one metadatum per value for each configured tag present in the record.
     * Configured tags that the record does not contain are skipped.
     *
     * @param record the record to read, keyed by tag
     * @return the generated metadata, possibly empty
     */
    @Override
    public Collection<MetadatumDTO> contributeMetadata(Map<String, List<String>> record) {
        List<MetadatumDTO> result = new LinkedList<>();
        for (Map.Entry<String, MetadataFieldConfig> mapping : fieldToMetadata.entrySet()) {
            List<String> tagValues = record.get(mapping.getKey());
            if (Objects.isNull(tagValues)) {
                continue;
            }
            for (String tagValue : tagValues) {
                result.add(metadataFieldMapping.toDCValue(mapping.getValue(), tagValue));
            }
        }
        return result;
    }

    /**
     * @return the map from record tag to target metadata field
     */
    public Map<String, MetadataFieldConfig> getFieldToMetadata() {
        return fieldToMetadata;
    }

    /**
     * @param fieldToMetadata the map from record tag to target metadata field
     */
    public void setFieldToMetadata(Map<String, MetadataFieldConfig> fieldToMetadata) {
        this.fieldToMetadata = fieldToMetadata;
    }

    /**
     * @return the mapping used to build metadata values
     */
    public MetadataFieldMapping<Map<String, List<String>>,
        MetadataContributor<Map<String, List<String>>>> getMetadataFieldMapping() {
        return metadataFieldMapping;
    }

    /**
     * @param metadataFieldMapping the mapping used to build metadata values
     */
    public void setMetadataFieldMapping(MetadataFieldMapping<Map<String, List<String>>,
        MetadataContributor<Map<String, List<String>>>> metadataFieldMapping) {
        this.metadataFieldMapping = metadataFieldMapping;
    }
}

View File

@@ -0,0 +1,37 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.importer.external.scielo.service;
import java.util.Map;
import javax.annotation.Resource;
import org.dspace.importer.external.metadatamapping.AbstractMetadataFieldMapping;
/**
 * {@link AbstractMetadataFieldMapping} specialisation that defines how the Scielo
 * metadatum fields are mapped onto the DSpace metadatum fields.
 *
 * @author Boychuk Mykhaylo (boychuk.mykhaylo at 4science dot it)
 */
@SuppressWarnings("rawtypes")
public class ScieloFieldMapping extends AbstractMetadataFieldMapping {

    /**
     * Sets which metadatum is mapped on which metadatum, using the resource named
     * {@code scieloMetadataFieldMap}, and passes it on to the superclass.
     * <p>
     * The keys of the map must be unique, but they only matter for the postprocessing of
     * the value: the mapped MetadatumContributor has full control over which metadata
     * field is generated.
     *
     * @param metadataFieldMap the map linking the retrieved metadata to the metadata
     *                         that will be set on the item
     */
    @Resource(name = "scieloMetadataFieldMap")
    @SuppressWarnings("unchecked")
    @Override
    public void setMetadataFieldMap(Map metadataFieldMap) {
        super.setMetadataFieldMap(metadataFieldMap);
    }
}

View File

@@ -0,0 +1,230 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.importer.external.scielo.service;
import java.io.BufferedReader;
import java.io.StringReader;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.Callable;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.el.MethodNotFoundException;
import javax.ws.rs.BadRequestException;
import org.apache.http.client.utils.URIBuilder;
import org.dspace.content.Item;
import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.datamodel.Query;
import org.dspace.importer.external.exception.FileSourceException;
import org.dspace.importer.external.exception.MetadataSourceException;
import org.dspace.importer.external.scopus.service.LiveImportClient;
import org.dspace.importer.external.service.AbstractImportMetadataSourceService;
import org.dspace.importer.external.service.components.QuerySource;
import org.springframework.beans.factory.annotation.Autowired;
/**
 * Implements a data source for querying Scielo.
 * <p>
 * Requests are sent to the Scielo search endpoint asking for RIS output; the textual
 * response is parsed into one tag-to-values map per record, and each map is converted
 * to an {@link ImportRecord} through {@code transformSourceRecords}.
 *
 * @author Boychuk Mykhaylo (boychuk.mykhaylo at 4Science dot it)
 */
public class ScieloImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService<Map<String,List<String>>>
    implements QuerySource {

    private static final String ENDPOINT_SEARCH_SCIELO = "https://search.scielo.org/?output=ris&q=";

    /** Shape of a RIS line: a two character tag, " - ", then the value. */
    private static final String PATTERN = "^([A-Z][A-Z0-9]) - (.*)$";

    /** Shape an identifier must have to be accepted by {@link #getRecord(String)}. */
    private static final String ID_PATTERN = "^(.....)-(.*)-(...)$";

    // Compiled once: the RIS pattern is applied to every line of every response.
    private static final Pattern RIS_LINE_PATTERN = Pattern.compile(PATTERN);

    private static final Pattern SCIELO_ID_PATTERN = Pattern.compile(ID_PATTERN);

    /** Timeout handed to the HTTP client on every request (unit is defined by the client). */
    private int timeout = 1000;

    @Autowired
    private LiveImportClient liveImportClient;

    @Override
    public void init() throws Exception {}

    /**
     * @return the name of this import source, {@code "scielo"}
     */
    @Override
    public String getImportSource() {
        return "scielo";
    }

    /**
     * Searches Scielo with a textual query.
     *
     * @param query the query string
     * @param start value sent as the {@code start} request parameter
     * @param count value sent as the {@code count} request parameter
     * @return the records parsed from the response
     * @throws MetadataSourceException declared by the implemented interface
     */
    @Override
    public Collection<ImportRecord> getRecords(String query, int start, int count) throws MetadataSourceException {
        return retry(new SearchByQueryCallable(query, count, start));
    }

    /**
     * Searches Scielo using the {@code query} parameter of the given {@link Query}; its
     * {@code start} and {@code count} parameters are forwarded when present.
     *
     * @param query the query object
     * @return the records parsed from the response
     * @throws MetadataSourceException declared by the implemented interface
     */
    @Override
    public Collection<ImportRecord> getRecords(Query query) throws MetadataSourceException {
        return retry(new SearchByQueryCallable(query));
    }

    /**
     * Runs the same search as {@link #getRecords(Query)} and keeps only the first result.
     *
     * @param query the query object
     * @return the first record found, or {@code null} when there is none
     * @throws MetadataSourceException declared by the implemented interface
     */
    @Override
    public ImportRecord getRecord(Query query) throws MetadataSourceException {
        List<ImportRecord> records = retry(new SearchByQueryCallable(query));
        return records == null || records.isEmpty() ? null : records.get(0);
    }

    /**
     * Looks up a single record by its Scielo identifier.
     *
     * @param id the identifier; surrounding whitespace is ignored
     * @return the matching record, or {@code null} when nothing was found
     * @throws MetadataSourceException declared by the implemented interface
     */
    @Override
    public ImportRecord getRecord(String id) throws MetadataSourceException {
        List<ImportRecord> records = retry(new FindByIdCallable(id));
        return records == null || records.isEmpty() ? null : records.get(0);
    }

    /**
     * Counts the records contained in the response to the given query.
     *
     * @param query the query string
     * @return the number of records parsed from the response
     * @throws MetadataSourceException declared by the implemented interface
     */
    @Override
    public int getRecordsCount(String query) throws MetadataSourceException {
        return retry(new SearchNBByQueryCallable(query));
    }

    /**
     * Not implemented for Scielo.
     *
     * @throws MethodNotFoundException always
     */
    @Override
    public int getRecordsCount(Query query) throws MetadataSourceException {
        throw new MethodNotFoundException("This method is not implemented for Scielo");
    }

    /**
     * Not implemented for Scielo.
     *
     * @throws MethodNotFoundException always
     */
    @Override
    public Collection<ImportRecord> findMatchingRecords(Item item) throws MetadataSourceException {
        throw new MethodNotFoundException("This method is not implemented for Scielo");
    }

    /**
     * Not implemented for Scielo.
     *
     * @throws MethodNotFoundException always
     */
    @Override
    public Collection<ImportRecord> findMatchingRecords(Query query) throws MetadataSourceException {
        throw new MethodNotFoundException("This method is not implemented for Scielo");
    }

    /**
     * Callable returning how many records the response to a query contains.
     */
    private class SearchNBByQueryCallable implements Callable<Integer> {

        private String query;

        private SearchNBByQueryCallable(String queryString) {
            this.query = queryString;
        }

        private SearchNBByQueryCallable(Query query) {
            this.query = query.getParameterAsClass("query", String.class);
        }

        @Override
        public Integer call() throws Exception {
            String url = ENDPOINT_SEARCH_SCIELO + URLEncoder.encode(query, StandardCharsets.UTF_8);
            String resp = liveImportClient.executeHttpGetRequest(timeout, url, new HashMap<String, String>());
            // getRecords never returns null, so the size can be returned directly.
            return getRecords(resp).size();
        }
    }

    /**
     * Callable retrieving the record that corresponds to a Scielo identifier.
     */
    private class FindByIdCallable implements Callable<List<ImportRecord>> {

        private String id;

        private FindByIdCallable(String id) {
            this.id = id;
        }

        /**
         * @return a list with the first record of the response, or an empty list
         * @throws BadRequestException when the identifier does not have the expected shape
         */
        @Override
        public List<ImportRecord> call() throws Exception {
            String scieloId = id.trim();
            if (!SCIELO_ID_PATTERN.matcher(scieloId).matches()) {
                throw new BadRequestException("id provided : " + scieloId + " is not an ScieloID");
            }
            List<ImportRecord> results = new ArrayList<>();
            String url = ENDPOINT_SEARCH_SCIELO + URLEncoder.encode(scieloId, StandardCharsets.UTF_8);
            String resp = liveImportClient.executeHttpGetRequest(timeout, url, new HashMap<String, String>());
            Map<Integer, Map<String, List<String>>> records = getRecords(resp);
            // Short-circuit AND: isEmpty() must not be evaluated on a null map.
            if (Objects.nonNull(records) && !records.isEmpty()) {
                // Records are numbered starting from 1.
                results.add(transformSourceRecords(records.get(1)));
            }
            return results;
        }
    }

    /**
     * Callable running a search and converting every record of the response.
     */
    private class SearchByQueryCallable implements Callable<List<ImportRecord>> {

        private Query query;

        private SearchByQueryCallable(String queryString, Integer maxResult, Integer start) {
            query = new Query();
            query.addParameter("query", queryString);
            query.addParameter("start", start);
            query.addParameter("count", maxResult);
        }

        private SearchByQueryCallable(Query query) {
            this.query = query;
        }

        @Override
        public List<ImportRecord> call() throws Exception {
            List<ImportRecord> results = new ArrayList<>();
            String q = query.getParameterAsClass("query", String.class);
            Integer count = query.getParameterAsClass("count", Integer.class);
            Integer start = query.getParameterAsClass("start", Integer.class);
            URIBuilder uriBuilder = new URIBuilder(
                ENDPOINT_SEARCH_SCIELO + URLEncoder.encode(q, StandardCharsets.UTF_8));
            // A caller supplied Query may carry no paging information: only forward
            // the parameters that are actually set instead of failing on null.
            if (Objects.nonNull(start)) {
                uriBuilder.addParameter("start", start.toString());
            }
            if (Objects.nonNull(count)) {
                uriBuilder.addParameter("count", count.toString());
            }
            String resp = liveImportClient.executeHttpGetRequest(timeout, uriBuilder.toString(),
                new HashMap<String, String>());
            Map<Integer, Map<String, List<String>>> records = getRecords(resp);
            for (Map<String, List<String>> record : records.values()) {
                results.add(transformSourceRecords(record));
            }
            return results;
        }
    }

    /**
     * Parses a RIS response into its records.
     * <p>
     * A line {@code TY - JOUR} starts a new record; every other line matching the RIS
     * line pattern adds its value to the list kept for its tag in the current record.
     * Blank lines and lines not matching the pattern are ignored.
     * <p>
     * NOTE(review): a {@code TY} line with a type other than {@code JOUR} is stored as an
     * ordinary tag of the current record - confirm that this is intended.
     *
     * @param resp the raw RIS text
     * @return the records keyed by their 1-based position in the response, never null
     * @throws FileSourceException when the response cannot be parsed, e.g. it is null or
     *                             a tag line precedes the first {@code TY - JOUR} line
     */
    private Map<Integer, Map<String,List<String>>> getRecords(String resp) throws FileSourceException {
        Map<Integer, Map<String, List<String>>> records = new HashMap<>();
        int countRecord = 0;
        try (BufferedReader reader = new BufferedReader(new StringReader(resp))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Drop byte order marks and surrounding whitespace before matching.
                line = line.replaceAll("\\uFEFF", "").trim();
                if (line.isEmpty()) {
                    continue;
                }
                Matcher risMatcher = RIS_LINE_PATTERN.matcher(line);
                if (!risMatcher.matches()) {
                    continue;
                }
                String tag = risMatcher.group(1);
                String value = risMatcher.group(2);
                if (tag.equals("TY") && value.equals("JOUR")) {
                    countRecord++;
                    records.put(countRecord, new HashMap<>());
                } else {
                    Map<String, List<String>> tag2values = records.get(countRecord);
                    if (tag2values == null) {
                        // Reported through the catch below, like any other parsing failure.
                        throw new IllegalStateException(
                            "RIS tag " + tag + " found before the first 'TY - JOUR' line");
                    }
                    tag2values.computeIfAbsent(tag, key -> new ArrayList<>()).add(value);
                }
            }
        } catch (Exception e) {
            throw new FileSourceException("Cannot parse RIS file", e);
        }
        return records;
    }
}

View File

@@ -116,6 +116,11 @@
</bean> </bean>
<!-- Scielo live import service; its metadata field mapping is the scieloMetadataFieldMapping bean defined below -->
<bean id="ScieloImportService" class="org.dspace.importer.external.scielo.service.ScieloImportMetadataSourceServiceImpl" scope="singleton">
<property name="metadataFieldMapping" ref="scieloMetadataFieldMapping"/>
</bean>
<!-- Field mapping used by the Scielo import service -->
<bean id="scieloMetadataFieldMapping" class="org.dspace.importer.external.scielo.service.ScieloFieldMapping"/>
<!-- Metadatafield used to check against if it's already imported or not during the JSONLookupSearcher--> <!-- Metadatafield used to check against if it's already imported or not during the JSONLookupSearcher-->
<bean id="lookupID" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig"> <bean id="lookupID" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
<constructor-arg value="dc.identifier.other"/> <constructor-arg value="dc.identifier.other"/>

View File

@@ -94,5 +94,15 @@
</property> </property>
</bean> </bean>
</beans> <bean id="scieloLiveImportDataProvider" class="org.dspace.external.provider.impl.LiveImportDataProvider">
<!-- Live import data provider for Scielo: uses the ScieloImportService bean as metadata source,
     "scielo" as source identifier and lists Publication as its only supported entity type -->
<property name="metadataSource" ref="ScieloImportService"/>
<property name="sourceIdentifier" value="scielo"/>
<property name="recordIdMetadata" value="dc.identifier.other"/>
<property name="supportedEntityTypes">
<list>
<value>Publication</value>
</list>
</property>
</bean>
</beans>