[CST-5303] refactored crossref & vufind json processors

This commit is contained in:
Mykhaylo
2022-05-10 10:50:59 +02:00
parent 160a726505
commit c490f28c4d
9 changed files with 217 additions and 18 deletions

View File

@@ -0,0 +1,83 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.importer.external.crossref;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.importer.external.metadatamapping.contributor.JsonPathMetadataProcessor;
/**
* This processor extracts the values of one attribute from every element of a JSON array.
* For example, to extract all the values of secondAttribute from
* "array":[
* {
* "firstAttribute":"first value",
* "secondAttribute":"second value"
* },
* {
* "firstAttribute":"first value",
* "secondAttribute":"second value"
* }
* ]
*
* it is possible to configure a bean with
* pathToArray=/array and elementAttribute=/secondAttribute
*
* @author Mykhaylo Boychuk (mykhaylo.boychuk@4science.com)
*/
public class ArrayElementAttributeProcessor implements JsonPathMetadataProcessor {

    private final static Logger log = LogManager.getLogger();

    /**
     * Shared JSON mapper. It is only used for reading and is never reconfigured here,
     * so a single instance is reused instead of building a new one on every call.
     */
    private final static ObjectMapper MAPPER = new ObjectMapper();

    /** JSON Pointer expression locating the array inside the document. */
    private String pathToArray;

    /** JSON Pointer expression, evaluated on each array element, of the attribute to read. */
    private String elementAttribute;

    /**
     * Reads the configured attribute from every element found under the configured array
     * and collects the textual values.
     *
     * @param json the JSON document to inspect
     * @return the non-blank textual values in iteration order; an empty collection
     *         when the document cannot be parsed or nothing matches
     */
    @Override
    public Collection<String> processMetadata(String json) {
        Collection<String> values = new ArrayList<>();
        JsonNode rootNode = convertStringJsonToJsonNode(json);
        if (rootNode == null) {
            // The document could not be parsed: there is nothing to extract.
            return values;
        }
        Iterator<JsonNode> array = rootNode.at(pathToArray).iterator();
        while (array.hasNext()) {
            JsonNode element = array.next();
            // textValue() is null for anything that is not a JSON string; such values are skipped.
            String value = element.at(elementAttribute).textValue();
            if (StringUtils.isNotBlank(value)) {
                values.add(value);
            }
        }
        return values;
    }

    /**
     * Parses the given JSON text into a tree.
     *
     * @param json the JSON text to parse
     * @return the parsed tree, or null when the text is null or cannot be parsed
     *         (a parse failure is logged)
     */
    private JsonNode convertStringJsonToJsonNode(String json) {
        if (json == null) {
            return null;
        }
        try {
            return MAPPER.readTree(json);
        } catch (JsonProcessingException e) {
            log.error("Unable to process json response.", e);
            return null;
        }
    }

    /**
     * @param pathToArray JSON Pointer expression locating the array, e.g. /array
     */
    public void setPathToArray(String pathToArray) {
        this.pathToArray = pathToArray;
    }

    /**
     * @param elementAttribute JSON Pointer expression of the attribute to read from
     *                         each array element, e.g. /secondAttribute
     */
    public void setElementAttribute(String elementAttribute) {
        this.elementAttribute = elementAttribute;
    }

}

View File

@@ -0,0 +1,88 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.importer.external.crossref;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.importer.external.metadatamapping.contributor.JsonPathMetadataProcessor;
/**
* This processor extracts all the values contained in a matrix (an array of arrays).
* Only the path to the matrix needs to be configured, in "pathToMatrix".
* For example, it extracts all the values of
* "matrix": [
* [
* "first",
* "second"
* ],
* [
* "third"
* ],
* [
* "fourth",
* "fifth"
* ]
* ],
*
* @author Mykhaylo Boychuk (mykhaylo.boychuk@4science.com)
*/
public class MatrixElementProcessor implements JsonPathMetadataProcessor {

    private final static Logger log = LogManager.getLogger();

    /**
     * Shared JSON mapper. It is only used for reading and is never reconfigured here,
     * so a single instance is reused instead of building a new one on every call.
     */
    private final static ObjectMapper MAPPER = new ObjectMapper();

    /** JSON Pointer expression locating the matrix inside the document. */
    private String pathToMatrix;

    /**
     * Collects the textual values found under the configured matrix. Elements that are
     * arrays contribute each of their own elements; any other element contributes itself.
     *
     * @param json the JSON document to inspect
     * @return the non-blank textual values in iteration order; an empty collection
     *         when the document cannot be parsed or nothing matches
     */
    @Override
    public Collection<String> processMetadata(String json) {
        Collection<String> values = new ArrayList<>();
        JsonNode rootNode = convertStringJsonToJsonNode(json);
        if (rootNode == null) {
            // The document could not be parsed: there is nothing to extract.
            return values;
        }
        Iterator<JsonNode> rows = rootNode.at(pathToMatrix).elements();
        while (rows.hasNext()) {
            JsonNode row = rows.next();
            if (row.isArray()) {
                Iterator<JsonNode> cells = row.iterator();
                while (cells.hasNext()) {
                    addIfNotBlank(values, cells.next().textValue());
                }
            } else {
                // The element is not a nested array: treat it as a single value.
                addIfNotBlank(values, row.textValue());
            }
        }
        return values;
    }

    /**
     * Adds the value to the collection unless it is null, empty or whitespace only.
     */
    private void addIfNotBlank(Collection<String> values, String value) {
        if (StringUtils.isNotBlank(value)) {
            values.add(value);
        }
    }

    /**
     * Parses the given JSON text into a tree.
     *
     * @param json the JSON text to parse
     * @return the parsed tree, or null when the text is null or cannot be parsed
     *         (a parse failure is logged)
     */
    private JsonNode convertStringJsonToJsonNode(String json) {
        if (json == null) {
            return null;
        }
        try {
            return MAPPER.readTree(json);
        } catch (JsonProcessingException e) {
            log.error("Unable to process json response.", e);
            return null;
        }
    }

    /**
     * @param pathToMatrix JSON Pointer expression locating the matrix, e.g. /matrix
     */
    public void setPathToMatrix(String pathToMatrix) {
        this.pathToMatrix = pathToMatrix;
    }

}

View File

@@ -9,6 +9,13 @@ package org.dspace.importer.external.metadatamapping.contributor;
import java.util.Collection;
/**
* Service interface for processing a JSON document.
* Implementations are responsible for all the logic needed
* to extract values from the JSON document.
*
* @author Mykhaylo Boychuk (mykhaylo.boychuk@4science.com)
*/
public interface JsonPathMetadataProcessor {
public Collection<String> processMetadata(String json);

View File

@@ -10,6 +10,7 @@ package org.dspace.importer.external.metadatamapping.contributor;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.Objects;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
@@ -118,7 +119,7 @@ public class SimpleJsonPathMetadataContributor implements MetadataContributor<St
public Collection<MetadatumDTO> contributeMetadata(String fullJson) {
Collection<MetadatumDTO> metadata = new ArrayList<>();
Collection<String> metadataValue = new ArrayList<>();
if (metadataProcessor != null) {
if (Objects.nonNull(metadataProcessor)) {
metadataValue = metadataProcessor.processMetadata(fullJson);
} else {
JsonNode jsonNode = convertStringJsonToJsonNode(fullJson);
@@ -126,13 +127,13 @@ public class SimpleJsonPathMetadataContributor implements MetadataContributor<St
if (node.isArray()) {
Iterator<JsonNode> nodes = node.iterator();
while (nodes.hasNext()) {
String nodeValue = nodes.next().textValue();
String nodeValue = getStringValue(nodes.next());
if (StringUtils.isNotBlank(nodeValue)) {
metadataValue.add(nodeValue);
}
}
} else {
String nodeValue = node.textValue();
String nodeValue = getStringValue(node);
if (StringUtils.isNotBlank(nodeValue)) {
metadataValue.add(nodeValue);
}
@@ -149,6 +150,16 @@ public class SimpleJsonPathMetadataContributor implements MetadataContributor<St
return metadata;
}
/**
 * Converts a scalar JSON node into its string form.
 *
 * @param node the node to convert
 * @return the text of a textual node, the string form of a numeric node,
 *         or an empty string for any other node
 */
private String getStringValue(JsonNode node) {
    if (node == null || node.isMissingNode() || node.isNull()) {
        // An absent or explicit-null value is not a conversion failure: skip it quietly.
        return StringUtils.EMPTY;
    }
    if (node.isTextual()) {
        return node.textValue();
    }
    if (node.isNumber()) {
        return node.numberValue().toString();
    }
    // asText() is empty for objects and arrays, so the node's own string form is logged instead.
    log.error("It wasn't possible to convert the value of the following JsonNode:" + node.toString());
    return StringUtils.EMPTY;
}
private JsonNode convertStringJsonToJsonNode(String json) {
ObjectMapper mapper = new ObjectMapper();
JsonNode body = null;

View File

@@ -32,7 +32,12 @@
]
],
"title": "La pianta marmorea di Roma antica: Forma urbis Romae /",
"urls": []
"urls": [
{
"url": "http://hdl.handle.net/20.500.12390/231",
"desc": "http://hdl.handle.net/20.500.12390/231"
}
]
},
{
"authors": {

View File

@@ -8,6 +8,7 @@
package org.dspace.app.rest;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -39,21 +40,18 @@ public class AbstractLiveImportIntegrationTest extends AbstractControllerIntegra
}
}
private boolean checkMetadataValue(List<MetadatumDTO> list, List<MetadatumDTO> list2) {
for (MetadatumDTO metadatum : list) {
for (MetadatumDTO metadatum2 : list2) {
if (sameMetadatum(metadatum, metadatum2)) {
assertEquals(metadatum.getValue(), metadatum2.getValue());
}
}
private void checkMetadataValue(List<MetadatumDTO> list, List<MetadatumDTO> list2) {
assertEquals(list.size(), list2.size());
for (int i = 0; i < list.size(); i++) {
assertTrue(sameMetadatum(list.get(i), list2.get(i)));
}
return true;
}
private boolean sameMetadatum(MetadatumDTO metadatum, MetadatumDTO metadatum2) {
if (StringUtils.equals(metadatum.getSchema(), metadatum2.getSchema()) &&
StringUtils.equals(metadatum.getElement(), metadatum2.getElement()) &&
StringUtils.equals(metadatum.getQualifier(), metadatum2.getQualifier())) {
StringUtils.equals(metadatum.getQualifier(), metadatum2.getQualifier()) &&
StringUtils.equals(metadatum.getValue(), metadatum2.getValue())) {
return true;
}
return false;

View File

@@ -105,8 +105,8 @@ public class CrossRefImportMetadataSourceServiceIT extends AbstractLiveImportInt
metadatums.add(title);
metadatums.add(author);
metadatums.add(type);
metadatums.add(date);
metadatums.add(type);
metadatums.add(ispartof);
metadatums.add(doi);
metadatums.add(issn);
@@ -131,8 +131,8 @@ public class CrossRefImportMetadataSourceServiceIT extends AbstractLiveImportInt
metadatums2.add(title2);
metadatums2.add(author2);
metadatums2.add(type2);
metadatums2.add(date2);
metadatums2.add(type2);
metadatums2.add(ispartof2);
metadatums2.add(doi2);
metadatums2.add(issn2);

View File

@@ -21,7 +21,7 @@
<entry key-ref="crossref.title" value-ref="crossrefTitleContrib"/>
<entry key-ref="crossref.authors" value-ref="crossrefAuthorContrib"/>
<entry key-ref="crossref.isbn" value-ref="crossrefISBNContrib"/>
<entry key-ref="crossref.year" value-ref="crossrefYearContrib"/> <!-- rivedere -->
<entry key-ref="crossref.year" value-ref="crossrefYearContrib"/>
<entry key-ref="crossref.editors" value-ref="crossrefEditorsContrib"/>
<entry key-ref="crossref.type" value-ref="crossrefDoiTypeContrib"/>
<entry key-ref="crossref.journal" value-ref="crossrefJournalContrib"/>

View File

@@ -58,16 +58,23 @@
<!-- Contributor for the vufind.dc.identifier field (dc.identifier); value extraction is delegated to vufindUriProcessor. -->
<!-- NOTE(review): with a metadataProcessor configured the "query" value is presumably not used - confirm. -->
<bean id="vufindIdentifier" class="org.dspace.importer.external.metadatamapping.contributor.SimpleJsonPathMetadataContributor">
<property name="field" ref="vufind.dc.identifier"/>
<property name="query" value="/urls/url"/>
<property name="metadataProcessor" ref="vufindUriProcessor"></property>
</bean>
<!-- Reads the textual "url" attribute of every element of the "urls" array. -->
<bean name="vufindUriProcessor" class="org.dspace.importer.external.crossref.ArrayElementAttributeProcessor">
<property name="pathToArray" value="/urls"></property>
<property name="elementAttribute" value="/url"></property>
</bean>
<bean id="vufind.dc.identifier" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
<constructor-arg value="dc.identifier"/>
</bean>
<!-- Contributor for the vufind.dc.subject field (dc.subject); value extraction is delegated to vufindSubjectsProcessor. -->
<!-- NOTE(review): with a metadataProcessor configured the "query" value is presumably not used - confirm. -->
<bean id="vufindSubject" class="org.dspace.importer.external.metadatamapping.contributor.SimpleJsonPathMetadataContributor">
<property name="field" ref="vufind.dc.subject"/>
<property name="query" value="$.subjects[*][*]"/>
<property name="metadataProcessor" ref="vufindSubjectsProcessor"></property>
</bean>
<!-- Collects every non-blank textual value from the nested arrays found under "subjects". -->
<bean name="vufindSubjectsProcessor" class="org.dspace.importer.external.crossref.MatrixElementProcessor">
<property name="pathToMatrix" value="/subjects"></property>
</bean>
<bean id="vufind.dc.subject" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
<constructor-arg value="dc.subject"/>
</bean>