Add endnote support and (partial) test

This commit is contained in:
Pasquale Cavallo
2020-07-06 16:35:48 +02:00
parent 3c349cb70c
commit b7949a14d7
6 changed files with 323 additions and 1 deletions

View File

@@ -0,0 +1,99 @@
package org.dspace.importer.external.endnote.service;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.dspace.importer.external.exception.FileSourceException;
import org.dspace.importer.external.metadatamapping.MetadataFieldConfig;
import org.dspace.importer.external.metadatamapping.contributor.MetadataContributor;
import org.dspace.importer.external.service.components.AbstractPlainMetadataSource;
import org.dspace.importer.external.service.components.dto.PlainMetadataKeyValueItem;
import org.dspace.importer.external.service.components.dto.PlainMetadataSourceDto;
public class EndnoteImportMetadataSourceServiceImpl extends AbstractPlainMetadataSource {
@Override
public String getImportSource() {
return "EndnoteMetadataSource";
}
@Override
protected List<PlainMetadataSourceDto> readData(InputStream fileInpuStream) throws FileSourceException {
List<PlainMetadataSourceDto> list = new ArrayList<>();
try {
int lineForDebug = 3;
List<PlainMetadataKeyValueItem> tokenized = tokenize(fileInpuStream);
List<PlainMetadataKeyValueItem> tmpList = new ArrayList<>();
for (PlainMetadataKeyValueItem item : tokenized) {
if (item.getKey() == null || item.getKey().isEmpty()) {
throw new FileSourceException("Null or empty key expected on line "
+ lineForDebug + ". Keys cannot be null nor empty");
}
if ("EF".equals(item.getKey())) {
break;
}
if ("ER".equals(item.getKey())) {
PlainMetadataSourceDto dto = new PlainMetadataSourceDto();
dto.setMetadata(new ArrayList<>(tmpList));
list.add(dto);
tmpList = new ArrayList<>();
} else {
if (item.getValue() == null || item.getValue().isEmpty()) {
throw new FileSourceException("Null or empty value expected on line "
+ lineForDebug + ". Value expected");
}
tmpList.add(item);
}
lineForDebug++;
}
} catch (Exception e) {
throw new FileSourceException("Error reading file");
}
return list;
}
private List<PlainMetadataKeyValueItem> tokenize(InputStream fileInpuStream)
throws IOException, FileSourceException {
BufferedReader reader = new BufferedReader(new InputStreamReader(fileInpuStream));
String line;
line = reader.readLine();
if (line == null || !line.startsWith("FN")) {
throw new FileSourceException("Invalid endNote file");
}
line = reader.readLine();
if (line == null || !line.startsWith("VR")) {
throw new FileSourceException("Invalid endNote file");
}
Pattern pattern = Pattern.compile("(^[A-Z]{2}) ?(.*)$");
List<PlainMetadataKeyValueItem> list = new ArrayList<PlainMetadataKeyValueItem>();
while ((line = reader.readLine()) != null) {
line = line.trim();
if (line.isEmpty() || line.equals("")) {
continue;
}
Matcher matcher = pattern.matcher(line);
if (matcher.matches()) {
PlainMetadataKeyValueItem item = new PlainMetadataKeyValueItem();
item.setKey(matcher.group(1));
item.setValue(matcher.group(2));
list.add(item);
}
}
return list;
}
@Override
public void setMetadataFieldMap(Map<MetadataFieldConfig,
MetadataContributor<PlainMetadataSourceDto>> metadataFieldMap) {
super.setMetadataFieldMap(metadataFieldMap);
}
}

View File

@@ -71,6 +71,12 @@
<property name="metadataFieldMap" ref="tsvMetadataFieldMap" />
</bean>
<!-- Endnote file import service; its metadataFieldMap property is wired to the endnoteMetadataFieldMap bean -->
<bean id="EndnoteImportService"
class="org.dspace.importer.external.endnote.service.EndnoteImportMetadataSourceServiceImpl" scope="singleton">
<property name="metadataFieldMap" ref="endnoteMetadataFieldMap"></property>
</bean>
<!-- Metadata field used to check whether a record has already been imported during the JSONLookupSearcher -->
<bean id="lookupID" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
<constructor-arg value="dc.identifier.other"/>

View File

@@ -871,7 +871,7 @@ public class WorkspaceItemRestRepositoryIT extends AbstractControllerIntegration
*
* @throws Exception
*/
public void createSingleWorkspaceItemFromFileWithOneEntryTest() throws Exception {
public void createSingleWorkspaceItemFromBibtexFileWithOneEntryTest() throws Exception {
context.turnOffAuthorisationSystem();
//** GIVEN **
@@ -938,7 +938,170 @@ public class WorkspaceItemRestRepositoryIT extends AbstractControllerIntegration
bibtex.close();
}
@Test
/**
 * Test the creation of workspaceitems by POSTing a csv file to the resource collection endpoint,
 * first without an owning collection and then explicitly targeting the second collection.
 *
 * @throws Exception
 */
public void createSingleWorkspaceItemFromCSVWithOneEntryTest() throws Exception {
    context.turnOffAuthorisationSystem();

    //** GIVEN **
    //1. A community-collection structure with one parent community with sub-community and two collections.
    parentCommunity = CommunityBuilder.createCommunity(context)
                                      .withName("Parent Community")
                                      .build();
    Community child1 = CommunityBuilder.createSubCommunity(context, parentCommunity)
                                       .withName("Sub Community")
                                       .build();
    Collection col1 = CollectionBuilder.createCollection(context, child1)
                                       .withName("Collection 1")
                                       .withSubmitterGroup(eperson)
                                       .build();
    Collection col2 = CollectionBuilder.createCollection(context, child1)
                                       .withName("Collection 2")
                                       .withSubmitterGroup(eperson)
                                       .build();

    // The mock file buffers the stream content when it is built (the same instance is uploaded twice
    // below), so the stream is released here instead of only at the end of a fully successful run.
    final MockMultipartFile csvFile;
    try (InputStream csv = getClass().getResourceAsStream("csv-test.csv")) {
        csvFile = new MockMultipartFile("file", "/local/path/csv-test.csv",
                                        "text/csv", csv);
    }

    context.restoreAuthSystemState();

    String authToken = getAuthToken(eperson.getEmail(), password);

    // bulk create workspaceitems in the default collection (col1)
    getClient(authToken).perform(fileUpload("/api/submission/workspaceitems")
                .file(csvFile))
            // bulk create should return 200, 201 (created) is better for single resource
            .andExpect(status().isOk())
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.title'][0].value",
                    is("My Article")))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.author'][0].value",
                    is("Nobody")))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone"
                    + "['dc.date.issued'][0].value",
                    is("2006")))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.source'][0].value",
                    is("My Journal")))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone"
                    + "['dc.identifier.issn'][0].value",
                    is("Mock ISSN")))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.type'][0].value",
                    is("Mock subtype")))
            .andExpect(
                    jsonPath("$._embedded.workspaceitems[0]._embedded.collection.id", is(col1.getID().toString())))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]"
                    + ".metadata['dc.source'][0].value",
                    is("/local/path/csv-test.csv")))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]"
                    + ".metadata['dc.title'][0].value",
                    is("csv-test.csv")))
            .andExpect(
                    jsonPath("$._embedded.workspaceitems[*]._embedded.upload").doesNotExist());

    // bulk create workspaceitems explicitly in the col2
    getClient(authToken).perform(fileUpload("/api/submission/workspaceitems")
                .file(csvFile)
                .param("owningCollection", col2.getID().toString()))
            .andExpect(status().isOk())
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone"
                    + "['dc.title'][0].value",
                    is("My Article")))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.author'][0].value",
                    is("Nobody")))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone"
                    + "['dc.date.issued'][0].value",
                    is("2006")))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.source'][0].value",
                    is("My Journal")))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone"
                    + "['dc.identifier.issn'][0].value",
                    is("Mock ISSN")))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.type'][0].value",
                    is("Mock subtype")))
            .andExpect(
                    jsonPath("$._embedded.workspaceitems[0]._embedded.collection.id", is(col2.getID().toString())))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]"
                    + ".metadata['dc.source'][0].value",
                    is("/local/path/csv-test.csv")))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload"
                    + ".files[0].metadata['dc.title'][0].value",
                    is("csv-test.csv")))
            .andExpect(
                    jsonPath("$._embedded.workspaceitems[*]._embedded.upload").doesNotExist());
}
@Test
/**
 * Test the creation of workspaceitems by POSTing to the resource collection endpoint a csv file
 * in which some columns (issue date and type) are missing: the corresponding metadata must be absent
 * while the remaining ones are imported.
 *
 * @throws Exception
 */
public void createSingleWorkspaceItemFromCSVWithOneEntryAndMissingDataTest() throws Exception {
    context.turnOffAuthorisationSystem();

    //** GIVEN **
    //1. A community-collection structure with one parent community with sub-community and two collections.
    parentCommunity = CommunityBuilder.createCommunity(context)
                                      .withName("Parent Community")
                                      .build();
    Community child1 = CommunityBuilder.createSubCommunity(context, parentCommunity)
                                       .withName("Sub Community")
                                       .build();
    Collection col1 = CollectionBuilder.createCollection(context, child1)
                                       .withName("Collection 1")
                                       .withSubmitterGroup(eperson)
                                       .build();
    Collection col2 = CollectionBuilder.createCollection(context, child1)
                                       .withName("Collection 2")
                                       .withSubmitterGroup(eperson)
                                       .build();

    // Load the csv fixture with the missing columns (the bibtex fixture was loaded here by mistake).
    // The mock file buffers the stream content when it is built, so the stream can be released right away.
    final MockMultipartFile csvFile;
    try (InputStream csv = getClass().getResourceAsStream("csv-missing-field-test.csv")) {
        csvFile = new MockMultipartFile("file", "/local/path/csv-missing-field-test.csv",
                                        "text/csv", csv);
    }

    context.restoreAuthSystemState();

    String authToken = getAuthToken(eperson.getEmail(), password);

    // bulk create workspaceitems in the default collection (col1)
    getClient(authToken).perform(fileUpload("/api/submission/workspaceitems")
                .file(csvFile))
            // bulk create should return 200, 201 (created) is better for single resource
            .andExpect(status().isOk())
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.title'][0].value",
                    is("My Article")))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.author'][0].value",
                    is("Nobody")))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone"
                    + "['dc.date.issued'][0].value").doesNotExist())
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.source'][0].value",
                    is("My Journal")))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone"
                    + "['dc.identifier.issn'][0].value",
                    is("Mock ISSN")))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.type'][0].value"
                    ).doesNotExist())
            .andExpect(
                    jsonPath("$._embedded.workspaceitems[0]._embedded.collection.id", is(col1.getID().toString())))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]"
                    + ".metadata['dc.source'][0].value",
                    is("/local/path/csv-missing-field-test.csv")))
            .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]"
                    + ".metadata['dc.title'][0].value",
                    is("csv-missing-field-test.csv")))
            .andExpect(
                    jsonPath("$._embedded.workspaceitems[*]._embedded.upload").doesNotExist());
}
@Test
/**

View File

@@ -0,0 +1 @@
My Article,Nobody,,My Journal,"This is my abstract, i use comma to check escape works fine",Mock ISSN
1 My Article Nobody My Journal This is my abstract, i use comma to check escape works fine Mock ISSN

View File

@@ -0,0 +1 @@
My Article,Nobody,2006,My Journal,"This is my abstract, i use comma to check escape works fine",Mock ISSN,Mock subtype
1 My Article Nobody 2006 My Journal This is my abstract, i use comma to check escape works fine Mock ISSN Mock subtype

View File

@@ -0,0 +1,52 @@
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:context="http://www.springframework.org/schema/context"
xmlns:util="http://www.springframework.org/schema/util"
xsi:schemaLocation="http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-2.5.xsd
http://www.springframework.org/schema/context
http://www.springframework.org/schema/context/spring-context-2.5.xsd http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util.xsd"
default-autowire-candidates="*Service,*DAO,javax.sql.DataSource">
<context:annotation-config/>
<!-- allows us to use spring annotations in beans -->
<util:map id="endnoteMetadataFieldMap" key-type="org.dspace.importer.external.metadatamapping.MetadataFieldConfig"
value-type="org.dspace.importer.external.metadatamapping.contributor.MetadataContributor">
<description>Defines which Endnote tag is mapped to which metadata field. Note that while the key must be unique it
only matters here for postprocessing of the value. The mapped MetadataContributor has full control over
what metadatafield is generated.
</description>
<entry key-ref="dcTitle" value-ref="endnoteTitleContrib" />
<entry key-ref="dcAuthors" value-ref="endnoteAuthorsContrib" />
<entry key-ref="dcAbstract" value-ref="endnoteAbstractContrib" />
<entry key-ref="dcIssued" value-ref="endnoteIssuedContrib" />
<entry key-ref="dcJournal" value-ref="endnoteJournalContrib" />
</util:map>
<!-- Contributor keyed on the SO tag, targeting the dcJournal field bean -->
<bean id="endnoteJournalContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleMetadataContributor">
<property name="field" ref="dcJournal"/>
<property name="key" value="SO" />
</bean>
<!-- Contributor keyed on the PY tag, targeting the dcIssued field bean -->
<bean id="endnoteIssuedContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleMetadataContributor">
<property name="field" ref="dcIssued"/>
<property name="key" value="PY" />
</bean>
<!-- Contributor keyed on the AB tag, targeting the dcAbstract field bean -->
<bean id="endnoteAbstractContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleMetadataContributor">
<property name="field" ref="dcAbstract"/>
<property name="key" value="AB" />
</bean>
<!-- Contributor keyed on the AU tag, targeting the dcAuthors field bean -->
<bean id="endnoteAuthorsContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleMetadataContributor">
<property name="field" ref="dcAuthors"/>
<property name="key" value="AU" />
</bean>
<!-- Contributor keyed on the TI tag, targeting the dcTitle field bean -->
<bean id="endnoteTitleContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleMetadataContributor">
<property name="field" ref="dcTitle"/>
<property name="key" value="TI" />
</bean>
</beans>