diff --git a/dspace-api/src/main/java/org/dspace/importer/external/csv/service/CharacterSeparatedImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/csv/service/CharacterSeparatedImportMetadataSourceServiceImpl.java new file mode 100644 index 0000000000..31ee1e5e5a --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/csv/service/CharacterSeparatedImportMetadataSourceServiceImpl.java @@ -0,0 +1,154 @@
+/**
+ * The contents of this file are subject to the license and copyright
+ * detailed in the LICENSE and NOTICE files at the root of the source
+ * tree and available online at
+ *
+ * http://www.dspace.org/license/
+ */
+package org.dspace.importer.external.csv.service;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import au.com.bytecode.opencsv.CSVReader;
+import org.dspace.importer.external.exception.FileSourceException;
+import org.dspace.importer.external.metadatamapping.MetadataFieldConfig;
+import org.dspace.importer.external.metadatamapping.contributor.MetadataContributor;
+import org.dspace.importer.external.service.components.AbstractPlainMetadataSource;
+import org.dspace.importer.external.service.components.MetadataSource;
+import org.dspace.importer.external.service.components.dto.PlainMetadataKeyValueItem;
+import org.dspace.importer.external.service.components.dto.PlainMetadataSourceDto;
+
+
+/**
+ * This class is an implementation of {@link MetadataSource} which extends {@link AbstractPlainMetadataSource}
+ * in order to parse "character separated" files, like CSV, TSV, etc., using the Live Import framework.
+ *
+ * @author Pasquale Cavallo
+ *
+ */
+public class CharacterSeparatedImportMetadataSourceServiceImpl extends AbstractPlainMetadataSource {
+
+    private char separator = ',';
+
+    private char escapeCharacter = '"';
+
+    private Integer skipLines = 1;
+
+    private String importSource = "CsvMetadataSource";
+
+    /**
+     * Set the number of lines to skip at the start of the file. This method is suitable,
+     * for example, to skip file headers.
+     *
+     * @param skipLines the number of lines at the start of the file to skip.
+     */
+    public void setSkipLines(Integer skipLines) {
+        this.skipLines = skipLines;
+    }
+
+    /**
+     *
+     * @return the number of lines to skip
+     */
+    public Integer getSkipLines() {
+        return skipLines;
+    }
+
+    /**
+     * Method to inject the separator character. When injected through configuration,
+     * the value must be the ASCII code of the character:
+     * for example, 9 for tab, 44 for comma.
+     */
+    public void setSeparator(char separator) {
+        this.separator = separator;
+    }
+
+    @Override
+    public String getImportSource() {
+        return importSource;
+    }
+
+    /**
+     * Method to set the name of the source
+     */
+    public void setImportSource(String importSource) {
+        this.importSource = importSource;
+    }
+
+    /**
+     * Method to inject the escape character. When injected through configuration,
+     * the value must be the ASCII code of the character:
+     * for example, 34 for double quote.
+     */
+    public void setEscapeCharacter(char escapeCharacter) {
+        this.escapeCharacter = escapeCharacter;
+    }
+
+    /**
+     * This method processes any kind of "character separated" file, like CSV, TSV, and so on.
+     * It returns a List of PlainMetadataSourceDto.
+     * Through the superclass methods AbstractPlainMetadataSource.getRecord(s), each of these
+     * elements will then be converted into an {@link org.dspace.importer.external.datamodel.ImportRecord}.
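+     * For example (illustrative row, with the default comma separator): the line
+     * {@code My Article,Nobody,2006} is mapped to the key/value pairs
+     * ("0", "My Article"), ("1", "Nobody"), ("2", "2006").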
+     * Columns will be identified by their position, in zero-based notation.
+     * Separator character and escape character MUST be defined at class level. The number of lines to skip
+     * (headers) can also be defined through the field skipLines.
+     *
+     * @param inputStream the InputStream of the file
+     * @return A list of PlainMetadataSourceDto
+     * @throws FileSourceException if, for any reason, the file is not parsable
+     */
+    @Override
+    protected List<PlainMetadataSourceDto> readData(InputStream inputStream) throws FileSourceException {
+        List<PlainMetadataSourceDto> plainMetadataList = new ArrayList<>();
+        try (CSVReader csvReader = new CSVReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8),
+            separator, escapeCharacter)) {
+            // read all rows
+            List<String[]> lines = csvReader.readAll();
+            int listSize = lines == null ? 0 : lines.size();
+            int count = skipLines;
+            // iterate over the rows, skipping the first skipLines lines
+            while (count < listSize) {
+                String[] items = lines.get(count);
+                List<PlainMetadataKeyValueItem> keyValueList = new ArrayList<>();
+                if (items != null) {
+                    int size = items.length;
+                    int index = 0;
+                    // iterate over the columns of the selected row
+                    while (index < size) {
+                        // create a key/value item for the specific row/column
+                        PlainMetadataKeyValueItem keyValueItem = new PlainMetadataKeyValueItem();
+                        keyValueItem.setKey(String.valueOf(index));
+                        keyValueItem.setValue(items[index]);
+                        keyValueList.add(keyValueItem);
+                        index++;
+                    }
+                    // save all the column key/value pairs for the given row
+                    PlainMetadataSourceDto dto = new PlainMetadataSourceDto();
+                    dto.setMetadata(keyValueList);
+                    plainMetadataList.add(dto);
+                }
+                count++;
+            }
+        } catch (IOException e) {
+            throw new FileSourceException("Error reading file", e);
+        }
+        return plainMetadataList;
+    }
+
+    @Override
+    public void setMetadataFieldMap(Map<MetadataFieldConfig, MetadataContributor<PlainMetadataSourceDto>> metadataFieldMap) {
+        super.setMetadataFieldMap(metadataFieldMap);
+    }
+
+} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/endnote/service/EndnoteImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/endnote/service/EndnoteImportMetadataSourceServiceImpl.java new file mode 100644 index 0000000000..9881832369 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/endnote/service/EndnoteImportMetadataSourceServiceImpl.java @@ -0,0 +1,140 @@
+/**
+ * The contents of this file are subject to the license and copyright
+ * detailed in the LICENSE and NOTICE files at the root of the source
+ * tree and available online at
+ *
+ * http://www.dspace.org/license/
+ */
+package org.dspace.importer.external.endnote.service;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.dspace.importer.external.exception.FileSourceException;
+import org.dspace.importer.external.metadatamapping.MetadataFieldConfig;
+import org.dspace.importer.external.metadatamapping.contributor.MetadataContributor;
+import org.dspace.importer.external.service.components.AbstractPlainMetadataSource;
+import org.dspace.importer.external.service.components.dto.PlainMetadataKeyValueItem;
+import org.dspace.importer.external.service.components.dto.PlainMetadataSourceDto;
+
+/**
+ * Implements a metadata importer for EndNote files
+ *
+ * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it)
+ */
+public class EndnoteImportMetadataSourceServiceImpl extends AbstractPlainMetadataSource {
+
+    @Override
+    
public String getImportSource() {
+        return "EndnoteMetadataSource";
+    }
+
+    /**
+     * This method maps the data present in the inputStream and returns a list of PlainMetadataSourceDto.
+     * Each PlainMetadataSourceDto will be used to create a single {@link org.dspace.importer.external.datamodel.ImportRecord}
+     *
+     * @param fileInputStream the inputStream of the EndNote file
+     * @return List of {@link org.dspace.importer.external.service.components.dto.PlainMetadataSourceDto}
+     * @throws FileSourceException
+     * @see org.dspace.importer.external.service.components.AbstractPlainMetadataSource
+     */
+    @Override
+    protected List<PlainMetadataSourceDto> readData(InputStream fileInputStream) throws FileSourceException {
+        List<PlainMetadataSourceDto> list = new ArrayList<>();
+        try {
+            // line counting starts from 3, because the first two lines (FN and VR) are consumed by tokenize
+            int lineForDebug = 3;
+            List<PlainMetadataKeyValueItem> tokenized = tokenize(fileInputStream);
+            List<PlainMetadataKeyValueItem> tmpList = new ArrayList<>();
+            // iterate over the key/value pairs, closing the current PlainMetadataSourceDto on "ER" rows
+            // (end of record) and stopping on EF (end of file).
+            for (PlainMetadataKeyValueItem item : tokenized) {
+                if (item.getKey() == null || item.getKey().isEmpty()) {
+                    throw new FileSourceException("Null or empty key found on line "
+                        + lineForDebug + ". Keys cannot be null or empty");
+                }
+                if ("EF".equals(item.getKey())) {
+                    // end of file
+                    break;
+                }
+                if ("ER".equals(item.getKey())) {
+                    // "ER" is a record delimiter: save the metadata collected so far,
+                    // then start collecting a new record
+                    PlainMetadataSourceDto dto = new PlainMetadataSourceDto();
+                    dto.setMetadata(new ArrayList<>(tmpList));
+                    list.add(dto);
+                    tmpList = new ArrayList<>();
+                } else {
+                    if (item.getValue() == null || item.getValue().isEmpty()) {
+                        throw new FileSourceException("Null or empty value found on line "
+                            + lineForDebug + ". A value is required");
+                    }
+                    tmpList.add(item);
+                }
+                lineForDebug++;
+            }
+        } catch (Exception e) {
+            throw new FileSourceException("Error reading file", e);
+        }
+        return list;
+    }
+
+
+    /**
+     * This method iterates over the file rows, splits the content into a list of key/value items
+     * through a RegExp, and saves the content sequentially.
+     * The keys "FN" and "VR", which form the preamble of an EndNote file, are checked but not saved.
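+     * For example (illustrative line): {@code AU Author 1} is split into the key {@code AU}
+     * and the value {@code Author 1}.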
+     *
+     * @param fileInputStream the inputStream of the EndNote file
+     * @return A list of key/value items which map the file's rows sequentially
+     * @throws IOException
+     * @throws FileSourceException
+     */
+    private List<PlainMetadataKeyValueItem> tokenize(InputStream fileInputStream)
+        throws IOException, FileSourceException {
+        BufferedReader reader = new BufferedReader(new InputStreamReader(fileInputStream));
+        String line;
+        line = reader.readLine();
+        // FN and VR work as a preamble: just check and skip them
+        if (line == null || !line.startsWith("FN")) {
+            throw new FileSourceException("Invalid EndNote file");
+        }
+        line = reader.readLine();
+        if (line == null || !line.startsWith("VR")) {
+            throw new FileSourceException("Invalid EndNote file");
+        }
+        // split each row into a first part, ^[A-Z]{2}, used as key (the meaning of the data)
+        // and a second part, (.*), used as value (the data itself)
+        Pattern pattern = Pattern.compile("(^[A-Z]{2}) ?(.*)$");
+        List<PlainMetadataKeyValueItem> list = new ArrayList<>();
+        while ((line = reader.readLine()) != null) {
+            line = line.trim();
+            // skip empty lines
+            if (line.isEmpty()) {
+                continue;
+            }
+            Matcher matcher = pattern.matcher(line);
+            if (matcher.matches()) {
+                PlainMetadataKeyValueItem item = new PlainMetadataKeyValueItem();
+                item.setKey(matcher.group(1));
+                item.setValue(matcher.group(2));
+                list.add(item);
+            }
+        }
+        return list;
+    }
+
+    @Override
+    public void setMetadataFieldMap(Map<MetadataFieldConfig, MetadataContributor<PlainMetadataSourceDto>> metadataFieldMap) {
+        super.setMetadataFieldMap(metadataFieldMap);
+    }
+
+} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/EnhancedSimpleMetadataContributor.java b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/EnhancedSimpleMetadataContributor.java new file mode 100644 index 0000000000..b06322ac2c --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/EnhancedSimpleMetadataContributor.java @@ -0,0 +1,108 @@
+/**
+ * The contents of this file are subject to the license and copyright
+ * detailed in the LICENSE and NOTICE files at the root of the source
+ * tree and available online at
+ *
+ * http://www.dspace.org/license/
+ */
+package org.dspace.importer.external.metadatamapping.contributor;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.List;
+
+import au.com.bytecode.opencsv.CSVReader;
+import org.dspace.importer.external.metadatamapping.MetadatumDTO;
+import org.dspace.importer.external.service.components.dto.PlainMetadataKeyValueItem;
+import org.dspace.importer.external.service.components.dto.PlainMetadataSourceDto;
+
+
+/**
+ * This class implements functionalities to handle common situations regarding plain metadata.
+ * In some scenarios, like CSV or TSV, the format doesn't allow lists.
+ * We can use this MetadataContributor to parse a given plain metadata value and split it into
+ * the related list, based on the delimiter and the escape character.
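+ * For example (illustrative value, using the defaults below): {@code Author 1, "Author 2, Jr.", Author 3}
+ * is split into three values, with the comma inside the quoted token preserved.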
+ * Default values are comma (,) for the delimiter and double quote (") for the escape character
+ *
+ * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it)
+ *
+ */
+public class EnhancedSimpleMetadataContributor extends SimpleMetadataContributor {
+
+    private char delimiter = ',';
+
+    private char escape = '"';
+
+    /**
+     * This method can be used to set the delimiter used during parsing.
+     * If no delimiter is set, comma will be used
+     */
+    public void setDelimiter(char delimiter) {
+        this.delimiter = delimiter;
+    }
+
+    /**
+     * This method can be used to get the delimiter used in this class
+     */
+    public char getDelimiter() {
+        return delimiter;
+    }
+
+    /**
+     * Method to inject the escape character. When injected through configuration,
+     * the value must be the ASCII code of the character:
+     * for example, 34 for double quote.
+     * If no escape is set, double quote will be used
+     */
+    public void setEscape(char escape) {
+        this.escape = escape;
+    }
+
+    /**
+     * Method to get the escape character.
+     *
+     */
+    public char getEscape() {
+        return escape;
+    }
+
+    @Override
+    public Collection<MetadatumDTO> contributeMetadata(PlainMetadataSourceDto t) {
+        Collection<MetadatumDTO> values = new LinkedList<>();
+        for (PlainMetadataKeyValueItem metadatum : t.getMetadata()) {
+            if (getKey().equals(metadatum.getKey())) {
+                String[] splitted = splitToRecord(metadatum.getValue());
+                for (String value : splitted) {
+                    MetadatumDTO dcValue = new MetadatumDTO();
+                    dcValue.setValue(value);
+                    dcValue.setElement(getField().getElement());
+                    dcValue.setQualifier(getField().getQualifier());
+                    dcValue.setSchema(getField().getSchema());
+                    values.add(dcValue);
+                }
+            }
+        }
+        return values;
+    }
+
+    private String[] splitToRecord(String value) {
+        List<String[]> rows;
+        // For example, a list of authors must look like: Author 1, Author 2, Author 3.
+        // If an author name contains a comma, it is important to escape it in
+        // this way: Author 1, \"Author 2, something\", Author 3
+        try (CSVReader csvReader = new CSVReader(new StringReader(value),
+            delimiter, escape)) {
+            rows = csvReader.readAll();
+        } catch (IOException e) {
+            // fallback: use the whole input as a single value
            return new String[] { value };
+        }
+        // the input is a single row
+        return rows.get(0);
+    }
+
+} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/MultipleMetadataContributor.java b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/MultipleMetadataContributor.java new file mode 100644 index 0000000000..2685948fd9 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/MultipleMetadataContributor.java @@ -0,0 +1,139 @@
+/**
+ * The contents of this file are subject to the license and copyright
+ * detailed in the LICENSE and NOTICE files at the root of the source
+ * tree and available online at
+ *
+ * http://www.dspace.org/license/
+ */
+package org.dspace.importer.external.metadatamapping.contributor;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.dspace.importer.external.metadatamapping.MetadataFieldConfig;
+import org.dspace.importer.external.metadatamapping.MetadataFieldMapping;
+import org.dspace.importer.external.metadatamapping.MetadatumDTO;
+
+/**
+ * This Contributor helps to work around a limitation of the Live Import framework.
+ * In Live Import, one dc schema/element/qualifier can be associated with one and
+ * only one MetadataContributor, because the map they are stored in uses the dc entity as key.
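+ * For example (hypothetical setup): two contributors reading different source keys can both
+ * contribute values to dc.contributor.author by being wrapped in a single MultipleMetadataContributor.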
+ *
+ * In fact, this implementation uses the MetadataFieldConfig present in this MultipleMetadataContributor,
+ * but the data (the values of the dc metadatum) will be loaded using each of the contributors defined
+ * in the List metadatumContributors, by iterating over them.
+ *
+ * @see org.dspace.importer.external.metadatamapping.AbstractMetadataFieldMapping
+ *
+ * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it)
+ *
+ */
+public class MultipleMetadataContributor<T> implements MetadataContributor<T> {
+
+    private MetadataFieldConfig field;
+
+    private List<MetadataContributor<T>> metadatumContributors;
+
+    /**
+     * Empty constructor
+     */
+    public MultipleMetadataContributor() {
+    }
+
+    /**
+     * @param field {@link org.dspace.importer.external.metadatamapping.MetadataFieldConfig} used in
+     *              mapping
+     * @param metadatumContributors A list of MetadataContributor
+     */
+    public MultipleMetadataContributor(MetadataFieldConfig field, List<MetadataContributor<T>> metadatumContributors) {
+        this.field = field;
+        this.metadatumContributors = metadatumContributors;
+    }
+
+    /**
+     * Set the metadataFieldMapping used in the transformation of a record to actual metadata
+     *
+     * @param metadataFieldMapping the new mapping.
+     */
+    @Override
+    public void setMetadataFieldMapping(MetadataFieldMapping<T, MetadataContributor<T>> metadataFieldMapping) {
+        for (MetadataContributor<T> metadatumContributor : metadatumContributors) {
+            metadatumContributor.setMetadataFieldMapping(metadataFieldMapping);
+        }
+    }
+
+
+    /**
+     * A separate MetadatumDTO object is created for each MetadatumDTO returned from the calls to
+     * MetadataContributor.contributeMetadata(t) for each MetadataContributor in the metadatumContributors list.
+     * All of them get as dc schema/element/qualifier the values defined in the MetadataFieldConfig.
+     *
+     * @param t the object we are trying to translate
+     * @return a collection of metadata gathered from each MetadataContributor
+     */
+    @Override
+    public Collection<MetadatumDTO> contributeMetadata(T t) {
+        Collection<MetadatumDTO> values = new ArrayList<>();
+        for (MetadataContributor<T> metadatumContributor : metadatumContributors) {
+            Collection<MetadatumDTO> metadata = metadatumContributor.contributeMetadata(t);
+            values.addAll(metadata);
+        }
+        changeDC(values);
+        return values;
+    }
+
+    /**
+     * This method is the core of this implementation.
+     * It changes the DC schema/element/qualifier of each given MetadatumDTO into
+     * the ones present in this contributor.
+     * In this way, the contributors in metadatumContributors may carry any dc values,
+     * because this method remaps them all.
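+     * For example (illustrative): a value contributed under dc.identifier would be rewritten here to
+     * whatever schema/element/qualifier this contributor's MetadataFieldConfig defines.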
+     *
+     * @param values the list of metadata we want to remap
+     */
+    private void changeDC(Collection<MetadatumDTO> values) {
+        for (MetadatumDTO dto : values) {
+            dto.setElement(field.getElement());
+            dto.setQualifier(field.getQualifier());
+            dto.setSchema(field.getSchema());
+        }
+    }
+
+    /**
+     * Return the MetadataFieldConfig used while retrieving MetadatumDTO
+     *
+     * @return MetadataFieldConfig
+     */
+    public MetadataFieldConfig getField() {
+        return field;
+    }
+
+    /**
+     * Set the MetadataFieldConfig
+     *
+     * @param field MetadataFieldConfig used while retrieving MetadatumDTO
+     */
+    public void setField(MetadataFieldConfig field) {
+        this.field = field;
+    }
+
+    /**
+     * Return the List of MetadataContributor objects set on this class
+     *
+     * @return metadatumContributors, list of MetadataContributor
+     */
+    public List<MetadataContributor<T>> getMetadatumContributors() {
+        return metadatumContributors;
+    }
+
+    /**
+     * Set the List of MetadataContributor objects on this class
+     *
+     * @param metadatumContributors A list of MetadataContributor objects
+     */
+    public void setMetadatumContributors(List<MetadataContributor<T>> metadatumContributors) {
+        this.metadatumContributors = metadatumContributors;
+    }
+} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/SimpleMetadataContributor.java b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/SimpleMetadataContributor.java index 21dd1bfcee..1b9007f23c 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/SimpleMetadataContributor.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/SimpleMetadataContributor.java @@ -77,18 +77,33 @@ public class SimpleMetadataContributor implements MetadataContributor implements QuerySource, FileSource { diff --git a/dspace-api/src/main/java/org/dspace/importer/external/ris/service/RisImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/ris/service/RisImportMetadataSourceServiceImpl.java new file mode 100644 index 0000000000..2574e187df --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/ris/service/RisImportMetadataSourceServiceImpl.java @@ -0,0 +1,141 @@
+/**
+ * The contents of this file are subject to the license and copyright
+ * detailed in the LICENSE and NOTICE files at the root of the source
+ * tree and available online at
+ *
+ * http://www.dspace.org/license/
+ */
+package org.dspace.importer.external.ris.service;
+
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import javax.annotation.Resource;
+
+import org.dspace.importer.external.exception.FileSourceException;
+import org.dspace.importer.external.service.components.AbstractPlainMetadataSource;
+import org.dspace.importer.external.service.components.dto.PlainMetadataKeyValueItem;
+import org.dspace.importer.external.service.components.dto.PlainMetadataSourceDto;
+
+/**
+ * Implements a metadata importer for RIS files.
+ * Implementation inspired by the BTE DataLoader {@link https://github.com/EKT/Biblio-Transformation-Engine/blob/master/bte-io/src/main/java/gr/ekt/bteio/loaders/RISDataLoader.java}
+ *
+ * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it)
+ */
+public class RisImportMetadataSourceServiceImpl extends AbstractPlainMetadataSource 
{
+
+    @Override
+    public String getImportSource() {
+        return "RISMetadataSource";
+    }
+
+    @Override
+    protected List<PlainMetadataSourceDto> readData(InputStream inputStream) throws FileSourceException {
+        return aggregateData(inputStream);
+    }
+
+    /**
+     * This method maps the data present in the inputStream and returns a list of PlainMetadataSourceDto.
+     * Each PlainMetadataSourceDto will be used to create a single {@link org.dspace.importer.external.datamodel.ImportRecord}
+     *
+     * @see org.dspace.importer.external.service.components.AbstractPlainMetadataSource
+     *
+     * @param inputStream the inputStream of the RIS file
+     * @return List of {@link org.dspace.importer.external.service.components.dto.PlainMetadataSourceDto}
+     * @throws FileSourceException
+     */
+    private List<PlainMetadataSourceDto> aggregateData(InputStream inputStream) throws FileSourceException {
+        List<PlainMetadataSourceDto> metadata = new ArrayList<>();
+        // map each line of the file to a key/value pair
+        List<PlainMetadataKeyValueItem> notAggregatedItems = notAggregatedData(inputStream);
+        List<PlainMetadataKeyValueItem> aggregatedTmpList = null;
+        Iterator<PlainMetadataKeyValueItem> itr = notAggregatedItems.iterator();
+        // iterate over the list of key/value items and
+        // create a new PlainMetadataSourceDto (which maps to an ImportRecord)
+        // every time the key is "TY" (the record separator in RIS)
+        while (itr.hasNext()) {
+            PlainMetadataKeyValueItem item = itr.next();
+            if ("TY".equals(item.getKey())) {
+                if (aggregatedTmpList != null) {
+                    PlainMetadataSourceDto dto = new PlainMetadataSourceDto();
+                    dto.setMetadata(new ArrayList<>(aggregatedTmpList));
+                    metadata.add(dto);
+                }
+                aggregatedTmpList = new ArrayList<>();
+                aggregatedTmpList.add(item);
+            } else {
+                if (aggregatedTmpList != null) {
+                    aggregatedTmpList.add(item);
+                    // save the metadata of the last iteration
+                    if (!itr.hasNext()) {
+                        PlainMetadataSourceDto dto = new PlainMetadataSourceDto();
+                        dto.setMetadata(new ArrayList<>(aggregatedTmpList));
+                        metadata.add(dto);
+                    }
+                }
+            }
+        }
+        return metadata;
+    }
+
+    /**
+     * This method transforms each row of the RIS file into a PlainMetadataKeyValueItem,
+     * splitting the row sequentially through a RegExp without taking care of the meaning of the data.
+     * In this way, all entries present in the file are mapped into the resulting list.
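+     * For example (illustrative rows): {@code TI - My Title} becomes the pair (TI, My Title), while a
+     * following row that doesn't match the tag pattern is appended to the value of the previous item.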
+     *
+     * @param inputStream the InputStream of the file
+     * @return a list of PlainMetadataKeyValueItem mapping the file's rows
+     * @throws FileSourceException
+     */
+    private List<PlainMetadataKeyValueItem> notAggregatedData(InputStream inputStream) throws FileSourceException {
+        LinkedList<PlainMetadataKeyValueItem> items = new LinkedList<>();
+        BufferedReader reader;
+        try {
+            reader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
+            String line;
+            // match a valid RIS entry: a two-character tag, " - ", then the value
+            Pattern risPattern = Pattern.compile("^([A-Z][A-Z0-9]) - (.*)$");
+            while ((line = reader.readLine()) != null) {
+                // skip blank lines
+                if (line.matches("^\\s*$")) {
+                    continue;
+                }
+                Matcher risMatcher = risPattern.matcher(line);
+                if (risMatcher.matches()) {
+                    PlainMetadataKeyValueItem keyValueItem = new PlainMetadataKeyValueItem();
+                    keyValueItem.setValue(risMatcher.group(2));
+                    keyValueItem.setKey(risMatcher.group(1));
+                    items.add(keyValueItem);
+                } else {
+                    if (!items.isEmpty()) {
+                        items.getLast().setValue(items.getLast().getValue().concat(line));
+                    }
+                }
+            }
+        } catch (Exception e) {
+            throw new FileSourceException("Cannot parse RIS file", e);
+        }
+        return items;
+    }
+
+    /**
+     * Inject the MetadataFieldMapping containing the mapping between RecordType
+     * (in this case PlainMetadataSourceDto.class) and Metadata
+     */
+    @Override
+    @SuppressWarnings("unchecked")
+    @Resource(name = "risMetadataFieldMap")
+    public void setMetadataFieldMap(@SuppressWarnings("rawtypes") Map metadataFieldMap) {
+        super.setMetadataFieldMap(metadataFieldMap);
+    }
+
+} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/service/components/FileSource.java b/dspace-api/src/main/java/org/dspace/importer/external/service/components/FileSource.java index febe01ee53..5bef0984df 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/service/components/FileSource.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/service/components/FileSource.java @@ -22,6 +22,11 @@ import org.dspace.importer.external.exception.FileSourceException; */ public interface FileSource extends MetadataSource {
+    /**
+     * Get the file extensions (xml, csv, txt, ...) supported by the FileSource
+     */
+    public List<String> getSupportedExtensions();
+
     /** * Return a list of ImportRecord constructed from input file. * @@ -62,9 +67,4 @@ public interface FileSource extends MetadataSource { return false; }
-    /**
-     * Get the file extensions (xml, csv, txt, ...) 
supported by the FileSource implementation - */ - public List getSupportedExtensions(); - } diff --git a/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml b/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml index 0efe6c8142..0046366f2e 100644 --- a/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml +++ b/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml @@ -26,13 +26,15 @@ Will result in using the PubmedImportService for the lookup step Omitting this property will default to searching over all configured ImportService implementations --> - - + - - + + + + + + @@ -47,9 +49,9 @@ - - + + @@ -60,12 +62,21 @@ - - + + + + ris + + + + + bib @@ -74,6 +85,42 @@ + + + + + + csv + + + + + + + + + + + + tsv + + + + + + + + + enl + enw + + + + + diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/WorkspaceItemRestRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/WorkspaceItemRestRepository.java index 1d6471c6be..7ee157ab52 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/WorkspaceItemRestRepository.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/WorkspaceItemRestRepository.java @@ -385,6 +385,9 @@ public class WorkspaceItemRestRepository extends DSpaceRestRepository> idRef = new AtomicReference<>(); String authToken = getAuthToken(eperson.getEmail(), password); - // create a workspaceitem from a single bibliographic entry file explicitly in the default collection (col1) - getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") + try { + // create a workspaceitem from a single bibliographic entry file explicitly in the default collection (col1) + getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") .file(bibtexFile)) // create should return 200, 201 (created) is better for single resource .andExpect(status().isOk()) @@ -915,11 +917,20 @@ public class WorkspaceItemRestRepositoryIT extends AbstractControllerIntegration + ".metadata['dc.title'][0].value", is("bibtex-test.bib"))) .andExpect( - jsonPath("$._embedded.workspaceitems[*]._embedded.upload").doesNotExist()); - ; + jsonPath("$._embedded.workspaceitems[*]._embedded.upload").doesNotExist()) + .andDo(result -> idRef.set(read(result.getResponse().getContentAsString(), + "$._embedded.workspaceitems[*].id"))); + } finally { + if (idRef != null && idRef.get() != null) { + for (int i : idRef.get()) { + WorkspaceItemBuilder.deleteWorkspaceItem(i); + } + } + } // create a workspaceitem from a single bibliographic entry file explicitly in the col2 - getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") + try { + getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") .file(bibtexFile) .param("owningCollection", col2.getID().toString())) .andExpect(status().isOk()) @@ -934,12 +945,522 @@ public class WorkspaceItemRestRepositoryIT extends AbstractControllerIntegration + ".files[0].metadata['dc.title'][0].value", is("bibtex-test.bib"))) .andExpect( - jsonPath("$._embedded.workspaceitems[*]._embedded.upload").doesNotExist()); - + jsonPath("$._embedded.workspaceitems[*]._embedded.upload").doesNotExist()) + .andDo(result -> idRef.set(read(result.getResponse().getContentAsString(), + "$._embedded.workspaceitems[*].id"))); + } finally { + if (idRef != null && idRef.get() != null) { + for (int i : idRef.get()) { + WorkspaceItemBuilder.deleteWorkspaceItem(i); + } + } + } 
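+        // note: the workspaceitems created by each upload are deleted in the finally blocks above,
+        // so the two submissions are verified independently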
bibtex.close(); } + @Test + /** + * Test the creation of workspaceitems POSTing to the resource collection endpoint a csv file + * + * @throws Exception + */ + public void createSingleWorkspaceItemFromCSVWithOneEntryTest() throws Exception { + context.turnOffAuthorisationSystem(); + //** GIVEN ** + //1. A community-collection structure with one parent community with sub-community and two collections. + parentCommunity = CommunityBuilder.createCommunity(context) + .withName("Parent Community") + .build(); + Community child1 = CommunityBuilder.createSubCommunity(context, parentCommunity) + .withName("Sub Community") + .build(); + Collection col1 = CollectionBuilder.createCollection(context, child1) + .withName("Collection 1") + .withSubmitterGroup(eperson) + .build(); + Collection col2 = CollectionBuilder.createCollection(context, child1) + .withName("Collection 2") + .withSubmitterGroup(eperson) + .build(); + + InputStream csv = getClass().getResourceAsStream("csv-test.csv"); + final MockMultipartFile csvFile = new MockMultipartFile("file", "/local/path/csv-test.csv", + "text/csv", csv); + + context.restoreAuthSystemState(); + + String authToken = getAuthToken(eperson.getEmail(), password); + // create workspaceitems in the default collection (col1) + AtomicReference> idRef = new AtomicReference<>(); + try { + getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") + .file(csvFile)) + // create should return 200, 201 (created) is better for single resource + .andExpect(status().isOk()) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.title'][0].value", + is("My Article"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.contributor.author'][0].value", + is("Nobody"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.date.issued'][0].value", + is("2006"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.identifier.issn'][0].value", + is("Mock ISSN"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.type'][0].value", + is("Mock subtype"))) + .andExpect( + jsonPath("$._embedded.workspaceitems[0]._embedded.collection.id", is(col1.getID().toString()))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]" + + ".metadata['dc.source'][0].value", + is("/local/path/csv-test.csv"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]" + + ".metadata['dc.title'][0].value", + is("csv-test.csv"))) + .andExpect( + jsonPath("$._embedded.workspaceitems[*]._embedded.upload").doesNotExist()) + .andDo(result -> idRef.set(read(result.getResponse().getContentAsString(), + "$._embedded.workspaceitems[*].id"))); + } finally { + if (idRef != null && idRef.get() != null) { + for (int i : idRef.get()) { + WorkspaceItemBuilder.deleteWorkspaceItem(i); + } + } + } + + // create workspaceitems explicitly in the col2 + try { + getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") + .file(csvFile) + .param("owningCollection", col2.getID().toString())) + .andExpect(status().isOk()) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.title'][0].value", + is("My Article"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.contributor.author'][0].value", + is("Nobody"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + 
"['dc.date.issued'][0].value", + is("2006"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.identifier.issn'][0].value", + is("Mock ISSN"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.type'][0].value", + is("Mock subtype"))) + .andExpect( + jsonPath("$._embedded.workspaceitems[0]._embedded.collection.id", is(col2.getID().toString()))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]" + + ".metadata['dc.source'][0].value", + is("/local/path/csv-test.csv"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload" + + ".files[0].metadata['dc.title'][0].value", + is("csv-test.csv"))) + .andExpect( + jsonPath("$._embedded.workspaceitems[*]._embedded.upload").doesNotExist()) + .andDo(result -> idRef.set(read(result.getResponse().getContentAsString(), + "$._embedded.workspaceitems[*].id"))); + } finally { + if (idRef != null && idRef.get() != null) { + for (int i : idRef.get()) { + WorkspaceItemBuilder.deleteWorkspaceItem(i); + } + } + } + csv.close(); + } + + @Test + /** + * Test the creation of workspaceitems POSTing to the resource collection endpoint a csv file + * with some missing data + * + * @throws Exception + */ + public void createSingleWorkspaceItemFromCSVWithOneEntryAndMissingDataTest() throws Exception { + context.turnOffAuthorisationSystem(); + + //** GIVEN ** + //1. A community-collection structure with one parent community with sub-community and two collections. + parentCommunity = CommunityBuilder.createCommunity(context) + .withName("Parent Community") + .build(); + Community child1 = CommunityBuilder.createSubCommunity(context, parentCommunity) + .withName("Sub Community") + .build(); + Collection col1 = CollectionBuilder.createCollection(context, child1) + .withName("Collection 1") + .withSubmitterGroup(eperson) + .build(); + Collection col2 = CollectionBuilder.createCollection(context, child1) + .withName("Collection 2") + .withSubmitterGroup(eperson) + .build(); + + InputStream csv = getClass().getResourceAsStream("csv-missing-field-test.csv"); + final MockMultipartFile csvFile = new MockMultipartFile("file", "/local/path/csv-missing-field-test.csv", + "text/csv", csv); + + context.restoreAuthSystemState(); + + String authToken = getAuthToken(eperson.getEmail(), password); + AtomicReference> idRef = new AtomicReference<>(); + // create workspaceitems in the default collection (col1) + + try { + getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") + .file(csvFile)) + // create should return 200, 201 (created) is better for single resource + .andExpect(status().isOk()) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.title'][0].value", + is("My Article"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.contributor.author'][0].value", + is("Nobody"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.contributor.author'][1].value", + is("Try escape, in item"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.date.issued'][0].value").isEmpty()) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.identifier.issn'][0].value", + is("Mock ISSN"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.type'][0].value" + ).doesNotExist()) + .andExpect( + 
jsonPath("$._embedded.workspaceitems[0]._embedded.collection.id", is(col1.getID().toString()))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]" + + ".metadata['dc.source'][0].value", + is("/local/path/csv-missing-field-test.csv"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]" + + ".metadata['dc.title'][0].value", + is("csv-missing-field-test.csv"))) + .andExpect( + jsonPath("$._embedded.workspaceitems[*]._embedded.upload").doesNotExist()) + .andDo(result -> idRef.set(read(result.getResponse().getContentAsString(), + "$._embedded.workspaceitems[*].id"))); + } finally { + if (idRef != null && idRef.get() != null) { + for (int i : idRef.get()) { + WorkspaceItemBuilder.deleteWorkspaceItem(i); + } + } + } + csv.close(); + } + + @Test + /** + * Test the creation of workspaceitems POSTing to the resource collection endpoint a tsv file + * + * @throws Exception + */ + public void createSingleWorkspaceItemFromTSVWithOneEntryTest() throws Exception { + context.turnOffAuthorisationSystem(); + + //** GIVEN ** + //1. A community-collection structure with one parent community with sub-community and two collections. + parentCommunity = CommunityBuilder.createCommunity(context) + .withName("Parent Community") + .build(); + Community child1 = CommunityBuilder.createSubCommunity(context, parentCommunity) + .withName("Sub Community") + .build(); + Collection col1 = CollectionBuilder.createCollection(context, child1) + .withName("Collection 1") + .withSubmitterGroup(eperson) + .build(); + Collection col2 = CollectionBuilder.createCollection(context, child1) + .withName("Collection 2") + .withSubmitterGroup(eperson) + .build(); + + InputStream tsv = getClass().getResourceAsStream("tsv-test.tsv"); + final MockMultipartFile tsvFile = new MockMultipartFile("file", "/local/path/tsv-test.tsv", + "text/tsv", tsv); + + context.restoreAuthSystemState(); + + String authToken = getAuthToken(eperson.getEmail(), password); + AtomicReference> idRef = new AtomicReference<>(); + + // create workspaceitems in the default collection (col1) + try { + getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") + .file(tsvFile)) + // create should return 200, 201 (created) is better for single resource + .andExpect(status().isOk()) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.title'][0].value", + is("My Article"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.contributor.author'][0].value", + is("Nobody"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.date.issued'][0].value", + is("2006"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.identifier.issn'][0].value", + is("Mock ISSN"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.type'][0].value", + is("Mock subtype"))) + .andExpect( + jsonPath("$._embedded.workspaceitems[0]._embedded.collection.id", is(col1.getID().toString()))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]" + + ".metadata['dc.source'][0].value", + is("/local/path/tsv-test.tsv"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]" + + ".metadata['dc.title'][0].value", + is("tsv-test.tsv"))) + .andExpect( + jsonPath("$._embedded.workspaceitems[*]._embedded.upload").doesNotExist()) + .andDo(result -> idRef.set(read(result.getResponse().getContentAsString(), + 
"$._embedded.workspaceitems[*].id"))); + } finally { + if (idRef != null && idRef.get() != null) { + for (int i : idRef.get()) { + WorkspaceItemBuilder.deleteWorkspaceItem(i); + } + } + } + tsv.close(); + } + + @Test + /** + * Test the creation of workspaceitems POSTing to the resource collection endpoint a ris file + * + * @throws Exception + */ + public void createSingleWorkspaceItemFromRISWithOneEntryTest() throws Exception { + context.turnOffAuthorisationSystem(); + + //** GIVEN ** + //1. A community-collection structure with one parent community with sub-community and two collections. + parentCommunity = CommunityBuilder.createCommunity(context) + .withName("Parent Community") + .build(); + Community child1 = CommunityBuilder.createSubCommunity(context, parentCommunity) + .withName("Sub Community") + .build(); + Collection col1 = CollectionBuilder.createCollection(context, child1) + .withName("Collection 1") + .withSubmitterGroup(eperson) + .build(); + Collection col2 = CollectionBuilder.createCollection(context, child1) + .withName("Collection 2") + .withSubmitterGroup(eperson) + .build(); + + InputStream ris = getClass().getResourceAsStream("ris-test.ris"); + final MockMultipartFile tsvFile = new MockMultipartFile("file", "/local/path/ris-test.ris", + "text/ris", ris); + + context.restoreAuthSystemState(); + + String authToken = getAuthToken(eperson.getEmail(), password); + AtomicReference> idRef = new AtomicReference<>(); + + // create workspaceitems in the default collection (col1) + try { + getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") + .file(tsvFile)) + // create should return 200, 201 (created) is better for single resource + .andExpect(status().isOk()) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.title'][0].value", + is("Challenge–Response Identification"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.title'][1].value", + is("Challenge–Response Identification second title"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.contributor.author'][0].value", + is("Just, Mike"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.date.issued'][0].value", + is("2005"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.identifier.issn'][0].value", + is("978-0-387-23483-0"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.type'][0].value", + is("Mock subtype"))) + .andExpect( + jsonPath("$._embedded.workspaceitems[0]._embedded.collection.id", is(col1.getID().toString()))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]" + + ".metadata['dc.source'][0].value", + is("/local/path/ris-test.ris"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]" + + ".metadata['dc.title'][0].value", + is("ris-test.ris"))) + .andExpect( + jsonPath("$._embedded.workspaceitems[*]._embedded.upload").doesNotExist()) + .andDo(result -> idRef.set(read(result.getResponse().getContentAsString(), + "$._embedded.workspaceitems[*].id"))); + } finally { + if (idRef != null && idRef.get() != null) { + for (int i : idRef.get()) { + WorkspaceItemBuilder.deleteWorkspaceItem(i); + } + } + } + ris.close(); + } + + @Test + /** + * Test the creation of workspaceitems POSTing to the resource collection endpoint an endnote file + * + * @throws Exception + */ + public void 
createSingleWorkspaceItemFromEndnoteWithOneEntryTest() throws Exception { + context.turnOffAuthorisationSystem(); + + //** GIVEN ** + //1. A community-collection structure with one parent community with sub-community and two collections. + parentCommunity = CommunityBuilder.createCommunity(context) + .withName("Parent Community") + .build(); + Community child1 = CommunityBuilder.createSubCommunity(context, parentCommunity) + .withName("Sub Community") + .build(); + Collection col1 = CollectionBuilder.createCollection(context, child1) + .withName("Collection 1") + .withSubmitterGroup(eperson) + .build(); + Collection col2 = CollectionBuilder.createCollection(context, child1) + .withName("Collection 2") + .withSubmitterGroup(eperson) + .build(); + + InputStream endnote = getClass().getResourceAsStream("endnote-test.enw"); + final MockMultipartFile endnoteFile = new MockMultipartFile("file", "/local/path/endnote-test.enw", + "text/endnote", endnote); + + context.restoreAuthSystemState(); + + String authToken = getAuthToken(eperson.getEmail(), password); + AtomicReference> idRef = new AtomicReference<>(); + // create workspaceitems in the default collection (col1) + try { + getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") + .file(endnoteFile)) + // create should return 200, 201 (created) is better for single resource + .andExpect(status().isOk()) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.title'][0].value", + is("My Title"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.contributor.author'][0].value", + is("Author 1"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.contributor.author'][1].value", + is("Author 2"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.date.issued'][0].value", + is("2005"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpagetwo" + + "['dc.description.abstract'][0].value", + is("This is my abstract"))) + .andExpect( + jsonPath("$._embedded.workspaceitems[0]._embedded.collection.id", is(col1.getID().toString()))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]" + + ".metadata['dc.source'][0].value", + is("/local/path/endnote-test.enw"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]" + + ".metadata['dc.title'][0].value", + is("endnote-test.enw"))) + .andExpect( + jsonPath("$._embedded.workspaceitems[*]._embedded.upload").doesNotExist()) + .andDo(result -> idRef.set(read(result.getResponse().getContentAsString(), + "$._embedded.workspaceitems[*].id"))); + } finally { + if (idRef != null && idRef.get() != null) { + for (int i : idRef.get()) { + WorkspaceItemBuilder.deleteWorkspaceItem(i); + } + } + } + endnote.close(); + } + + + @Test + /** + * Test the creation of workspaceitems POSTing to the resource collection endpoint a csv file + * with some missing data and inner tab in field (those have to be read as list) + * + * @throws Exception + */ + public void createSingleWorkspaceItemFromTSVWithOneEntryAndMissingDataTest() throws Exception { + context.turnOffAuthorisationSystem(); + + //** GIVEN ** + //1. A community-collection structure with one parent community with sub-community and two collections. 
+ parentCommunity = CommunityBuilder.createCommunity(context) + .withName("Parent Community") + .build(); + Community child1 = CommunityBuilder.createSubCommunity(context, parentCommunity) + .withName("Sub Community") + .build(); + Collection col1 = CollectionBuilder.createCollection(context, child1) + .withName("Collection 1") + .withSubmitterGroup(eperson) + .build(); + Collection col2 = CollectionBuilder.createCollection(context, child1) + .withName("Collection 2") + .withSubmitterGroup(eperson) + .build(); + + InputStream tsv = getClass().getResourceAsStream("tsv-missing-field-test.tsv"); + final MockMultipartFile csvFile = new MockMultipartFile("file", "/local/path/tsv-missing-field-test.tsv", + "text/tsv", tsv); + + context.restoreAuthSystemState(); + + String authToken = getAuthToken(eperson.getEmail(), password); + AtomicReference> idRef = new AtomicReference<>(); + + // create workspaceitems in the default collection (col1) + try { + getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") + .file(csvFile)) + // create should return 200, 201 (created) is better for single resource + .andExpect(status().isOk()) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.title'][0].value", + is("My Article"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.contributor.author'][0].value", + is("Nobody"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.contributor.author'][1].value", + is("Try escape \t\t\tin \t\titem"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.date.issued'][0].value").isEmpty()) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone" + + "['dc.identifier.issn'][0].value", + is("Mock ISSN"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.type'][0].value" + ).doesNotExist()) + .andExpect( + jsonPath("$._embedded.workspaceitems[0]._embedded.collection.id", is(col1.getID().toString()))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]" + + ".metadata['dc.source'][0].value", + is("/local/path/tsv-missing-field-test.tsv"))) + .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]" + + ".metadata['dc.title'][0].value", + is("tsv-missing-field-test.tsv"))) + .andExpect( + jsonPath("$._embedded.workspaceitems[*]._embedded.upload").doesNotExist()) + .andDo(result -> idRef.set(read(result.getResponse().getContentAsString(), + "$._embedded.workspaceitems[*].id"))); + } finally { + if (idRef != null && idRef.get() != null) { + for (int i : idRef.get()) { + WorkspaceItemBuilder.deleteWorkspaceItem(i); + } + } + } + tsv.close(); + } @Test /** @@ -977,8 +1498,11 @@ public class WorkspaceItemRestRepositoryIT extends AbstractControllerIntegration context.restoreAuthSystemState(); String authToken = getAuthToken(eperson.getEmail(), password); + AtomicReference> idRef = new AtomicReference<>(); + // create a workspaceitem from a single bibliographic entry file explicitly in the default collection (col1) - getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") + try { + getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") .file(bibtexFile).file(pubmedFile)) // create should return 200, 201 (created) is better for single resource .andExpect(status().isOk()) @@ -999,10 +1523,20 @@ public class WorkspaceItemRestRepositoryIT extends 
AbstractControllerIntegration is("/local/path/pubmed-test.xml"))) .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[1]" + ".metadata['dc.title'][0].value", - is("pubmed-test.xml"))); + is("pubmed-test.xml"))) + .andDo(result -> idRef.set(read(result.getResponse().getContentAsString(), + "$._embedded.workspaceitems[*].id"))); + } finally { + if (idRef != null && idRef.get() != null) { + for (int i : idRef.get()) { + WorkspaceItemBuilder.deleteWorkspaceItem(i); + } + } + } // create a workspaceitem from a single bibliographic entry file explicitly in the col2 - getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") + try { + getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") .file(bibtexFile).file(pubmedFile) .param("owningCollection", col2.getID().toString())) .andExpect(status().isOk()) @@ -1023,7 +1557,16 @@ public class WorkspaceItemRestRepositoryIT extends AbstractControllerIntegration is("/local/path/pubmed-test.xml"))) .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[1]" + ".metadata['dc.title'][0].value", - is("pubmed-test.xml"))); + is("pubmed-test.xml"))) + .andDo(result -> idRef.set(read(result.getResponse().getContentAsString(), + "$._embedded.workspaceitems[*].id"))); + } finally { + if (idRef != null && idRef.get() != null) { + for (int i : idRef.get()) { + WorkspaceItemBuilder.deleteWorkspaceItem(i); + } + } + } bibtex.close(); xmlIS.close(); } @@ -1063,6 +1606,7 @@ public class WorkspaceItemRestRepositoryIT extends AbstractControllerIntegration context.restoreAuthSystemState(); String authToken = getAuthToken(eperson.getEmail(), password); + // create a workspaceitem from a single bibliographic entry file explicitly in the default collection (col1) getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") .file(bibtexFile)) @@ -1105,8 +1649,11 @@ public class WorkspaceItemRestRepositoryIT extends AbstractControllerIntegration context.restoreAuthSystemState(); String authToken = getAuthToken(eperson.getEmail(), password); + AtomicReference> idRef = new AtomicReference<>(); + // create a workspaceitem from a single bibliographic entry file explicitly in the default collection (col1) - getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") + try { + getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") .file(pubmedFile)) .andExpect(status().isOk()) .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.traditionalpageone['dc.title'][0].value", @@ -1123,10 +1670,21 @@ public class WorkspaceItemRestRepositoryIT extends AbstractControllerIntegration is("/local/path/pubmed-test.xml"))) .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0]" + ".metadata['dc.title'][0].value", - is("pubmed-test.xml"))); + is("pubmed-test.xml"))) + .andDo(result -> idRef.set(read(result.getResponse().getContentAsString(), + "$._embedded.workspaceitems[*].id"))); + } finally { + if (idRef != null && idRef.get() != null) { + for (int i : idRef.get()) { + WorkspaceItemBuilder.deleteWorkspaceItem(i); + } + } + } + // create a workspaceitem from a single bibliographic entry file explicitly in the col2 - getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") + try { + getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") .file(pubmedFile) .param("owningCollection", col2.getID().toString())) .andExpect(status().isOk()) @@ -1142,8 +1700,16 @@ public class WorkspaceItemRestRepositoryIT extends 
AbstractControllerIntegration .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0].metadata['dc.source'][0].value", is("/local/path/pubmed-test.xml"))) .andExpect(jsonPath("$._embedded.workspaceitems[0].sections.upload.files[0].metadata['dc.title'][0].value", - is("pubmed-test.xml"))); - + is("pubmed-test.xml"))) + .andDo(result -> idRef.set(read(result.getResponse().getContentAsString(), + "$._embedded.workspaceitems[*].id"))); + } finally { + if (idRef != null && idRef.get() != null) { + for (int i : idRef.get()) { + WorkspaceItemBuilder.deleteWorkspaceItem(i); + } + } + } xmlIS.close(); } @@ -1177,10 +1743,10 @@ public class WorkspaceItemRestRepositoryIT extends AbstractControllerIntegration context.restoreAuthSystemState(); - // bulk create a workspaceitem + // create a workspaceitem getClient(authToken).perform(fileUpload("/api/submission/workspaceitems") .file(pdfFile)) - // bulk create should return 200, 201 (created) is better for single resource + // create should return 200, 201 (created) is better for single resource .andExpect(status().isOk()) //FIXME it will be nice to setup a mock grobid server for end to end testing // no metadata for now diff --git a/dspace-server-webapp/src/test/resources/org/dspace/app/rest/csv-missing-field-test.csv b/dspace-server-webapp/src/test/resources/org/dspace/app/rest/csv-missing-field-test.csv new file mode 100644 index 0000000000..7f3f5cb750 --- /dev/null +++ b/dspace-server-webapp/src/test/resources/org/dspace/app/rest/csv-missing-field-test.csv @@ -0,0 +1,2 @@ +Title,Author,Year,Journal,Abstract,ISSN,Type +My Article,"Nobody, \"Try escape, in item\"",,My Journal,"This is my abstract, i use comma to check escape works fine",Mock ISSN \ No newline at end of file diff --git a/dspace-server-webapp/src/test/resources/org/dspace/app/rest/csv-test.csv b/dspace-server-webapp/src/test/resources/org/dspace/app/rest/csv-test.csv new file mode 100644 index 0000000000..d5bc35a77b --- /dev/null +++ b/dspace-server-webapp/src/test/resources/org/dspace/app/rest/csv-test.csv @@ -0,0 +1,2 @@ +Title,Author,Year,Journal,Abstract,ISSN,Type +My Article,Nobody,2006,My Journal,"This is my abstract, i use comma to check escape works fine",Mock ISSN,Mock subtype \ No newline at end of file diff --git a/dspace-server-webapp/src/test/resources/org/dspace/app/rest/endnote-test.enw b/dspace-server-webapp/src/test/resources/org/dspace/app/rest/endnote-test.enw new file mode 100644 index 0000000000..25cc749d92 --- /dev/null +++ b/dspace-server-webapp/src/test/resources/org/dspace/app/rest/endnote-test.enw @@ -0,0 +1,10 @@ +FN +VR +SO My Journal +PY 2005 +AB This is my abstract +AU Author 1 +AU Author 2 +TI My Title +ER +EF \ No newline at end of file diff --git a/dspace-server-webapp/src/test/resources/org/dspace/app/rest/ris-test.ris b/dspace-server-webapp/src/test/resources/org/dspace/app/rest/ris-test.ris new file mode 100644 index 0000000000..e056e6ace2 --- /dev/null +++ b/dspace-server-webapp/src/test/resources/org/dspace/app/rest/ris-test.ris @@ -0,0 +1,20 @@ +TY - CHAP +AU - Just, Mike +ED - van Tilborg, Henk C. A. 
+PY - 2005 +DA - 2005// +TI - Challenge–Response Identification +T1 - Challenge–Response Identification second title +BT - Encyclopedia of Cryptography and Security +SP - 73 +EP - 74 +PB - Springer US +CY - Boston, MA +SN - 978-0-387-23483-0 +SO - My Journal +UR - https://doi.org/10.1007/0-387-23483-7_56 +DO - 10.1007/0-387-23483-7_56 +ID - Just2005 +PT - Mock subtype +AB - This is the abstract +ER - \ No newline at end of file diff --git a/dspace-server-webapp/src/test/resources/org/dspace/app/rest/tsv-missing-field-test.tsv b/dspace-server-webapp/src/test/resources/org/dspace/app/rest/tsv-missing-field-test.tsv new file mode 100644 index 0000000000..86659b9a38 --- /dev/null +++ b/dspace-server-webapp/src/test/resources/org/dspace/app/rest/tsv-missing-field-test.tsv @@ -0,0 +1,2 @@ +Title Author Year Journal Abstract ISSN Type +My Article "Nobody, \"Try escape in item\"" My Journal "This is my abstract, i use tab to check escape works fine" Mock ISSN \ No newline at end of file diff --git a/dspace-server-webapp/src/test/resources/org/dspace/app/rest/tsv-test.tsv b/dspace-server-webapp/src/test/resources/org/dspace/app/rest/tsv-test.tsv new file mode 100644 index 0000000000..506a725c96 --- /dev/null +++ b/dspace-server-webapp/src/test/resources/org/dspace/app/rest/tsv-test.tsv @@ -0,0 +1,2 @@ +Title Author Year Journal Abstract ISSN Type +My Article Nobody 2006 My Journal "This is my abstract i'm using use tab to check escape works fine" Mock ISSN Mock subtype \ No newline at end of file diff --git a/dspace/config/spring/api/bibtex-integration.xml b/dspace/config/spring/api/bibtex-integration.xml index 9675ef82b3..eeabace1c7 100644 --- a/dspace/config/spring/api/bibtex-integration.xml +++ b/dspace/config/spring/api/bibtex-integration.xml @@ -17,58 +17,36 @@ only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over what metadatafield is generated. - - - - - + + + + + - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - + \ No newline at end of file diff --git a/dspace/config/spring/api/characterseparated-integration.xml b/dspace/config/spring/api/characterseparated-integration.xml new file mode 100644 index 0000000000..1ee62173f1 --- /dev/null +++ b/dspace/config/spring/api/characterseparated-integration.xml @@ -0,0 +1,80 @@ + + + + + + + Defines which metadatum is mapped on which metadatum. Note that while the key must be unique it + only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over + what metadatafield is generated. + + + + + + + + + + + + Defines which metadatum is mapped on which metadatum. Note that while the key must be unique it + only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over + what metadatafield is generated. 
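+ Keys in this map are the zero-based column indexes produced by CharacterSeparatedImportMetadataSourceServiceImpl
+ ("0" refers to the first column of the file).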
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dspace/config/spring/api/dublicore-metadata-mapper.xml b/dspace/config/spring/api/dublicore-metadata-mapper.xml new file mode 100644 index 0000000000..6461f129a5 --- /dev/null +++ b/dspace/config/spring/api/dublicore-metadata-mapper.xml @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dspace/config/spring/api/endnote-integration.xml b/dspace/config/spring/api/endnote-integration.xml new file mode 100644 index 0000000000..15ff3ca6f7 --- /dev/null +++ b/dspace/config/spring/api/endnote-integration.xml @@ -0,0 +1,52 @@ + + + + + + + Defines which metadatum is mapped on which metadatum. Note that while the key must be unique it + only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over + what metadatafield is generated. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dspace/config/spring/api/ris-integration.xml b/dspace/config/spring/api/ris-integration.xml new file mode 100644 index 0000000000..3a7f0feade --- /dev/null +++ b/dspace/config/spring/api/ris-integration.xml @@ -0,0 +1,77 @@ + + + + + + + Defines which metadatum is mapped on which metadatum. Note that while the key must be unique it + only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over + what metadatafield is generated. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file