Remove BTE and implement the metadata extraction step using the Live Import

Andrea Bollini
2021-03-16 19:48:38 +01:00
parent 2b4f22be65
commit 2eed442e06
57 changed files with 709 additions and 4989 deletions

View File

@@ -537,17 +537,17 @@
<artifactId>rome-modules</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>gr.ekt.bte</groupId>
<artifactId>bte-io</artifactId>
<version>0.9.3.5</version>
<exclusions>
<exclusion>
<groupId>net.bytebuddy</groupId>
<artifactId>byte-buddy</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>net.sf.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>2.3</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.jbibtex</groupId>
<artifactId>jbibtex</artifactId>
<version>1.0.10</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>

View File

@@ -1,106 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.app.itemimport;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import gr.ekt.bte.core.DataLoader;
import gr.ekt.bte.core.TransformationEngine;
import gr.ekt.bte.dataloader.FileDataLoader;
/**
* This class acts as a service in the batch import procedure based on the Biblio-Transformation-Engine (BTE)
*/
public class BTEBatchImportService {
TransformationEngine transformationEngine;
Map<String, DataLoader> dataLoaders = new HashMap<String, DataLoader>();
Map<String, String> outputMap = new HashMap<String, String>();
/**
* Default constructor
*/
public BTEBatchImportService() {
super();
}
/**
* Setter method for dataLoaders parameter
*
* @param dataLoaders map of data loaders
*/
public void setDataLoaders(Map<String, DataLoader> dataLoaders) {
this.dataLoaders = dataLoaders;
}
/**
* Get data loaders
*
* @return the map of DataLoaders
*/
public Map<String, DataLoader> getDataLoaders() {
return dataLoaders;
}
/**
* Get output map
*
* @return the outputMapping
*/
public Map<String, String> getOutputMap() {
return outputMap;
}
/**
* Setter method for the outputMapping
*
* @param outputMap the output mapping
*/
public void setOutputMap(Map<String, String> outputMap) {
this.outputMap = outputMap;
}
/**
* Get transformation engine
*
* @return transformation engine
*/
public TransformationEngine getTransformationEngine() {
return transformationEngine;
}
/**
* set transformation engine
*
* @param transformationEngine transformation engine
*/
public void setTransformationEngine(TransformationEngine transformationEngine) {
this.transformationEngine = transformationEngine;
}
/**
* Getter of file data loaders
*
* @return List of file data loaders
*/
public List<String> getFileDataLoaders() {
List<String> result = new ArrayList<String>();
for (String key : dataLoaders.keySet()) {
DataLoader dl = dataLoaders.get(key);
if (dl instanceof FileDataLoader) {
result.add(key);
}
}
return result;
}
}
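For context, this removed service was driven entirely by Spring configuration: the dataLoaders map associates an input-type key (the value later passed with the -i parameter) with a BTE DataLoader, and outputMap maps BTE record fields to DSpace metadata. A minimal sketch of such a wiring — the entries and the mapping direction are assumptions for illustration; the bean name matches the lookup by name seen later in this commit, and OAIPMHDataLoader is the only loader class actually referenced elsewhere in this diff:

<bean id="org.dspace.app.itemimport.BTEBatchImportService"
      class="org.dspace.app.itemimport.BTEBatchImportService">
    <property name="dataLoaders">
        <map>
            <!-- key is the value passed to the -i parameter -->
            <entry key="oaipmh">
                <bean class="gr.ekt.bteio.loaders.OAIPMHDataLoader"/>
            </entry>
        </map>
    </property>
    <property name="outputMap">
        <map>
            <!-- illustrative BTE field -> DSpace metadata mapping -->
            <entry key="title" value="dc.title"/>
        </map>
    </property>
</bean>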

View File

@@ -73,7 +73,6 @@ public class ItemImportCLITool {
Options options = new Options();
options.addOption("a", "add", false, "add items to DSpace");
options.addOption("b", "add-bte", false, "add items to DSpace via Biblio-Transformation-Engine (BTE)");
options.addOption("r", "replace", false, "replace items in mapfile");
options.addOption("d", "delete", false,
"delete items listed in mapfile");
@@ -388,8 +387,6 @@ public class ItemImportCLITool {
myloader.replaceItems(c, mycollections, sourcedir, mapfile, template);
} else if ("delete".equals(command)) {
myloader.deleteItems(c, mapfile);
} else if ("add-bte".equals(command)) {
myloader.addBTEItems(c, mycollections, sourcedir, mapfile, template, bteInputType, null);
}
// complete all transactions
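For reference, the removed code path was reached from the command line roughly as follows (a sketch: only the -b/--add-bte and -i flags appear in this diff; the remaining options are the usual ItemImport ones and are unchanged by this commit):

[dspace]/bin/dspace import --add-bte -i bibtex <usual import options: collection, eperson, source, mapfile>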

View File

@@ -45,13 +45,6 @@ import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import gr.ekt.bte.core.DataLoader;
import gr.ekt.bte.core.TransformationEngine;
import gr.ekt.bte.core.TransformationResult;
import gr.ekt.bte.core.TransformationSpec;
import gr.ekt.bte.dataloader.FileDataLoader;
import gr.ekt.bteio.generators.DSpaceOutputGenerator;
import gr.ekt.bteio.loaders.OAIPMHDataLoader;
import org.apache.commons.collections4.ComparatorUtils;
import org.apache.commons.io.FileDeleteStrategy;
import org.apache.commons.io.FileUtils;
@@ -96,7 +89,6 @@ import org.dspace.eperson.service.EPersonService;
import org.dspace.eperson.service.GroupService;
import org.dspace.handle.service.HandleService;
import org.dspace.services.ConfigurationService;
import org.dspace.utils.DSpace;
import org.dspace.workflow.WorkflowItem;
import org.dspace.workflow.WorkflowService;
import org.springframework.beans.factory.InitializingBean;
@@ -200,100 +192,6 @@ public class ItemImportServiceImpl implements ItemImportService, InitializingBea
}
/**
* In this method, the BTE is instantiated. The workflow generates the DSpace files
* necessary for the upload, and the default item import method is called
*
* @param c The context
* @param mycollections The collections the items are inserted to
* @param sourceDir The filepath to the file to read data from
* @param mapFile The filepath to mapfile to be generated
* @param template whether to use collection template item as starting point
* @param inputType The type of the input data (bibtex, csv, etc.)
* @param workingDir The path to create temporary files (for command line or UI based)
* @throws Exception if error occurs
*/
@Override
public void addBTEItems(Context c, List<Collection> mycollections,
String sourceDir, String mapFile, boolean template, String inputType, String workingDir)
throws Exception {
//Determine the folder where BTE will output the results
String outputFolder = null;
if (workingDir == null) { //This indicates a command line import, create a random path
File importDir = new File(configurationService.getProperty("org.dspace.app.batchitemimport.work.dir"));
if (!importDir.exists()) {
boolean success = importDir.mkdir();
if (!success) {
log.info("Cannot create batch import directory!");
throw new Exception("Cannot create batch import directory!");
}
}
//Get a random folder in case two admins batch import data at the same time
outputFolder = importDir + File.separator + generateRandomFilename(true);
} else { //This indicates a UI import, working dir is preconfigured
outputFolder = workingDir;
}
BTEBatchImportService dls = new DSpace().getSingletonService(BTEBatchImportService.class);
DataLoader dataLoader = dls.getDataLoaders().get(inputType);
Map<String, String> outputMap = dls.getOutputMap();
TransformationEngine te = dls.getTransformationEngine();
if (dataLoader == null) {
System.out.println(
"ERROR: The key used in -i parameter must match a valid DataLoader in the BTE Spring XML " +
"configuration file!");
return;
}
if (outputMap == null) {
System.out.println(
"ERROR: The key used in -i parameter must match a valid outputMapping in the BTE Spring XML " +
"configuration file!");
return;
}
if (dataLoader instanceof FileDataLoader) {
FileDataLoader fdl = (FileDataLoader) dataLoader;
if (!StringUtils.isBlank(sourceDir)) {
System.out.println(
"INFO: Dataloader will load data from the file specified in the command prompt (and not from the " +
"Spring XML configuration file)");
fdl.setFilename(sourceDir);
}
} else if (dataLoader instanceof OAIPMHDataLoader) {
OAIPMHDataLoader fdl = (OAIPMHDataLoader) dataLoader;
System.out.println(sourceDir);
if (!StringUtils.isBlank(sourceDir)) {
System.out.println(
"INFO: Dataloader will load data from the address specified in the command prompt (and not from " +
"the Spring XML configuration file)");
fdl.setServerAddress(sourceDir);
}
}
if (dataLoader != null) {
System.out.println("INFO: Dataloader " + dataLoader.toString() + " will be used for the import!");
te.setDataLoader(dataLoader);
DSpaceOutputGenerator outputGenerator = new DSpaceOutputGenerator(outputMap);
outputGenerator.setOutputDirectory(outputFolder);
te.setOutputGenerator(outputGenerator);
try {
TransformationResult res = te.transform(new TransformationSpec());
List<String> output = res.getOutput();
outputGenerator.writeOutput(output);
} catch (Exception e) {
System.err.println("Exception");
e.printStackTrace();
throw e;
}
addItems(c, mycollections, outputFolder, mapFile, template);
}
}
@Override
public void addItemsAtomic(Context c, List<Collection> mycollections, String sourceDir, String mapFile,
boolean template) throws Exception {
@@ -1739,9 +1637,6 @@ public class ItemImportServiceImpl implements ItemImportService, InitializingBea
if (theInputType.equals("saf") || theInputType
.equals("safupload")) { //In case of Simple Archive Format import
addItems(context, finalCollections, dataDir, mapFilePath, template);
} else { // For all other imports (via BTE)
addBTEItems(context, finalCollections, theFilePath, mapFilePath, useTemplateItem, theInputType,
dataDir);
}
// email message letting user know the file is ready for

View File

@@ -183,21 +183,6 @@ public interface ItemImportService {
*/
public void deleteItems(Context c, String mapfile) throws Exception;
/**
* Add items
*
* @param c DSpace Context
* @param mycollections List of Collections
* @param sourcedir source directory
* @param mapfile map file
* @param template whether to use template item
* @param bteInputType The input type of the data (bibtex, csv, etc.), in case of a local file
* @param workingDir working directory
* @throws Exception if error
*/
public void addBTEItems(Context c, List<Collection> mycollections, String sourcedir, String mapfile,
boolean template, String bteInputType, String workingDir) throws Exception;
/**
* Get temporary work directory
*

View File

@@ -7,7 +7,6 @@
*/
package org.dspace.submit;
import org.dspace.app.itemimport.BTEBatchImportService;
import org.dspace.authorize.factory.AuthorizeServiceFactory;
import org.dspace.authorize.service.AuthorizeService;
import org.dspace.content.InProgressSubmission;
@@ -38,8 +37,6 @@ public abstract class AbstractProcessingStep {
protected MetadataFieldService metadataFieldService = ContentServiceFactory.getInstance().getMetadataFieldService();
protected ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService();
protected WorkspaceItemService workspaceItemService = ContentServiceFactory.getInstance().getWorkspaceItemService();
protected BTEBatchImportService bteBatchImportService = DSpaceServicesFactory.getInstance().getServiceManager()
.getServiceByName("org.dspace.app.itemimport.BTEBatchImportService", BTEBatchImportService.class);
public abstract void doPreProcessing(Context context, InProgressSubmission wsi);

View File

@@ -1,54 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.extraction;
import java.util.List;
import gr.ekt.bte.dataloader.FileDataLoader;
import org.dspace.services.ConfigurationService;
/**
* Configuration bean to associate a BTE FileDataLoader with a specific list of formats identified by their file
* extensions. See config/spring/api/metadata-extractor.xml
*
* @author Luigi Andrea Pascarelli (luigiandrea.pascarelli at 4science.it)
* @author Andrea Bollini (andrea.bollini at 4science.it)
*/
public class MetadataExtractor {
private List<String> extensions;
private FileDataLoader dataLoader;
private ConfigurationService configurationService;
public List<String> getExtensions() {
return extensions;
}
public void setExtensions(List<String> extensions) {
this.extensions = extensions;
}
public FileDataLoader getDataLoader() {
return dataLoader;
}
public void setDataLoader(FileDataLoader dataLoader) {
this.dataLoader = dataLoader;
}
public ConfigurationService getConfigurationService() {
return configurationService;
}
public void setConfigurationService(ConfigurationService configurationService) {
this.configurationService = configurationService;
}
}
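The referenced config/spring/api/metadata-extractor.xml associated each loader with the file extensions it could handle. A minimal sketch of such a bean definition — the BibTeX loader class is an assumption; only the shape of the extensions and dataLoader properties comes from the class above:

<bean class="org.dspace.submit.extraction.MetadataExtractor">
    <property name="extensions">
        <list>
            <!-- file extensions routed to the loader below -->
            <value>bib</value>
        </list>
    </property>
    <property name="dataLoader">
        <!-- hypothetical BTE FileDataLoader implementation -->
        <bean class="gr.ekt.bteio.loaders.BibTeXDataLoader"/>
    </property>
</bean>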

View File

@@ -7,55 +7,37 @@
*/
package org.dspace.submit.listener;
import java.util.Map;
import java.util.Set;
import gr.ekt.bte.core.DataLoader;
import org.dspace.services.ConfigurationService;
import org.dspace.content.Item;
import org.dspace.core.Context;
import org.dspace.external.model.ExternalDataObject;
/**
* Configuration bean to map metadata to identifiers (e.g. dc.identifier.doi -> doi, dc.identifier.isbn -> isbn) and
* alias to BTE Data Loader. See config/spring/api/step-processing.xml
* The interface to implement to support the ExtractMetadata enrichment step
*
* @author Luigi Andrea Pascarelli (luigiandrea.pascarelli at 4science.it)
* @author Andrea Bollini (andrea.bollini at 4science.it)
*
*/
public class MetadataListener {
public interface MetadataListener {
/**
* Return the list of metadata that should be monitored, as changes to them could
* allow the service to retrieve an ExternalDataObject to enrich the current
* item
*
* @return the list of metadata to monitor
*/
public Set<String> getMetadataToListen();
/**
* Metadata to identifier map
* Retrieve an ExternalDataObject to enrich the current item using the current
* metadata and the information about which listened metadata have changed
*
* @param context the DSpace Context Object
* @param item the item in its current status
* @param changedMetadata the list of listened metadata that have changed
* @return an ExternalDataObject that can be used to enrich the current item
*/
private Map<String, String> metadata;
private ConfigurationService configurationService;
/**
* Alias to data loader map
*/
private Map<String, DataLoader> dataloadersMap;
public ConfigurationService getConfigurationService() {
return configurationService;
}
public void setConfigurationService(ConfigurationService configurationService) {
this.configurationService = configurationService;
}
public Map<String, String> getMetadata() {
return metadata;
}
public void setMetadata(Map<String, String> metadata) {
this.metadata = metadata;
}
public Map<String, DataLoader> getDataloadersMap() {
return dataloadersMap;
}
public void setDataloadersMap(Map<String, DataLoader> dataloadersMap) {
this.dataloadersMap = dataloadersMap;
}
public ExternalDataObject getExternalDataObject(Context context, Item item, Set<String> changedMetadata);
}

View File

@@ -0,0 +1,99 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.listener;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.dspace.content.Item;
import org.dspace.content.MetadataValue;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.ItemService;
import org.dspace.core.Context;
import org.dspace.external.model.ExternalDataObject;
import org.dspace.external.provider.ExternalDataProvider;
/**
* This is the basic implementation for the MetadataListener interface.
*
* It holds a map of metadata and related External Data Providers that can be
* used to retrieve further information using the updated metadata of the item
*
* @author Andrea Bollini (andrea.bollini at 4science.it)
*
*/
public class SimpleMetadataListener implements MetadataListener {
/**
* A map to link a specific metadata with an ExternalDataProvider
*/
private Map<String, List<ExternalDataProvider>> externalDataProvidersMap;
private ItemService itemService = ContentServiceFactory.getInstance().getItemService();
public Map<String, List<ExternalDataProvider>> getExternalDataProvidersMap() {
return externalDataProvidersMap;
}
public void setExternalDataProvidersMap(Map<String, List<ExternalDataProvider>> externalDataProvidersMap) {
this.externalDataProvidersMap = externalDataProvidersMap;
}
@Override
public Set<String> getMetadataToListen() {
return externalDataProvidersMap.keySet();
}
@Override
public ExternalDataObject getExternalDataObject(Context context, Item item, Set<String> changedMetadata) {
// we loop over the available providers and return the first object found
for (String m : changedMetadata) {
List<ExternalDataProvider> providers = externalDataProvidersMap.get(m);
for (ExternalDataProvider prov : providers) {
String id = generateExternalId(context, prov, item, changedMetadata, m);
if (StringUtils.isNotBlank(id)) {
Optional<ExternalDataObject> result = prov.getExternalDataObject(id);
if (result.isPresent()) {
return result.get();
}
}
}
}
return null;
}
/**
* This is the simplest implementation: it assumes that the value of the metadata
* listened to by the DataProvider can be used directly as an identifier. Subclasses
* may extend it to add support for identifier normalization or to combine multiple
* pieces of information to build the identifier
*
* @param context the DSpace Context Object
* @param prov the ExternalDataProvider that needs to receive an Id
* @param item the item
* @param changedMetadata the metadata that were recently changed
* @param m the changed metadata that led to the selected
* ExternalDataProvider
* @return an Id, if any, that can be used to query the {@link ExternalDataProvider}
*/
protected String generateExternalId(Context context, ExternalDataProvider prov, Item item,
Set<String> changedMetadata, String m) {
List<MetadataValue> metadataByMetadataString = itemService.getMetadataByMetadataString(item, m);
// only suggest an identifier if there is exactly one value for the metadata. If
// there are more values it is highly probable that a lookup was already
// performed when the first value was added
if (metadataByMetadataString != null && metadataByMetadataString.size() == 1) {
return metadataByMetadataString.get(0).getValue();
}
return null;
}
}
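As the Javadoc of generateExternalId suggests, identifier normalization can be layered on by subclassing. A minimal sketch, assuming a hypothetical subclass that strips a DOI URL prefix before querying the provider (the class name and the prefix handling are illustrative, not part of this commit):

package org.dspace.submit.listener;

import java.util.Set;

import org.dspace.content.Item;
import org.dspace.core.Context;
import org.dspace.external.provider.ExternalDataProvider;

public class DoiNormalizingMetadataListener extends SimpleMetadataListener {

    @Override
    protected String generateExternalId(Context context, ExternalDataProvider prov, Item item,
                                        Set<String> changedMetadata, String m) {
        String id = super.generateExternalId(context, prov, item, changedMetadata, m);
        // reduce a DOI URL to the bare identifier form expected by the provider
        if (id != null && id.startsWith("https://doi.org/")) {
            id = id.substring("https://doi.org/".length());
        }
        return id;
    }
}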

View File

@@ -1,144 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import gr.ekt.bte.core.DataLoadingSpec;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.RecordSet;
import gr.ekt.bte.core.Value;
import gr.ekt.bte.dataloader.FileDataLoader;
import gr.ekt.bte.exceptions.MalformedSourceException;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
/**
* Load metadata from a CiNii formatted file
*
* @author Keiji Suzuki
*/
public class CiNiiFileDataLoader extends FileDataLoader {
private static Logger log = org.apache.logging.log4j.LogManager.getLogger(CiNiiFileDataLoader.class);
Map<String, String> fieldMap; // mapping between service fields and local
// intermediate fields
/**
* Empty constructor
*/
public CiNiiFileDataLoader() {
}
/**
* @param filename Name of file to load CiNii data from.
*/
public CiNiiFileDataLoader(String filename) {
super(filename);
}
/*
* {@see gr.ekt.bte.core.DataLoader#getRecords()}
*
* @throws MalformedSourceException
*/
@Override
public RecordSet getRecords() throws MalformedSourceException {
RecordSet recordSet = new RecordSet();
try {
InputStream inputStream = new FileInputStream(new File(filename));
DocumentBuilderFactory factory = DocumentBuilderFactory
.newInstance();
factory.setValidating(false);
factory.setIgnoringComments(true);
factory.setIgnoringElementContentWhitespace(true);
DocumentBuilder db = factory.newDocumentBuilder();
Document inDoc = db.parse(inputStream);
Element xmlRoot = inDoc.getDocumentElement();
// There is no element to represent a record, so we cannot process
// multiple records at once.
Record record = CiNiiUtils.convertCiNiiDomToRecord(xmlRoot);
if (record != null) {
recordSet.addRecord(convertFields(record));
}
} catch (FileNotFoundException e) {
log.error(e.getMessage(), e);
} catch (ParserConfigurationException e) {
log.error(e.getMessage(), e);
} catch (SAXException e) {
log.error(e.getMessage(), e);
} catch (IOException e) {
log.error(e.getMessage(), e);
}
return recordSet;
}
/*
* (non-Javadoc)
*
* @see
* gr.ekt.bte.core.DataLoader#getRecords(gr.ekt.bte.core.DataLoadingSpec)
*/
@Override
public RecordSet getRecords(DataLoadingSpec spec)
throws MalformedSourceException {
if (spec.getOffset() > 0) {
return new RecordSet();
}
return getRecords();
}
public Record convertFields(Record publication) {
// guard before iterating: the original null check sat inside the loop,
// after fieldMap had already been dereferenced by the iterator
if (fieldMap == null) {
return publication;
}
for (String fieldName : fieldMap.keySet()) {
String md = this.fieldMap.get(fieldName);
if (StringUtils.isBlank(md)) {
continue;
}
md = md.trim();
if (publication.isMutable()) {
List<Value> values = publication.getValues(fieldName);
publication.makeMutable().removeField(fieldName);
publication.makeMutable().addField(md, values);
}
}
return publication;
}
public void setFieldMap(Map<String, String> fieldMap) {
this.fieldMap = fieldMap;
}
}
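convertFields renames the service-specific fields of a parsed record into local intermediate fields through the injected fieldMap. A sketch of the corresponding Spring wiring — the bean id and the mapping entries are assumptions for illustration:

<bean id="ciniiFileDataLoader" class="org.dspace.submit.lookup.CiNiiFileDataLoader">
    <property name="fieldMap">
        <map>
            <!-- service field -> local intermediate field (illustrative) -->
            <entry key="title" value="title"/>
            <entry key="authors" value="authors"/>
            <entry key="issued" value="dateIssued"/>
        </map>
    </property>
</bean>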

View File

@@ -1,107 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import gr.ekt.bte.core.Record;
import org.apache.http.HttpException;
import org.dspace.core.Context;
/**
* Load metadata from CiNii RDF API
*
* @author Keiji Suzuki
*/
public class CiNiiOnlineDataLoader extends NetworkSubmissionLookupDataLoader {
protected CiNiiService ciniiService = new CiNiiService();
protected boolean searchProvider = true;
/**
* Application id to use CiNii
*/
protected String appId = null;
/**
* max result number to return
*/
protected int maxResults = 10;
public void setCiNiiService(CiNiiService ciniiService) {
this.ciniiService = ciniiService;
}
@Override
public List<String> getSupportedIdentifiers() {
return Arrays.asList(new String[] {CINII});
}
public void setSearchProvider(boolean searchProvider) {
this.searchProvider = searchProvider;
}
@Override
public boolean isSearchProvider() {
return searchProvider;
}
@Override
public List<Record> getByIdentifier(Context context,
Map<String, Set<String>> keys) throws HttpException, IOException {
if (appId == null) {
throw new RuntimeException("No CiNii Application ID is specified!");
}
List<Record> results = new ArrayList<Record>();
if (keys != null) {
Set<String> ciniiids = keys.get(CINII);
if (ciniiids != null && ciniiids.size() > 0) {
for (String ciniiid : ciniiids) {
Record record = ciniiService.getByCiNiiID(ciniiid, getAppId());
if (record != null) {
results.add(convertFields(record));
}
}
}
}
return results;
}
@Override
public List<Record> search(Context context, String title, String author, int year)
throws HttpException, IOException {
if (appId == null) {
throw new RuntimeException("No CiNii Application ID is specified!");
}
return ciniiService.searchByTerm(title, author, year,
getMaxResults(), getAppId());
}
public String getAppId() {
return appId;
}
public void setAppId(String appId) {
this.appId = appId;
}
public int getMaxResults() {
return maxResults;
}
public void setMaxResults(int maxResults) {
this.maxResults = maxResults;
}
}

View File

@@ -1,221 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import gr.ekt.bte.core.Record;
import org.apache.http.HttpException;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.StatusLine;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.app.util.XMLUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
/**
* @author Keiji Suzuki
*/
public class CiNiiService {
/**
* log4j category
*/
private static final Logger log = LogManager.getLogger(CiNiiService.class);
protected int timeout = 1000;
public void setTimeout(int timeout) {
this.timeout = timeout;
}
public Record getByCiNiiID(String id, String appId) throws HttpException,
IOException {
return search(id, appId);
}
public List<Record> searchByTerm(String title, String author, int year,
int maxResults, String appId)
throws HttpException, IOException {
List<Record> records = new ArrayList<>();
List<String> ids = getCiNiiIDs(title, author, year, maxResults, appId);
if (ids != null && ids.size() > 0) {
for (String id : ids) {
Record record = search(id, appId);
if (record != null) {
records.add(record);
}
}
}
return records;
}
/**
* Get metadata by searching CiNii RDF API with CiNii NAID
*
* @param id CiNii NAID to search by
* @param appId registered application identifier for the API
* @return record metadata
* @throws IOException A general class of exceptions produced by failed or interrupted I/O operations.
* @throws HttpException Represents an XML/HTTP fault and provides access to the HTTP status code.
*/
protected Record search(String id, String appId)
throws IOException, HttpException {
HttpGet method = null;
try ( CloseableHttpClient client = HttpClientBuilder.create().build(); ) {
RequestConfig requestConfig = RequestConfig.custom()
.setConnectTimeout(timeout)
.build();
method = new HttpGet("http://ci.nii.ac.jp/naid/" + id + ".rdf?appid=" + appId);
method.setConfig(requestConfig);
// Execute the method.
HttpResponse response = client.execute(method);
StatusLine statusLine = response.getStatusLine();
int statusCode = statusLine.getStatusCode();
if (statusCode != HttpStatus.SC_OK) {
if (statusCode == HttpStatus.SC_BAD_REQUEST) {
throw new RuntimeException("CiNii RDF is not valid");
} else {
throw new RuntimeException("CiNii RDF Http call failed: "
+ statusLine);
}
}
try {
DocumentBuilderFactory factory = DocumentBuilderFactory
.newInstance();
factory.setValidating(false);
factory.setIgnoringComments(true);
factory.setIgnoringElementContentWhitespace(true);
// disallow DTD parsing to ensure no XXE attacks can occur.
// See https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
DocumentBuilder db = factory.newDocumentBuilder();
Document inDoc = db.parse(response.getEntity().getContent());
Element xmlRoot = inDoc.getDocumentElement();
return CiNiiUtils.convertCiNiiDomToRecord(xmlRoot);
} catch (Exception e) {
throw new RuntimeException(
"CiNii RDF identifier is not valid or not exist");
}
} finally {
if (method != null) {
method.releaseConnection();
}
}
}
/**
* Get CiNii NAIDs by searching CiNii OpenURL API with title, author and year
*
* @param title record title
* @param author record author
* @param year record year
* @param maxResults maximum number of results returned
* @param appId registered application identifier for the API
* @return matching NAIDs
* @throws IOException A general class of exceptions produced by failed or interrupted I/O operations.
* @throws HttpException Represents an XML/HTTP fault and provides access to the HTTP status code.
*/
protected List<String> getCiNiiIDs(String title, String author, int year,
int maxResults, String appId)
throws IOException, HttpException {
// Need at least one query term
if (title == null && author == null && year == -1) {
return null;
}
HttpGet method = null;
List<String> ids = new ArrayList<>();
try ( CloseableHttpClient client = HttpClientBuilder.create().build(); ) {
StringBuilder query = new StringBuilder();
query.append("format=rss&appid=").append(appId)
.append("&count=").append(maxResults);
if (title != null) {
query.append("&title=").append(URLEncoder.encode(title, "UTF-8"));
}
if (author != null) {
query.append("&author=").append(URLEncoder.encode(author, "UTF-8"));
}
if (year != -1) {
query.append("&year_from=").append(String.valueOf(year));
query.append("&year_to=").append(String.valueOf(year));
}
RequestConfig requestConfig = RequestConfig.custom()
.setConnectTimeout(timeout)
.build();
method = new HttpGet("http://ci.nii.ac.jp/opensearch/search?" + query.toString());
method.setConfig(requestConfig);
// Execute the method.
HttpResponse response = client.execute(method);
StatusLine statusLine = response.getStatusLine();
int statusCode = statusLine.getStatusCode();
if (statusCode != HttpStatus.SC_OK) {
if (statusCode == HttpStatus.SC_BAD_REQUEST) {
throw new RuntimeException("CiNii OpenSearch query is not valid");
} else {
throw new RuntimeException("CiNii OpenSearch call failed: "
+ statusLine);
}
}
try {
DocumentBuilderFactory factory = DocumentBuilderFactory
.newInstance();
factory.setValidating(false);
factory.setIgnoringComments(true);
factory.setIgnoringElementContentWhitespace(true);
// disallow DTD parsing to ensure no XXE attacks can occur.
// See https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
DocumentBuilder db = factory.newDocumentBuilder();
Document inDoc = db.parse(response.getEntity().getContent());
Element xmlRoot = inDoc.getDocumentElement();
List<Element> items = XMLUtils.getElementList(xmlRoot, "item");
int url_len = "http://ci.nii.ac.jp/naid/".length();
for (Element item : items) {
String about = item.getAttribute("rdf:about");
if (about.length() > url_len) {
ids.add(about.substring(url_len));
}
}
return ids;
} catch (Exception e) {
throw new RuntimeException(
"CiNii OpenSearch results is not valid or not exist");
}
} finally {
if (method != null) {
method.releaseConnection();
}
}
}
}

View File

@@ -1,225 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
/**
*
*/
package org.dspace.submit.lookup;
import java.util.LinkedList;
import java.util.List;
import gr.ekt.bte.core.MutableRecord;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.StringValue;
import gr.ekt.bte.core.Value;
import org.apache.commons.lang3.StringUtils;
import org.dspace.app.util.XMLUtils;
import org.dspace.submit.util.SubmissionLookupPublication;
import org.w3c.dom.Element;
/**
* @author Keiji Suzuki
*/
public class CiNiiUtils {
/**
* Default constructor
*/
private CiNiiUtils() { }
public static Record convertCiNiiDomToRecord(Element xmlRoot) {
MutableRecord record = new SubmissionLookupPublication("");
List<Element> list = XMLUtils.getElementList(xmlRoot, "rdf:Description");
// A valid CiNii record should have three rdf:Description elements
if (list.size() < 3) {
return record;
}
Element description_ja = list.get(0); // Japanese description
Element description_en = list.get(1); // English description
// Element description3 = list.get(2); // Authors information: NOT USED here
String language = XMLUtils.getElementValue(description_ja, "dc:language");
language = language != null ? language.toLowerCase() : "ja";
record.addValue("language", new StringValue(language));
if ("ja".equals(language) || "jpn".equals(language)) {
String title = XMLUtils.getElementValue(description_ja, "dc:title");
if (title != null) {
record.addValue("title", new StringValue(title));
}
String titleAlternative = XMLUtils.getElementValue(description_en, "dc:title");
if (titleAlternative != null) {
record.addValue("titleAlternative", new StringValue(titleAlternative));
}
List<Value> authors = getAuthors(description_ja);
if (authors.size() > 0) {
record.addField("authors", authors);
}
List<Value> authorAlternative = getAuthors(description_en);
if (authorAlternative.size() > 0) {
record.addField("auhtorAlternative", authorAlternative);
}
String publisher = XMLUtils.getElementValue(description_ja, "dc:publisher");
if (publisher != null) {
record.addValue("publisher", new StringValue(publisher));
}
} else {
String title = XMLUtils.getElementValue(description_en, "dc:title");
if (title != null) {
record.addValue("title", new StringValue(title));
}
String titleAlternative = XMLUtils.getElementValue(description_ja, "dc:title");
if (titleAlternative != null) {
record.addValue("titleAlternative", new StringValue(titleAlternative));
}
List<Value> authors = getAuthors(description_en);
if (authors.size() > 0) {
record.addField("authors", authors);
}
List<Value> authorAlternative = getAuthors(description_ja);
if (authorAlternative.size() > 0) {
record.addField("authorAlternative", authorAlternative);
}
String publisher = XMLUtils.getElementValue(description_en, "dc:publisher");
if (publisher != null) {
record.addValue("publisher", new StringValue(publisher));
}
}
String abstract_ja = XMLUtils.getElementValue(description_ja, "dc:description");
String abstract_en = XMLUtils.getElementValue(description_en, "dc:description");
if (abstract_ja != null && abstract_en != null) {
List<Value> description = new LinkedList<Value>();
description.add(new StringValue(abstract_ja));
description.add(new StringValue(abstract_en));
record.addField("description", description);
} else if (abstract_ja != null) {
record.addValue("description", new StringValue(abstract_ja));
} else if (abstract_en != null) {
record.addValue("description", new StringValue(abstract_en));
}
List<Value> subjects = getSubjects(description_ja);
subjects.addAll(getSubjects(description_en));
if (subjects.size() > 0) {
record.addField("subjects", subjects);
}
String journal_j = XMLUtils.getElementValue(description_ja, "prism:publicationName");
String journal_e = XMLUtils.getElementValue(description_en, "prism:publicationName");
if (journal_j != null && journal_e != null) {
record.addValue("journal", new StringValue(journal_j + " = " + journal_e));
} else if (journal_j != null) {
record.addValue("journal", new StringValue(journal_j));
} else if (journal_e != null) {
record.addValue("journal", new StringValue(journal_e));
}
String volume = XMLUtils.getElementValue(description_ja, "prism:volume");
if (volume != null) {
record.addValue("volume", new StringValue(volume));
}
String issue = XMLUtils.getElementValue(description_ja, "prism:number");
if (issue != null) {
record.addValue("issue", new StringValue(issue));
}
String spage = XMLUtils.getElementValue(description_ja, "prism:startingPage");
if (spage != null) {
record.addValue("spage", new StringValue(spage));
}
String epage = XMLUtils.getElementValue(description_ja, "prism:endingPage");
if (epage != null) {
record.addValue("epage", new StringValue(epage));
}
String pages = XMLUtils.getElementValue(description_ja, "prism:pageRange");
if (pages != null && spage == null) {
int pos = pages.indexOf("-");
if (pos > -1) {
spage = pages.substring(0, pos);
epage = pages.substring(pos + 1); // everything after the dash
if (!epage.equals("") && spage.length() > epage.length()) {
epage = spage.substring(0, spage.length() - epage.length()) + epage;
}
} else {
spage = pages;
epage = "";
}
record.addValue("spage", new StringValue(spage));
if (!epage.equals("") && epage == null) {
record.addValue("epage", new StringValue(epage));
}
}
String issn = XMLUtils.getElementValue(description_ja, "prism:issn");
if (issn != null) {
record.addValue("issn", new StringValue(issn));
}
String issued = XMLUtils.getElementValue(description_ja, "prism:publicationDate");
if (issued != null) {
record.addValue("issued", new StringValue(issued));
}
String ncid = XMLUtils.getElementValue(description_ja, "cinii:ncid");
if (ncid != null) {
record.addValue("ncid", new StringValue(ncid));
}
String naid = XMLUtils.getElementValue(description_ja, "cinii:naid");
if (naid != null) {
record.addValue("naid", new StringValue(naid));
}
return record;
}
private static List<Value> getAuthors(Element element) {
List<Value> authors = new LinkedList<Value>();
List<String> authorList = XMLUtils.getElementValueList(element, "dc:creator");
if (authorList != null && authorList.size() > 0) {
for (String author : authorList) {
int pos = author.indexOf(" ");
if (pos > -1) {
author = author.substring(0, pos) + "," + author.substring(pos);
}
authors.add(new StringValue(author));
}
}
return authors;
}
private static List<Value> getSubjects(Element element) {
List<Value> subjects = new LinkedList<Value>();
List<Element> topicList = XMLUtils.getElementList(element, "foaf:topic");
String attrValue = null;
for (Element topic : topicList) {
attrValue = topic.getAttribute("dc:title");
if (StringUtils.isNotBlank(attrValue)) {
subjects.add(new StringValue(attrValue.trim()));
}
}
return subjects;
}
}

View File

@@ -1,142 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import gr.ekt.bte.core.DataLoadingSpec;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.RecordSet;
import gr.ekt.bte.core.Value;
import gr.ekt.bte.dataloader.FileDataLoader;
import gr.ekt.bte.exceptions.MalformedSourceException;
import org.apache.commons.lang3.StringUtils;
import org.dspace.app.util.XMLUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
/**
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public class CrossRefFileDataLoader extends FileDataLoader {
Map<String, String> fieldMap; // mapping between service fields and local
// intermediate fields
/**
* Default constructor
*/
public CrossRefFileDataLoader() {
}
/**
* @param filename Name of file to load CrossRef data from.
*/
public CrossRefFileDataLoader(String filename) {
super(filename);
}
/*
* (non-Javadoc)
*
* @see gr.ekt.bte.core.DataLoader#getRecords()
*/
@Override
public RecordSet getRecords() throws MalformedSourceException {
RecordSet recordSet = new RecordSet();
try {
InputStream inputStream = new FileInputStream(new File(filename));
DocumentBuilderFactory factory = DocumentBuilderFactory
.newInstance();
factory.setValidating(false);
factory.setIgnoringComments(true);
factory.setIgnoringElementContentWhitespace(true);
DocumentBuilder db = factory.newDocumentBuilder();
Document inDoc = db.parse(inputStream);
Element xmlRoot = inDoc.getDocumentElement();
Element queryResult = XMLUtils.getSingleElement(xmlRoot, "query_result");
Element body = XMLUtils.getSingleElement(queryResult, "body");
Element dataRoot = XMLUtils.getSingleElement(body, "query");
Record record = CrossRefUtils.convertCrossRefDomToRecord(dataRoot);
recordSet.addRecord(convertFields(record));
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (ParserConfigurationException e) {
e.printStackTrace();
} catch (SAXException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return recordSet;
}
/*
* (non-Javadoc)
*
* @see
* gr.ekt.bte.core.DataLoader#getRecords(gr.ekt.bte.core.DataLoadingSpec)
*/
@Override
public RecordSet getRecords(DataLoadingSpec spec)
throws MalformedSourceException {
if (spec.getOffset() > 0) {
return new RecordSet();
}
return getRecords();
}
public Record convertFields(Record publication) {
// guard before iterating: the original null check sat inside the loop,
// after fieldMap had already been dereferenced by the iterator
if (fieldMap == null) {
return publication;
}
for (String fieldName : fieldMap.keySet()) {
String md = this.fieldMap.get(fieldName);
if (StringUtils.isBlank(md)) {
continue;
}
md = md.trim();
if (publication.isMutable()) {
List<Value> values = publication.getValues(fieldName);
publication.makeMutable().removeField(fieldName);
publication.makeMutable().addField(md, values);
}
}
return publication;
}
public void setFieldMap(Map<String, String> fieldMap) {
this.fieldMap = fieldMap;
}
}

View File

@@ -1,112 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.xml.parsers.ParserConfigurationException;
import gr.ekt.bte.core.Record;
import org.apache.http.HttpException;
import org.dspace.core.Context;
import org.jdom.JDOMException;
import org.xml.sax.SAXException;
/**
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public class CrossRefOnlineDataLoader extends NetworkSubmissionLookupDataLoader {
protected CrossRefService crossrefService = new CrossRefService();
protected boolean searchProvider = true;
protected String apiKey = null;
protected int maxResults = 10;
public void setSearchProvider(boolean searchProvider) {
this.searchProvider = searchProvider;
}
public void setCrossrefService(CrossRefService crossrefService) {
this.crossrefService = crossrefService;
}
@Override
public List<String> getSupportedIdentifiers() {
return Arrays.asList(new String[] {DOI});
}
@Override
public List<Record> getByIdentifier(Context context,
Map<String, Set<String>> keys) throws HttpException, IOException {
if (keys != null && keys.containsKey(DOI)) {
Set<String> dois = keys.get(DOI);
List<Record> items = null;
List<Record> results = new ArrayList<Record>();
if (getApiKey() == null) {
throw new RuntimeException("No CrossRef API key is specified!");
}
try {
items = crossrefService.search(context, dois, getApiKey());
} catch (JDOMException e) {
throw new RuntimeException(e.getMessage(), e);
} catch (ParserConfigurationException e) {
throw new RuntimeException(e.getMessage(), e);
} catch (SAXException e) {
throw new RuntimeException(e.getMessage(), e);
}
for (Record record : items) {
results.add(convertFields(record));
}
return results;
}
return null;
}
@Override
public List<Record> search(Context context, String title, String author,
int year) throws HttpException, IOException {
if (getApiKey() == null) {
throw new RuntimeException("No CrossRef API key is specified!");
}
List<Record> items = crossrefService.search(context, title, author,
year, getMaxResults(), getApiKey());
return items;
}
@Override
public boolean isSearchProvider() {
return searchProvider;
}
public String getApiKey() {
return apiKey;
}
public void setApiKey(String apiKey) {
this.apiKey = apiKey;
}
public int getMaxResults() {
return maxResults;
}
public void setMaxResults(int maxResults) {
this.maxResults = maxResults;
}
}

View File

@@ -1,204 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.io.IOException;
import java.lang.reflect.Type;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import gr.ekt.bte.core.Record;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpException;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.StatusLine;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.logging.log4j.Logger;
import org.dspace.app.util.XMLUtils;
import org.dspace.core.Context;
import org.dspace.core.LogManager;
import org.jdom.JDOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
/**
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public class CrossRefService {
private static final Logger log = org.apache.logging.log4j.LogManager.getLogger(CrossRefService.class);
protected int timeout = 1000;
public void setTimeout(int timeout) {
this.timeout = timeout;
}
public List<Record> search(Context context, Set<String> dois, String apiKey)
throws HttpException, IOException, JDOMException,
ParserConfigurationException, SAXException {
List<Record> results = new ArrayList<>();
if (dois != null && dois.size() > 0) {
for (String record : dois) {
try {
HttpGet method = null;
try {
HttpClient client = HttpClientBuilder.create().build();
try {
URIBuilder uriBuilder = new URIBuilder(
"http://www.crossref.org/openurl/");
uriBuilder.addParameter("pid", apiKey);
uriBuilder.addParameter("noredirect", "true");
uriBuilder.addParameter("id", record);
method = new HttpGet(uriBuilder.build());
RequestConfig requestConfig = RequestConfig.custom()
.setConnectTimeout(timeout)
.build();
method.setConfig(requestConfig);
} catch (URISyntaxException ex) {
throw new HttpException("Request not sent", ex);
}
// Execute the method.
HttpResponse response = client.execute(method);
StatusLine statusLine = response.getStatusLine();
int statusCode = statusLine.getStatusCode();
if (statusCode != HttpStatus.SC_OK) {
throw new RuntimeException("Http call failed: "
+ statusLine);
}
Record crossitem;
try {
DocumentBuilderFactory factory = DocumentBuilderFactory
.newInstance();
factory.setValidating(false);
factory.setIgnoringComments(true);
factory.setIgnoringElementContentWhitespace(true);
// disallow DTD parsing to ensure no XXE attacks can occur.
// See https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
DocumentBuilder db = factory
.newDocumentBuilder();
Document inDoc = db.parse(response.getEntity().getContent());
Element xmlRoot = inDoc.getDocumentElement();
Element queryResult = XMLUtils.getSingleElement(xmlRoot, "query_result");
Element body = XMLUtils.getSingleElement(queryResult, "body");
Element dataRoot = XMLUtils.getSingleElement(body, "query");
crossitem = CrossRefUtils
.convertCrossRefDomToRecord(dataRoot);
results.add(crossitem);
} catch (Exception e) {
log.warn(LogManager
.getHeader(
context,
"retrieveRecordDOI",
record
+ " DOI is not valid or not exist: "
+ e.getMessage()));
}
} finally {
if (method != null) {
method.releaseConnection();
}
}
} catch (RuntimeException rt) {
log.error(rt.getMessage(), rt);
}
}
}
return results;
}
public List<Record> search(Context context, String title, String authors,
int year, int count, String apiKey) throws IOException, HttpException {
HttpGet method = null;
try ( CloseableHttpClient client = HttpClientBuilder.create().build(); ) {
URIBuilder uriBuilder = new URIBuilder("http://search.labs.crossref.org/dois");
StringBuilder sb = new StringBuilder();
if (StringUtils.isNotBlank(title)) {
sb.append(title);
}
sb.append(" ");
if (StringUtils.isNotBlank(authors)) {
sb.append(authors);
}
String q = sb.toString().trim();
uriBuilder.addParameter("q", q);
uriBuilder.addParameter("year", year != -1 ? String.valueOf(year) : "");
uriBuilder.addParameter("rows", count != -1 ? String.valueOf(count) : "");
method = new HttpGet(uriBuilder.build());
RequestConfig requestConfig = RequestConfig.custom()
.setConnectTimeout(timeout)
.build();
method.setConfig(requestConfig);
// Execute the method.
HttpResponse response = client.execute(method);
StatusLine statusLine = response.getStatusLine();
int statusCode = statusLine.getStatusCode();
if (statusCode != HttpStatus.SC_OK) {
throw new RuntimeException("Http call failed:: "
+ statusLine);
}
Gson gson = new Gson();
Type listType = new TypeToken<ArrayList<Map>>() {
}.getType();
List<Map> json = gson.fromJson(
IOUtils.toString(response.getEntity().getContent(), StandardCharsets.UTF_8),
listType);
Set<String> dois = new HashSet<>();
for (Map r : json) {
dois.add(SubmissionLookupUtils.normalizeDOI((String) r
.get("doi")));
}
method.releaseConnection();
return search(context, dois, apiKey);
} catch (Exception e) {
throw new RuntimeException(e.getMessage(), e);
} finally {
if (method != null) {
method.releaseConnection();
}
}
}
}

View File

@@ -1,216 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
/**
*
*/
package org.dspace.submit.lookup;
import java.util.LinkedList;
import java.util.List;
import gr.ekt.bte.core.MutableRecord;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.StringValue;
import gr.ekt.bte.core.Value;
import org.apache.commons.lang3.StringUtils;
import org.dspace.app.util.XMLUtils;
import org.dspace.submit.util.SubmissionLookupPublication;
import org.w3c.dom.Element;
/**
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public class CrossRefUtils {
/**
* Default constructor
*/
private CrossRefUtils() { }
public static Record convertCrossRefDomToRecord(Element dataRoot) {
MutableRecord record = new SubmissionLookupPublication("");
String status = dataRoot.getAttribute("status");
if (!"resolved".equals(status)) {
String msg = XMLUtils.getElementValue(dataRoot, "msg");
String exMsg = status + " - " + msg;
throw new RuntimeException(exMsg);
}
String doi = XMLUtils.getElementValue(dataRoot, "doi");
if (doi != null) {
record.addValue("doi", new StringValue(doi));
}
String itemType = doi != null ? XMLUtils.getElementAttribute(dataRoot,
"doi", "type") : "unspecified";
if (itemType != null) {
record.addValue("doiType", new StringValue(itemType));
}
List<Element> identifier = XMLUtils.getElementList(dataRoot, "issn");
for (Element ident : identifier) {
if ("print".equalsIgnoreCase(ident.getAttribute("type"))
|| StringUtils.isNotBlank(ident.getAttribute("type"))) {
String issn = ident.getTextContent().trim();
if (issn != null) {
record.addValue("printISSN", new StringValue(issn));
}
} else {
String eissn = ident.getTextContent().trim();
if (eissn != null) {
record.addValue("electronicISSN", new StringValue(eissn));
}
}
}
List<Element> identifierisbn = XMLUtils.getElementList(dataRoot, "isbn");
for (Element ident : identifierisbn) {
// same fix as for the ISSN block above; variables renamed to match ISBN
if ("print".equalsIgnoreCase(ident.getAttribute("type"))
|| StringUtils.isBlank(ident.getAttribute("type"))) {
String isbn = ident.getTextContent().trim();
if (isbn != null) {
record.addValue("printISBN", new StringValue(isbn));
}
} else {
String eisbn = ident.getTextContent().trim();
if (eisbn != null) {
record.addValue("electronicISBN", new StringValue(eisbn));
}
}
}
String editionNumber = XMLUtils.getElementValue(dataRoot,
"editionNumber");
if (editionNumber != null) {
record.addValue("editionNumber", new StringValue(editionNumber));
}
String volume = XMLUtils.getElementValue(dataRoot, "volume");
if (volume != null) {
record.addValue("volume", new StringValue(volume));
}
String issue = XMLUtils.getElementValue(dataRoot, "issue");
if (issue != null) {
record.addValue("issue", new StringValue(issue));
}
String year = XMLUtils.getElementValue(dataRoot, "year");
if (year != null) {
record.addValue("year", new StringValue(year));
}
String firstPage = XMLUtils.getElementValue(dataRoot, "first_page");
if (firstPage != null) {
record.addValue("firstPage", new StringValue(firstPage));
}
String lastPage = XMLUtils.getElementValue(dataRoot, "last_page");
if (lastPage != null) {
record.addValue("lastPage", new StringValue(lastPage));
}
String seriesTitle = XMLUtils.getElementValue(dataRoot, "series_title");
if (seriesTitle != null) {
record.addValue("seriesTitle", new StringValue(seriesTitle));
}
String journalTitle = XMLUtils.getElementValue(dataRoot,
"journal_title");
if (journalTitle != null) {
record.addValue("journalTitle", new StringValue(journalTitle));
}
String volumeTitle = XMLUtils.getElementValue(dataRoot, "volume_title");
if (volumeTitle != null) {
record.addValue("volumeTitle", new StringValue(volumeTitle));
}
String articleTitle = XMLUtils.getElementValue(dataRoot,
"article_title");
if (articleTitle != null) {
record.addValue("articleTitle", new StringValue(articleTitle));
}
String publicationType = XMLUtils.getElementValue(dataRoot,
"pubblication_type");
if (publicationType != null) {
record.addValue("publicationType", new StringValue(publicationType));
}
List<String[]> authors = new LinkedList<String[]>();
List<String[]> editors = new LinkedList<String[]>();
List<String[]> translators = new LinkedList<String[]>();
List<String[]> chairs = new LinkedList<String[]>();
List<Element> contributors = XMLUtils.getElementList(dataRoot,
"contributors");
List<Element> contributor = null;
if (contributors != null && contributors.size() > 0) {
contributor = XMLUtils.getElementList(contributors.get(0),
"contributor");
for (Element contrib : contributor) {
String givenName = XMLUtils.getElementValue(contrib,
"given_name");
String surname = XMLUtils.getElementValue(contrib, "surname");
if ("editor".equalsIgnoreCase(contrib
.getAttribute("contributor_role"))) {
editors.add(new String[] {givenName, surname});
} else if ("chair".equalsIgnoreCase(contrib
.getAttribute("contributor_role"))) {
chairs.add(new String[] {givenName, surname});
} else if ("translator".equalsIgnoreCase(contrib
.getAttribute("contributor_role"))) {
translators.add(new String[] {givenName, surname});
} else {
authors.add(new String[] {givenName, surname});
}
}
}
if (authors.size() > 0) {
List<Value> values = new LinkedList<Value>();
for (String[] sArray : authors) {
values.add(new StringValue(sArray[1] + ", " + sArray[0]));
}
record.addField("authors", values);
}
if (editors.size() > 0) {
List<Value> values = new LinkedList<Value>();
for (String[] sArray : editors) {
values.add(new StringValue(sArray[1] + ", " + sArray[0]));
}
record.addField("editors", values);
}
if (translators.size() > 0) {
List<Value> values = new LinkedList<Value>();
for (String[] sArray : translators) {
values.add(new StringValue(sArray[1] + ", " + sArray[0]));
}
record.addField("translators", values);
}
if (chairs.size() > 0) {
List<Value> values = new LinkedList<Value>();
for (String[] sArray : chairs) {
values.add(new StringValue(sArray[1] + ", " + sArray[0]));
}
record.addField("chairs", values);
}
return record;
}
}

View File

@@ -1,364 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import gr.ekt.bte.core.DataOutputSpec;
import gr.ekt.bte.core.OutputGenerator;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.RecordSet;
import gr.ekt.bte.core.Value;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.app.util.DCInput;
import org.dspace.app.util.DCInputSet;
import org.dspace.app.util.DCInputsReader;
import org.dspace.app.util.DCInputsReaderException;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Collection;
import org.dspace.content.Item;
import org.dspace.content.MetadataField;
import org.dspace.content.MetadataSchema;
import org.dspace.content.WorkspaceItem;
import org.dspace.content.service.ItemService;
import org.dspace.content.service.MetadataFieldService;
import org.dspace.content.service.MetadataSchemaService;
import org.dspace.content.service.WorkspaceItemService;
import org.dspace.core.Context;
import org.dspace.submit.util.ItemSubmissionLookupDTO;
import org.springframework.beans.factory.annotation.Autowired;
/**
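 * BTE output generator that turns each looked-up record into a DSpace
 * workspace item, mapping record fields onto item metadata through the
 * configured output map and the submission form definition.
 *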
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public class DSpaceWorkspaceItemOutputGenerator implements OutputGenerator {
private static Logger log = LogManager.getLogger(DSpaceWorkspaceItemOutputGenerator.class);
protected Context context;
protected String formName;
protected List<WorkspaceItem> witems;
protected ItemSubmissionLookupDTO dto;
protected Collection collection;
Map<String, String> outputMap;
protected List<String> extraMetadataToKeep;
@Autowired(required = true)
protected ItemService itemService;
@Autowired(required = true)
protected MetadataFieldService metadataFieldService;
@Autowired(required = true)
protected MetadataSchemaService metadataSchemaService;
@Autowired(required = true)
protected WorkspaceItemService workspaceItemService;
@Override
public List<String> generateOutput(RecordSet recordSet) {
log.info("BTE OutputGenerator started. Records to output: "
+ recordSet.getRecords().size());
// Printing debug message
String totalString = "";
for (Record record : recordSet.getRecords()) {
totalString += SubmissionLookupUtils.getPrintableString(record)
+ "\n";
}
log.debug("Records to output:\n" + totalString);
witems = new ArrayList<WorkspaceItem>();
for (Record rec : recordSet.getRecords()) {
try {
WorkspaceItem wi = workspaceItemService.create(context, collection,
true);
merge(formName, wi.getItem(), rec);
witems.add(wi);
} catch (AuthorizeException e) {
log.error(e.getMessage(), e);
} catch (SQLException e) {
log.error(e.getMessage(), e);
}
}
return new ArrayList<String>();
}
@Override
public List<String> generateOutput(RecordSet records, DataOutputSpec spec) {
return generateOutput(records);
}
public List<WorkspaceItem> getWitems() {
return witems;
}
public void setContext(Context context) {
this.context = context;
}
public void setFormName(String formName) {
this.formName = formName;
}
public void setDto(ItemSubmissionLookupDTO dto) {
this.dto = dto;
}
public void setOutputMap(Map<String, String> outputMap) {
// Reverse the key-value pairs
this.outputMap = new HashMap<String, String>();
for (String key : outputMap.keySet()) {
this.outputMap.put(outputMap.get(key), key);
}
}
public void setCollection(Collection collection) {
this.collection = collection;
}
public void setExtraMetadataToKeep(List<String> extraMetadataToKeep) {
this.extraMetadataToKeep = extraMetadataToKeep;
}
// Methods
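/**
 * Merge the looked-up record into the item, adding only metadata that is
 * either listed in extraMetadataToKeep or present in the given submission
 * form, and respecting the repeatability of each field.
 */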
public void merge(String formName, Item item, Record record) {
try {
Record itemLookup = record;
Set<String> addedMetadata = new HashSet<String>();
for (String field : itemLookup.getFields()) {
String metadata = getMetadata(formName, itemLookup, field);
if (StringUtils.isBlank(metadata)) {
continue;
}
if (itemService.getMetadataByMetadataString(item, metadata).size() == 0
|| addedMetadata.contains(metadata)) {
addedMetadata.add(metadata);
String[] md = splitMetadata(metadata);
if (isValidMetadata(formName, md)) { // if in extra metadata or in the specific form
List<Value> values = itemLookup.getValues(field);
if (values != null && values.size() > 0) {
if (isRepeatableMetadata(formName, md)) { // if metadata is repeatable in form
for (Value value : values) {
String[] splitValue = splitValue(value
.getAsString());
if (splitValue[3] != null) {
itemService.addMetadata(context, item, md[0], md[1], md[2],
md[3], splitValue[0],
splitValue[1],
Integer.parseInt(splitValue[2]));
} else {
itemService.addMetadata(context, item, md[0], md[1], md[2],
md[3], value.getAsString());
}
}
} else {
String value = values.iterator().next()
.getAsString();
String[] splitValue = splitValue(value);
if (splitValue[3] != null) {
itemService.addMetadata(context, item, md[0], md[1], md[2], md[3],
splitValue[0], splitValue[1],
Integer.parseInt(splitValue[2]));
} else {
itemService.addMetadata(context, item, md[0], md[1], md[2], md[3],
value);
}
}
}
}
}
}
itemService.update(context, item);
} catch (SQLException e) {
log.error(e.getMessage(), e);
} catch (AuthorizeException e) {
log.error(e.getMessage(), e);
}
}
protected String getMetadata(String formName, Record itemLookup, String name) {
String type = SubmissionLookupService.getType(itemLookup);
String md = outputMap.get(type + "." + name);
if (StringUtils.isBlank(md)) {
md = outputMap.get(formName + "." + name);
if (StringUtils.isBlank(md)) {
md = outputMap.get(name);
}
}
// KSTA:ToDo: Make this a modifier
if (md != null && md.contains("|")) {
String[] cond = md.trim().split("\\|");
for (int idx = 1; idx < cond.length; idx++) {
boolean temp = itemLookup.getFields().contains(cond[idx]);
if (temp) {
return null;
}
}
return cond[0];
}
return md;
}
protected String[] splitMetadata(String metadata) {
String[] mdSplit = new String[3];
if (StringUtils.isNotBlank(metadata)) {
String tmpSplit[] = metadata.split("\\.");
if (tmpSplit.length == 4) {
mdSplit = new String[4];
mdSplit[0] = tmpSplit[0];
mdSplit[1] = tmpSplit[1];
mdSplit[2] = tmpSplit[2];
mdSplit[3] = tmpSplit[3];
} else if (tmpSplit.length == 3) {
mdSplit = new String[4];
mdSplit[0] = tmpSplit[0];
mdSplit[1] = tmpSplit[1];
mdSplit[2] = tmpSplit[2];
mdSplit[3] = null;
} else if (tmpSplit.length == 2) {
mdSplit = new String[4];
mdSplit[0] = tmpSplit[0];
mdSplit[1] = tmpSplit[1];
mdSplit[2] = null;
mdSplit[3] = null;
}
}
return mdSplit;
}
protected boolean isValidMetadata(String formName, String[] md) {
try {
if (extraMetadataToKeep != null
&& extraMetadataToKeep.contains(StringUtils.join(
Arrays.copyOfRange(md, 0, 3), "."))) {
return true;
}
return getDCInput(formName, md[0], md[1], md[2]) != null;
} catch (Exception e) {
log.error(e.getMessage(), e);
}
return false;
}
protected DCInput getDCInput(String formName, String schema, String element,
String qualifier) throws DCInputsReaderException {
List<DCInputSet> dcinputsets = new DCInputsReader().getInputsBySubmissionName(formName);
for (DCInputSet dcinputset : dcinputsets) {
for (DCInput[] dcrow : dcinputset.getFields()) {
for (DCInput dcinput : dcrow) {
if (dcinput.getSchema().equals(schema)
&& dcinput.getElement().equals(element)
&& ((dcinput.getQualifier() != null && dcinput
.getQualifier().equals(qualifier))
|| (dcinput.getQualifier() == null && qualifier == null))) {
return dcinput;
}
}
}
}
return null;
}
protected boolean isRepeatableMetadata(String formName, String[] md) {
try {
DCInput dcinput = getDCInput(formName, md[0], md[1], md[2]);
if (dcinput != null) {
return dcinput.isRepeatable();
}
return true;
} catch (Exception e) {
log.error(e.getMessage(), e);
}
return false;
}
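/**
 * Split a stored lookup value into its value, authority, confidence and
 * related components using the submission lookup separator.
 */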
protected String[] splitValue(String value) {
String[] splitted = value
.split(SubmissionLookupService.SEPARATOR_VALUE_REGEX);
String[] result = new String[6];
result[0] = splitted[0];
result[2] = "-1";
result[3] = "-1";
result[4] = "-1";
if (splitted.length > 1) {
result[5] = "splitted";
if (StringUtils.isNotBlank(splitted[1])) {
result[1] = splitted[1];
}
if (splitted.length > 2) {
result[2] = String.valueOf(Integer.parseInt(splitted[2]));
if (splitted.length > 3) {
result[3] = String.valueOf(Integer.parseInt(splitted[3]));
if (splitted.length > 4) {
result[4] = String.valueOf(Integer
.parseInt(splitted[4]));
}
}
}
}
return result;
}
protected void makeSureMetadataExist(Context context, String schema,
String element, String qualifier) {
try {
context.turnOffAuthorisationSystem();
boolean create = false;
MetadataSchema mdschema = metadataSchemaService.find(context, schema);
MetadataField mdfield = null;
if (mdschema == null) {
mdschema = metadataSchemaService.create(context, schema,
SubmissionLookupService.SL_NAMESPACE_PREFIX + schema
);
create = true;
} else {
mdfield = metadataFieldService.findByElement(context,
mdschema, element, qualifier);
}
if (mdfield == null) {
metadataFieldService.create(context, mdschema, element, qualifier,
"Campo utilizzato per la cache del provider submission-lookup: "
+ schema);
create = true;
}
if (create) {
context.complete();
}
context.restoreAuthSystemState();
} catch (Exception e) {
log.error(e.getMessage(), e);
}
}
}

View File

@@ -1,64 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.util.List;
import java.util.Map;
import gr.ekt.bte.core.AbstractModifier;
import gr.ekt.bte.core.MutableRecord;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.Value;
/**
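 * BTE modifier that copies the values of one or more source fields into a
 * target field, as configured in the merge field map.
 *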
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public class FieldMergeModifier extends AbstractModifier {
protected Map<String, List<String>> mergeFieldMap;
public FieldMergeModifier() {
super("FieldMergeModifier");
}
@Override
public Record modify(MutableRecord rec) {
if (mergeFieldMap != null) {
for (String target_field : mergeFieldMap.keySet()) {
List<String> source_fields = mergeFieldMap.get(target_field);
for (String source_field : source_fields) {
List<Value> values = rec.getValues(source_field);
if (values != null && values.size() > 0) {
for (Value value : values) {
rec.addValue(target_field, value);
}
}
// rec.removeField(source_field);
}
}
}
return rec;
}
/**
* @return the merge_field_map
*/
public Map<String, List<String>> getMergeFieldMap() {
return mergeFieldMap;
}
/**
* @param merge_field_map the merge_field_map to set
*/
public void setMergeFieldMap(Map<String, List<String>> merge_field_map) {
this.mergeFieldMap = merge_field_map;
}
}

View File

@@ -1,78 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.MissingResourceException;
import gr.ekt.bte.core.AbstractModifier;
import gr.ekt.bte.core.MutableRecord;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.StringValue;
import gr.ekt.bte.core.Value;
import org.springframework.beans.factory.InitializingBean;
/**
 * Modifier to convert ISO 639-2 alpha-3 code to ISO 639-1 alpha-2 code
*
* @author Keiji Suzuki
*/
public class LanguageCodeModifier extends AbstractModifier implements InitializingBean {
protected static Map<String, String> lang3to2 = null;
@Override
public void afterPropertiesSet() throws Exception {
lang3to2 = new HashMap<String, String>();
for (Locale locale : Locale.getAvailableLocales()) {
try {
lang3to2.put(locale.getISO3Language(), locale.getLanguage());
} catch (MissingResourceException e) {
continue;
}
}
}
public LanguageCodeModifier() {
super("LanguageCodeModifier");
}
@Override
public Record modify(MutableRecord rec) {
List<Value> old_values = rec.getValues("language");
if (old_values == null || old_values.size() == 0) {
return rec;
}
List<Value> new_values = new ArrayList<Value>();
for (Value value : old_values) {
String lang3 = value.getAsString();
String lang2 = lang3.length() == 3 ? getLang2(lang3) : lang3;
new_values.add(new StringValue(lang2));
}
rec.updateField("language", new_values);
return rec;
}
/**
 * Convert an ISO 639-2 alpha-3 code to an ISO 639-1 alpha-2 code
 *
 * @param lang3 ISO 639-2 alpha-3 language code
 * @return String ISO 639-1 alpha-2 language code ("other" if no alpha-2 mapping exists)
*/
protected String getLang2(String lang3) {
return lang3to2.containsKey(lang3) ? lang3to2.get(lang3) : "other";
}
}

View File

@@ -1,40 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.util.ArrayList;
import java.util.List;
/**
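 * Holder for the names of the lookup providers whose cached metadata
 * matches (ok) or differs from (err) the item metadata.
 *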
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public class LookupProvidersCheck {
private List<String> providersOk = new ArrayList<String>();
private List<String> providersErr = new ArrayList<String>();
public List<String> getProvidersOk() {
return providersOk;
}
public void setProvidersOk(List<String> providersOk) {
this.providersOk = providersOk;
}
public List<String> getProvidersErr() {
return providersErr;
}
public void setProvidersErr(List<String> providersErr) {
this.providersErr = providersErr;
}
}

View File

@@ -1,181 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import gr.ekt.bte.core.AbstractModifier;
import gr.ekt.bte.core.MutableRecord;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.StringValue;
import gr.ekt.bte.core.Value;
import org.apache.commons.lang3.StringUtils;
import org.dspace.services.ConfigurationService;
/**
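 * BTE modifier that rewrites field values through a properties file loaded
 * from the crosswalks configuration directory, supporting plain mappings,
 * "regex." prefixed rules and a default value.
 *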
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public class MapConverterModifier extends AbstractModifier {
protected String mappingFile; //The properties absolute filename
protected String converterNameFile; //The properties filename
protected ConfigurationService configurationService;
protected Map<String, String> mapping;
protected String defaultValue = "";
protected List<String> fieldKeys;
protected Map<String, String> regexConfig = new HashMap<String, String>();
public final String REGEX_PREFIX = "regex.";
public void init() {
this.mappingFile = configurationService.getProperty(
"dspace.dir") + File.separator + "config" + File.separator + "crosswalks" + File.separator +
converterNameFile;
this.mapping = new HashMap<String, String>();
FileInputStream fis = null;
try {
fis = new FileInputStream(new File(mappingFile));
Properties mapConfig = new Properties();
mapConfig.load(fis);
fis.close();
for (Object key : mapConfig.keySet()) {
String keyS = (String) key;
if (keyS.startsWith(REGEX_PREFIX)) {
String regex = keyS.substring(REGEX_PREFIX.length());
String regReplace = mapConfig.getProperty(keyS);
if (regReplace == null) {
regReplace = "";
} else if (regReplace.equalsIgnoreCase("@ident@")) {
regReplace = "$0";
}
regexConfig.put(regex, regReplace);
}
if (mapConfig.getProperty(keyS) != null) {
mapping.put(keyS, mapConfig.getProperty(keyS));
} else {
mapping.put(keyS, "");
}
}
} catch (Exception e) {
throw new IllegalArgumentException("", e);
} finally {
if (fis != null) {
try {
fis.close();
} catch (IOException ioe) {
// ...
}
}
}
for (String keyS : mapping.keySet()) {
if (keyS.startsWith(REGEX_PREFIX)) {
String regex = keyS.substring(REGEX_PREFIX.length());
String regReplace = mapping.get(keyS);
if (regReplace == null) {
regReplace = "";
} else if (regReplace.equalsIgnoreCase("@ident@")) {
regReplace = "$0";
}
regexConfig.put(regex, regReplace);
}
}
}
/**
 * @param name the modifier name
*/
public MapConverterModifier(String name) {
super(name);
}
/*
* (non-Javadoc)
*
* @see
* gr.ekt.bte.core.AbstractModifier#modify(gr.ekt.bte.core.MutableRecord)
*/
@Override
public Record modify(MutableRecord record) {
if (mapping != null && fieldKeys != null) {
for (String key : fieldKeys) {
List<Value> values = record.getValues(key);
if (values == null) {
continue;
}
List<Value> newValues = new ArrayList<Value>();
for (Value value : values) {
String stringValue = value.getAsString();
String tmp = "";
if (mapping.containsKey(stringValue)) {
tmp = mapping.get(stringValue);
} else {
tmp = defaultValue;
for (String regex : regexConfig.keySet()) {
if (stringValue != null
&& stringValue.matches(regex)) {
tmp = stringValue.replaceAll(regex,
regexConfig.get(regex));
}
}
}
if ("@@ident@@".equals(tmp)) {
newValues.add(new StringValue(stringValue));
} else if (StringUtils.isNotBlank(tmp)) {
newValues.add(new StringValue(tmp));
} else {
newValues.add(new StringValue(stringValue));
}
}
record.updateField(key, newValues);
}
}
return record;
}
public void setFieldKeys(List<String> fieldKeys) {
this.fieldKeys = fieldKeys;
}
public void setDefaultValue(String defaultValue) {
this.defaultValue = defaultValue;
}
public void setConverterNameFile(String converterNameFile) {
this.converterNameFile = converterNameFile;
}
public void setConfigurationService(ConfigurationService configurationService) {
this.configurationService = configurationService;
}
}

View File

@@ -1,291 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import gr.ekt.bte.core.DataLoader;
import gr.ekt.bte.core.DataLoadingSpec;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.RecordSet;
import gr.ekt.bte.core.StringValue;
import gr.ekt.bte.dataloader.FileDataLoader;
import gr.ekt.bte.exceptions.MalformedSourceException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.core.Context;
/**
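 * Composite BTE data loader that delegates to the configured providers,
 * chosen according to whether identifiers, search terms or an uploaded file
 * were supplied, and then tries to enrich the results across providers by DOI.
 *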
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public class MultipleSubmissionLookupDataLoader implements DataLoader {
private static Logger log = LogManager.getLogger(MultipleSubmissionLookupDataLoader.class);
protected final String NOT_FOUND_DOI = "NOT-FOUND-DOI";
Map<String, DataLoader> dataloadersMap;
// Depending on these values, the multiple data loader loads data from the
// appropriate providers
Map<String, Set<String>> identifiers = null; // searching by identifiers (DOI, ...)
Map<String, Set<String>> searchTerms = null; // searching by author, title, date
String filename = null; // uploading a file
String type = null; // the type of the uploaded file (bibtex, etc.)
/*
* (non-Javadoc)
*
* @see gr.ekt.bte.core.DataLoader#getRecords()
*/
@Override
public RecordSet getRecords() throws MalformedSourceException {
RecordSet recordSet = new RecordSet();
// KSTA:ToDo: Support timeout (problematic) providers
// List<String> timeoutProviders = new ArrayList<String>();
for (String providerName : filterProviders().keySet()) {
DataLoader provider = dataloadersMap.get(providerName);
RecordSet subRecordSet = provider.getRecords();
recordSet.addAll(subRecordSet);
// Tag each record with the name of the provider that produced it;
// the provider implementation itself does not need to set it
for (Record record : subRecordSet.getRecords()) {
if (record.isMutable()) {
record.makeMutable().addValue(
SubmissionLookupService.PROVIDER_NAME_FIELD,
new StringValue(providerName));
}
}
}
// Question: do we want this in the case of a file data loader?
// for each publication in the record set, if it has a DOI, try to find
// extra pubs from the other providers
if (searchTerms != null
|| (identifiers != null && !identifiers
.containsKey(SubmissionLookupDataLoader.DOI))) { // Extend
Map<String, Set<String>> provider2foundDOIs = new HashMap<String, Set<String>>();
List<String> foundDOIs = new ArrayList<String>();
for (Record publication : recordSet.getRecords()) {
String providerName = SubmissionLookupUtils.getFirstValue(
publication,
SubmissionLookupService.PROVIDER_NAME_FIELD);
String doi = null;
if (publication.getValues(SubmissionLookupDataLoader.DOI) != null
&& publication
.getValues(SubmissionLookupDataLoader.DOI)
.size() > 0) {
doi = publication.getValues(SubmissionLookupDataLoader.DOI)
.iterator().next().getAsString();
}
if (doi == null) {
doi = NOT_FOUND_DOI;
} else {
doi = SubmissionLookupUtils.normalizeDOI(doi);
if (!foundDOIs.contains(doi)) {
foundDOIs.add(doi);
}
Set<String> tmp = provider2foundDOIs.get(providerName);
if (tmp == null) {
tmp = new HashSet<String>();
provider2foundDOIs.put(providerName, tmp);
}
tmp.add(doi);
}
}
for (String providerName : dataloadersMap.keySet()) {
DataLoader genProvider = dataloadersMap.get(providerName);
if (!(genProvider instanceof SubmissionLookupDataLoader)) {
continue;
}
SubmissionLookupDataLoader provider = (SubmissionLookupDataLoader) genProvider;
// Provider must support DOI
if (!provider.getSupportedIdentifiers().contains(
SubmissionLookupDataLoader.DOI)) {
continue;
}
// if (evictProviders != null
// && evictProviders.contains(provider.getShortName())) {
// continue;
// }
Set<String> doiToSearch = new HashSet<String>();
Set<String> alreadyFoundDOIs = provider2foundDOIs
.get(providerName);
for (String doi : foundDOIs) {
if (alreadyFoundDOIs == null
|| !alreadyFoundDOIs.contains(doi)) {
doiToSearch.add(doi);
}
}
List<Record> pPublications = null;
Context context = null;
try {
if (doiToSearch.size() > 0) {
context = new Context();
pPublications = provider.getByDOIs(context, doiToSearch);
}
} catch (Exception e) {
log.error(e.getMessage(), e);
} finally {
if (context != null && context.isValid()) {
context.abort();
}
}
if (pPublications != null) {
for (Record rec : pPublications) {
recordSet.addRecord(rec);
if (rec.isMutable()) {
rec.makeMutable().addValue(
SubmissionLookupService.PROVIDER_NAME_FIELD,
new StringValue(providerName));
}
}
}
}
}
log.info("BTE DataLoader finished. Items loaded: "
+ recordSet.getRecords().size());
// Printing debug message
String totalString = "";
for (Record record : recordSet.getRecords()) {
totalString += SubmissionLookupUtils.getPrintableString(record)
+ "\n";
}
log.debug("Records loaded:\n" + totalString);
return recordSet;
}
/*
* (non-Javadoc)
*
* @see
* gr.ekt.bte.core.DataLoader#getRecords(gr.ekt.bte.core.DataLoadingSpec)
*/
@Override
public RecordSet getRecords(DataLoadingSpec loadingSpec)
throws MalformedSourceException {
// Identify the end of loading
if (loadingSpec.getOffset() > 0) {
return new RecordSet();
}
return getRecords();
}
public Map<String, DataLoader> getProvidersMap() {
return dataloadersMap;
}
public void setDataloadersMap(Map<String, DataLoader> providersMap) {
this.dataloadersMap = providersMap;
}
public void setIdentifiers(Map<String, Set<String>> identifiers) {
this.identifiers = identifiers;
this.filename = null;
this.searchTerms = null;
if (dataloadersMap != null) {
for (String providerName : dataloadersMap.keySet()) {
DataLoader provider = dataloadersMap.get(providerName);
if (provider instanceof NetworkSubmissionLookupDataLoader) {
((NetworkSubmissionLookupDataLoader) provider)
.setIdentifiers(identifiers);
}
}
}
}
public void setSearchTerms(Map<String, Set<String>> searchTerms) {
this.searchTerms = searchTerms;
this.identifiers = null;
this.filename = null;
if (dataloadersMap != null) {
for (String providerName : dataloadersMap.keySet()) {
DataLoader provider = dataloadersMap.get(providerName);
if (provider instanceof NetworkSubmissionLookupDataLoader) {
((NetworkSubmissionLookupDataLoader) provider)
.setSearchTerms(searchTerms);
}
}
}
}
public void setFile(String filename, String type) {
this.filename = filename;
this.type = type;
this.identifiers = null;
this.searchTerms = null;
if (dataloadersMap != null) {
for (String providerName : dataloadersMap.keySet()) {
DataLoader provider = dataloadersMap.get(providerName);
if (provider instanceof FileDataLoader) {
((FileDataLoader) provider).setFilename(filename);
}
}
}
}
public Map<String, DataLoader> filterProviders() {
Map<String, DataLoader> result = new HashMap<String, DataLoader>();
for (String providerName : dataloadersMap.keySet()) {
DataLoader dataLoader = dataloadersMap.get(providerName);
if (searchTerms != null && identifiers == null && filename == null) {
if (dataLoader instanceof SubmissionLookupDataLoader &&
((SubmissionLookupDataLoader) dataLoader).isSearchProvider()) {
result.put(providerName, dataLoader);
}
} else if (searchTerms == null && identifiers != null && filename == null) {
if (dataLoader instanceof SubmissionLookupDataLoader) {
result.put(providerName, dataLoader);
}
} else if (searchTerms == null && identifiers == null
&& filename != null) {
if (dataLoader instanceof FileDataLoader) {
// add only the one that we are interested in
if (providerName.endsWith(type)) {
result.put(providerName, dataLoader);
}
}
}
}
return result;
}
}

View File

@@ -1,150 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.io.IOException;
import java.util.Calendar;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import gr.ekt.bte.core.DataLoadingSpec;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.RecordSet;
import gr.ekt.bte.core.Value;
import gr.ekt.bte.exceptions.MalformedSourceException;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpException;
import org.dspace.core.Context;
/**
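 * Base class for submission lookup data loaders that query a remote service
 * either by identifier or by title/author/year search terms.
 *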
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public abstract class NetworkSubmissionLookupDataLoader implements
SubmissionLookupDataLoader {
Map<String, Set<String>> identifiers; // Searching by identifiers (DOI ...)
Map<String, Set<String>> searchTerms; // Searching by author, title, date
Map<String, String> fieldMap; // mapping between service fields and local
// intermediate fields
String providerName;
@Override
public List<Record> getByDOIs(Context context, Set<String> doiToSearch)
throws HttpException, IOException {
Map<String, Set<String>> keys = new HashMap<String, Set<String>>();
keys.put(DOI, doiToSearch);
return getByIdentifier(context, keys);
}
// BTE Data Loader interface methods
@Override
public RecordSet getRecords() throws MalformedSourceException {
RecordSet recordSet = new RecordSet();
List<Record> results = null;
try {
if (getIdentifiers() != null) { // Search by identifiers
results = getByIdentifier(null, getIdentifiers());
} else {
String title = getSearchTerms().get("title") != null ? getSearchTerms()
.get("title").iterator().next()
: null;
String authors = getSearchTerms().get("authors") != null ? getSearchTerms()
.get("authors").iterator().next()
: null;
String year = getSearchTerms().get("year") != null ? getSearchTerms()
.get("year").iterator().next()
: String.valueOf(Calendar.getInstance().get(Calendar.YEAR));
int yearInt = Integer.parseInt(year);
results = search(null, title, authors, yearInt);
}
} catch (HttpException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
if (results != null) {
for (Record record : results) {
recordSet.addRecord(record);
}
}
return recordSet;
}
@Override
public RecordSet getRecords(DataLoadingSpec arg0)
throws MalformedSourceException {
return getRecords();
}
public Map<String, Set<String>> getIdentifiers() {
return identifiers;
}
public void setIdentifiers(Map<String, Set<String>> identifiers) {
this.identifiers = identifiers;
}
public Map<String, Set<String>> getSearchTerms() {
return searchTerms;
}
public void setSearchTerms(Map<String, Set<String>> searchTerms) {
this.searchTerms = searchTerms;
}
public Map<String, String> getFieldMap() {
return fieldMap;
}
public void setFieldMap(Map<String, String> fieldMap) {
this.fieldMap = fieldMap;
}
public void setProviderName(String providerName) {
this.providerName = providerName;
}
public Record convertFields(Record publication) {
for (String fieldName : fieldMap.keySet()) {
String md = null;
if (fieldMap != null) {
md = this.fieldMap.get(fieldName);
}
if (StringUtils.isBlank(md)) {
continue;
} else {
md = md.trim();
}
if (publication.isMutable()) {
List<Value> values = publication.getValues(fieldName);
publication.makeMutable().removeField(fieldName);
publication.makeMutable().addField(md, values);
}
}
return publication;
}
}

View File

@@ -1,75 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.util.ArrayList;
import java.util.List;
import gr.ekt.bte.core.AbstractModifier;
import gr.ekt.bte.core.MutableRecord;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.StringValue;
import gr.ekt.bte.core.Value;
import org.apache.commons.lang3.StringUtils;
/**
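 * BTE modifier that strips a trailing dot from the values of the configured
 * fields.
 *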
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public class RemoveLastDotModifier extends AbstractModifier {
List<String> fieldKeys;
/**
* @param name modifier name
*/
public RemoveLastDotModifier(String name) {
super(name);
}
/*
* (non-Javadoc)
*
* @see
* gr.ekt.bte.core.AbstractModifier#modify(gr.ekt.bte.core.MutableRecord)
*/
@Override
public Record modify(MutableRecord record) {
if (fieldKeys != null) {
for (String key : fieldKeys) {
List<Value> values = record.getValues(key);
List<Value> newValues = new ArrayList<Value>();
if (values != null) {
for (Value value : values) {
String valueString = value.getAsString();
if (StringUtils.isNotBlank(valueString)
&& valueString.endsWith(".")) {
newValues.add(new StringValue(valueString
.substring(0, valueString.length() - 1)));
} else {
newValues.add(new StringValue(valueString));
}
}
record.updateField(key, newValues);
}
}
}
return record;
}
public void setFieldKeys(List<String> fieldKeys) {
this.fieldKeys = fieldKeys;
}
}

View File

@@ -1,103 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.util.List;
import gr.ekt.bte.core.DataLoader;
import gr.ekt.bte.core.DataLoadingSpec;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.RecordSet;
import gr.ekt.bte.exceptions.MalformedSourceException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.submit.util.ItemSubmissionLookupDTO;
/**
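 * BTE data loader that produces one merged record for each
 * ItemSubmissionLookupDTO in the configured list.
 *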
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public class SubmissionItemDataLoader implements DataLoader {
protected List<ItemSubmissionLookupDTO> dtoList;
List<DataLoader> providers;
private static Logger log = LogManager.getLogger(SubmissionItemDataLoader.class);
public SubmissionItemDataLoader() {
dtoList = null;
providers = null;
}
@Override
public RecordSet getRecords() throws MalformedSourceException {
if (dtoList == null) {
throw new MalformedSourceException("dtoList not initialized");
}
RecordSet ret = new RecordSet();
for (ItemSubmissionLookupDTO dto : dtoList) {
Record rec = dto.getTotalPublication(providers);
ret.addRecord(rec);
}
log.info("BTE DataLoader finished. Items loaded: "
+ ret.getRecords().size());
// Printing debug message
String totalString = "";
for (Record record : ret.getRecords()) {
totalString += SubmissionLookupUtils.getPrintableString(record)
+ "\n";
}
log.debug("Records loaded:\n" + totalString);
return ret;
}
@Override
public RecordSet getRecords(DataLoadingSpec spec)
throws MalformedSourceException {
if (spec.getOffset() > 0) {
return new RecordSet();
}
return getRecords();
}
/**
* @return the dtoList
*/
public List<ItemSubmissionLookupDTO> getDtoList() {
return dtoList;
}
/**
* @param dtoList the dtoList to set
*/
public void setDtoList(List<ItemSubmissionLookupDTO> dtoList) {
this.dtoList = dtoList;
}
/**
* @return the providers
*/
public List<DataLoader> getProviders() {
return providers;
}
/**
* @param providers the providers to set
*/
public void setProviders(List<DataLoader> providers) {
this.providers = providers;
}
}

View File

@@ -1,55 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;
import gr.ekt.bte.core.DataLoader;
import gr.ekt.bte.core.Record;
import org.apache.http.HttpException;
import org.dspace.core.Context;
/**
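 * Contract for submission lookup providers: search by title/author/year or
 * fetch records by identifier (DOI, PubMed, arXiv, ...).
 *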
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public interface SubmissionLookupDataLoader extends DataLoader {
public final static String DOI = "doi";
public final static String PUBMED = "pubmed";
public final static String ARXIV = "arxiv";
public final static String REPEC = "repec";
public final static String SCOPUSEID = "scopuseid";
public final static String CINII = "cinii";
public final static String TYPE = "type";
List<String> getSupportedIdentifiers();
boolean isSearchProvider();
List<Record> search(Context context, String title, String author, int year)
throws HttpException, IOException;
List<Record> getByIdentifier(Context context, Map<String, Set<String>> keys)
throws HttpException, IOException;
List<Record> getByDOIs(Context context, Set<String> doiToSearch)
throws HttpException, IOException;
}

View File

@@ -1,91 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import gr.ekt.bte.core.DataOutputSpec;
import gr.ekt.bte.core.OutputGenerator;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.RecordSet;
import gr.ekt.bte.core.Value;
import org.dspace.submit.util.ItemSubmissionLookupDTO;
/**
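 * BTE output generator that groups the looked-up records by DOI and wraps
 * each group in an ItemSubmissionLookupDTO.
 *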
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public class SubmissionLookupOutputGenerator implements OutputGenerator {
protected List<ItemSubmissionLookupDTO> dtoList;
protected final String DOI_FIELD = "doi";
protected final String NOT_FOUND_DOI = "NOT-FOUND-DOI";
public SubmissionLookupOutputGenerator() {
}
@Override
public List<String> generateOutput(RecordSet records) {
dtoList = new ArrayList<ItemSubmissionLookupDTO>();
Map<String, List<Record>> record_sets = new HashMap<String, List<Record>>();
int counter = 0;
for (Record rec : records) {
String current_doi = NOT_FOUND_DOI;
List<Value> values = rec.getValues(DOI_FIELD);
if (values != null && values.size() > 0) {
current_doi = values.get(0).getAsString();
} else {
current_doi = NOT_FOUND_DOI + "_" + counter;
}
if (record_sets.keySet().contains(current_doi)) {
record_sets.get(current_doi).add(rec);
} else {
ArrayList<Record> publication = new ArrayList<Record>();
publication.add(rec);
record_sets.put(current_doi, publication);
}
counter++;
}
for (Map.Entry<String, List<Record>> entry : record_sets.entrySet()) {
ItemSubmissionLookupDTO dto = new ItemSubmissionLookupDTO(
entry.getValue());
dtoList.add(dto);
}
return new ArrayList<String>();
}
@Override
public List<String> generateOutput(RecordSet records, DataOutputSpec spec) {
return generateOutput(records);
}
/**
* @return the items
*/
public List<ItemSubmissionLookupDTO> getDtoList() {
return dtoList;
}
/**
* @param items the items to set
*/
public void setDtoList(List<ItemSubmissionLookupDTO> items) {
this.dtoList = items;
}
}

View File

@@ -1,194 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import gr.ekt.bte.core.DataLoader;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.TransformationEngine;
import gr.ekt.bte.dataloader.FileDataLoader;
import org.apache.logging.log4j.Logger;
import org.dspace.submit.util.SubmissionLookupDTO;
/**
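 * Coordinates the two-phase submission lookup: phase 1 collects and groups
 * the records returned by the providers, phase 2 transforms the selected
 * records for the submission. Also tracks the search, file and identifier
 * capabilities of each provider and stores the lookup DTOs in the HTTP
 * session.
 *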
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public class SubmissionLookupService {
public static final String CFG_MODULE = "submission-lookup";
public static final String SL_NAMESPACE_PREFIX = "http://www.dspace.org/sl/";
public static final String MANUAL_USER_INPUT = "manual";
public static final String PROVIDER_NAME_FIELD = "provider_name_field";
private static Logger log = org.apache.logging.log4j.LogManager.getLogger(SubmissionLookupService.class);
public static final String SEPARATOR_VALUE = "#######";
public static final String SEPARATOR_VALUE_REGEX = SEPARATOR_VALUE;
protected List<DataLoader> providers;
protected Map<String, List<String>> idents2provs;
protected List<String> searchProviders;
protected List<String> fileProviders;
protected TransformationEngine phase1TransformationEngine;
protected TransformationEngine phase2TransformationEngine;
protected List<String> detailFields = null;
public void setPhase2TransformationEngine(
TransformationEngine phase2TransformationEngine) {
this.phase2TransformationEngine = phase2TransformationEngine;
}
public void setPhase1TransformationEngine(
TransformationEngine phase1TransformationEngine) {
this.phase1TransformationEngine = phase1TransformationEngine;
MultipleSubmissionLookupDataLoader dataLoader = (MultipleSubmissionLookupDataLoader) phase1TransformationEngine
.getDataLoader();
this.idents2provs = new HashMap<String, List<String>>();
this.searchProviders = new ArrayList<String>();
this.fileProviders = new ArrayList<String>();
if (providers == null) {
this.providers = new ArrayList<DataLoader>();
for (String providerName : dataLoader.getProvidersMap().keySet()) {
DataLoader p = dataLoader.getProvidersMap().get(providerName);
this.providers.add(p);
// File providers are only tracked by name; the identifier and
// search setup below applies to network providers only
if (p instanceof FileDataLoader) {
this.fileProviders.add(providerName);
} else if (p instanceof NetworkSubmissionLookupDataLoader) {
NetworkSubmissionLookupDataLoader p2 = (NetworkSubmissionLookupDataLoader) p;
p2.setProviderName(providerName);
if (p2.isSearchProvider()) {
searchProviders.add(providerName);
}
List<String> suppIdentifiers = p2.getSupportedIdentifiers();
if (suppIdentifiers != null) {
for (String ident : suppIdentifiers) {
List<String> tmp = idents2provs.get(ident);
if (tmp == null) {
tmp = new ArrayList<String>();
idents2provs.put(ident, tmp);
}
tmp.add(providerName);
}
}
}
}
}
}
public TransformationEngine getPhase1TransformationEngine() {
return phase1TransformationEngine;
}
public TransformationEngine getPhase2TransformationEngine() {
return phase2TransformationEngine;
}
public List<String> getIdentifiers() {
List<String> allSupportedIdentifiers = new ArrayList<String>();
MultipleSubmissionLookupDataLoader dataLoader = (MultipleSubmissionLookupDataLoader) phase1TransformationEngine
.getDataLoader();
for (String providerName : dataLoader.getProvidersMap().keySet()) {
DataLoader provider = dataLoader.getProvidersMap()
.get(providerName);
if (provider instanceof SubmissionLookupDataLoader) {
for (String identifier : ((SubmissionLookupDataLoader) provider)
.getSupportedIdentifiers()) {
if (!allSupportedIdentifiers.contains(identifier)) {
allSupportedIdentifiers.add(identifier);
}
}
}
}
return allSupportedIdentifiers;
}
public Map<String, List<String>> getProvidersIdentifiersMap() {
return idents2provs;
}
public SubmissionLookupDTO getSubmissionLookupDTO(
HttpServletRequest request, String uuidSubmission) {
SubmissionLookupDTO dto = (SubmissionLookupDTO) request.getSession()
.getAttribute("submission_lookup_" + uuidSubmission);
if (dto == null) {
dto = new SubmissionLookupDTO();
storeDTOs(request, uuidSubmission, dto);
}
return dto;
}
public void invalidateDTOs(HttpServletRequest request, String uuidSubmission) {
request.getSession().removeAttribute(
"submission_lookup_" + uuidSubmission);
}
public void storeDTOs(HttpServletRequest request, String uuidSubmission,
SubmissionLookupDTO dto) {
request.getSession().setAttribute(
"submission_lookup_" + uuidSubmission, dto);
}
public List<String> getSearchProviders() {
return searchProviders;
}
public List<DataLoader> getProviders() {
return providers;
}
public static String getProviderName(Record rec) {
return SubmissionLookupUtils.getFirstValue(rec,
SubmissionLookupService.PROVIDER_NAME_FIELD);
}
public static String getType(Record rec) {
return SubmissionLookupUtils.getFirstValue(rec,
SubmissionLookupDataLoader.TYPE);
}
public List<String> getFileProviders() {
return this.fileProviders;
}
public List<String> getDetailFields() {
return detailFields;
}
public void setDetailFields(List<String> detailFields) {
this.detailFields = detailFields;
}
}

View File

@@ -1,156 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.io.File;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.Value;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.content.Item;
import org.dspace.content.MetadataSchema;
import org.dspace.content.MetadataValue;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.ItemService;
import org.dspace.content.service.MetadataSchemaService;
import org.dspace.core.Context;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;
/**
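 * Static helpers for the submission lookup: provider cache checks, DOI
 * normalization and record value extraction.
 *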
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public class SubmissionLookupUtils {
private static final Logger log = LogManager.getLogger(SubmissionLookupUtils.class);
/**
* Default constructor
*/
private SubmissionLookupUtils() { }
private static final ConfigurationService configurationService
= DSpaceServicesFactory.getInstance().getConfigurationService();
/**
* Location of config file
*/
private static final String configFilePath = configurationService
.getProperty("dspace.dir")
+ File.separator
+ "config"
+ File.separator + "crosswalks" + File.separator;
// Pattern to extract the converter name, if any
private static final Pattern converterPattern = Pattern.compile(".*\\((.*)\\)");
protected static final MetadataSchemaService metadataSchemaService =
ContentServiceFactory.getInstance().getMetadataSchemaService();
protected static final ItemService itemService = ContentServiceFactory.getInstance().getItemService();
public static LookupProvidersCheck getProvidersCheck(Context context,
Item item, String dcSchema, String dcElement,
String dcQualifier) {
try {
LookupProvidersCheck check = new LookupProvidersCheck();
List<MetadataSchema> schemas = metadataSchemaService.findAll(context);
List<MetadataValue> values = itemService.getMetadata(item, dcSchema, dcElement,
dcQualifier, Item.ANY);
for (MetadataSchema schema : schemas) {
boolean error = false;
if (schema.getNamespace().startsWith(
SubmissionLookupService.SL_NAMESPACE_PREFIX)) {
List<MetadataValue> slCache = itemService.getMetadata(item, schema.getName(),
dcElement, dcQualifier, Item.ANY);
if (slCache.isEmpty()) {
continue;
}
if (slCache.size() != values.size()) {
error = true;
} else {
for (int idx = 0; idx < values.size(); idx++) {
MetadataValue v = values.get(idx);
MetadataValue sl = slCache.get(idx);
// FIXME handle authority values and multiple possibilities:
// uncertain matches, affiliations, etc.
if (!v.getValue().equals(sl.getValue())) {
error = true;
break;
}
}
}
if (error) {
check.getProvidersErr().add(schema.getName());
} else {
check.getProvidersOk().add(schema.getName());
}
}
}
return check;
} catch (SQLException e) {
log.error(e.getMessage(), e);
throw new RuntimeException(e.getMessage(), e);
}
}
public static String normalizeDOI(String doi) {
if (doi != null) {
return doi.trim().replaceAll("^http://dx.doi.org/", "")
.replaceAll("^doi:", "");
}
return null;
}
public static String getFirstValue(Record rec, String field) {
List<Value> values = rec.getValues(field);
String value = null;
if (values != null && values.size() > 0) {
value = values.get(0).getAsString();
}
return value;
}
public static List<String> getValues(Record rec, String field) {
List<String> result = new ArrayList<>();
List<Value> values = rec.getValues(field);
if (values != null && values.size() > 0) {
for (Value value : values) {
result.add(value.getAsString());
}
}
return result;
}
public static String getPrintableString(Record record) {
StringBuilder result = new StringBuilder();
result.append("\nPublication {\n");
for (String field : record.getFields()) {
result.append("--").append(field).append(":\n");
List<Value> values = record.getValues(field);
for (Value value : values) {
result.append("\t").append(value.getAsString()).append("\n");
}
}
result.append("}\n");
return result.toString();
}
}

View File

@@ -1,97 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.lookup;
import java.util.ArrayList;
import java.util.List;
import gr.ekt.bte.core.AbstractModifier;
import gr.ekt.bte.core.MutableRecord;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.StringValue;
import gr.ekt.bte.core.Value;
import org.apache.commons.lang3.StringUtils;
/**
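 * BTE modifier that joins all the values of a field into a single value
 * using the configured separator.
 *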
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public class ValueConcatenationModifier extends AbstractModifier {
private String field;
private String separator = ",";
private boolean whitespaceAfter = true;
public ValueConcatenationModifier() {
super("ValueConcatenationModifier");
}
@Override
public Record modify(MutableRecord rec) {
List<Value> values = rec.getValues(field);
if (values != null) {
List<String> converted_values = new ArrayList<String>();
for (Value val : values) {
converted_values.add(val.getAsString());
}
List<Value> final_value = new ArrayList<Value>();
String v = StringUtils.join(converted_values.iterator(), separator
+ (whitespaceAfter ? " " : ""));
final_value.add(new StringValue(v));
rec.updateField(field, final_value);
}
return rec;
}
/**
* @return the field
*/
public String getField() {
return field;
}
/**
* @param field the field to set
*/
public void setField(String field) {
this.field = field;
}
/**
* @return the separator
*/
public String getSeparator() {
return separator;
}
/**
* @param separator the separator to set
*/
public void setSeparator(String separator) {
this.separator = separator;
}
/**
* @return the whiteSpaceAfter
*/
public boolean isWhitespaceAfter() {
return whitespaceAfter;
}
/**
* @param whiteSpaceAfter the whiteSpaceAfter to set
*/
public void setWhitespaceAfter(boolean whiteSpaceAfter) {
this.whitespaceAfter = whiteSpaceAfter;
}
}

View File

@@ -1,22 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.step;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/**
* @author Luigi Andrea Pascarelli (luigiandrea.pascarelli at 4science.it)
*/
public class DescribeStep extends MetadataStep {
/**
* log4j logger
*/
private static final Logger log = LogManager.getLogger();
}

View File

@@ -1,22 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.step;
import org.apache.log4j.Logger;
/**
* @author Luigi Andrea Pascarelli (luigiandrea.pascarelli at 4science.it)
*/
public class ExtractionStep extends MetadataStep {
/**
* log4j logger
*/
private static Logger log = Logger
.getLogger(ExtractionStep.class);
}

View File

@@ -1,197 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.step;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import gr.ekt.bte.core.DataLoader;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.Value;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpException;
import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.InProgressSubmission;
import org.dspace.content.Item;
import org.dspace.content.MetadataValue;
import org.dspace.core.Context;
import org.dspace.core.Utils;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.dspace.submit.AbstractProcessingStep;
import org.dspace.submit.listener.MetadataListener;
import org.dspace.submit.lookup.SubmissionLookupDataLoader;
//FIXME move to the ExtractionStep
/**
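 * Processing step that snapshots the metadata watched by the configured
 * MetadataListener beans before the step runs and, when new identifier
 * values appear afterwards, queries the matching lookup data loaders to
 * enrich the item.
 *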
* @author Luigi Andrea Pascarelli (luigiandrea.pascarelli at 4science.it)
*/
public class MetadataStep extends AbstractProcessingStep {
/**
* log4j logger
*/
private static Logger log = Logger.getLogger(MetadataStep.class);
protected List<MetadataListener> listeners = DSpaceServicesFactory.getInstance().getServiceManager()
.getServicesByType(MetadataListener.class);
protected Map<String, List<MetadataValue>> metadataMap = new HashMap<String, List<MetadataValue>>();
private Map<String, Set<String>> results = new HashMap<String, Set<String>>();
private Map<String, String> mappingIdentifier = new HashMap<String, String>();
@Override
public void doPreProcessing(Context context, InProgressSubmission wsi) {
for (MetadataListener listener : listeners) {
for (String metadata : listener.getMetadata().keySet()) {
String[] tokenized = Utils.tokenize(metadata);
List<MetadataValue> mm = itemService.getMetadata(wsi.getItem(), tokenized[0], tokenized[1],
tokenized[2], Item.ANY);
if (mm != null && !mm.isEmpty()) {
metadataMap.put(metadata, mm);
} else {
metadataMap.put(metadata, new ArrayList<MetadataValue>());
}
mappingIdentifier.put(metadata, listener.getMetadata().get(metadata));
}
}
}
@Override
public void doPostProcessing(Context context, InProgressSubmission wsi) {
external:
for (String metadata : metadataMap.keySet()) {
String[] tokenized = Utils.tokenize(metadata);
List<MetadataValue> currents = itemService.getMetadata(wsi.getItem(), tokenized[0], tokenized[1],
tokenized[2], Item.ANY);
if (currents != null && !currents.isEmpty()) {
List<MetadataValue> olds = metadataMap.get(metadata);
if (olds.isEmpty()) {
process(context, metadata, currents);
continue external;
}
internal:
for (MetadataValue current : currents) {
boolean found = false;
for (MetadataValue old : olds) {
if (old.getValue().equals(current.getValue())) {
found = true;
}
}
if (!found) {
process(context, metadata, current);
}
}
}
}
if (!results.isEmpty()) {
for (MetadataListener listener : listeners) {
for (DataLoader dataLoader : listener.getDataloadersMap().values()) {
SubmissionLookupDataLoader submissionLookupDataLoader = (SubmissionLookupDataLoader) dataLoader;
try {
List<Record> recordSet = submissionLookupDataLoader.getByIdentifier(context, results);
List<Record> resultSet = convertFields(recordSet, bteBatchImportService.getOutputMap());
enrichItem(context, resultSet, wsi.getItem());
} catch (HttpException | IOException | SQLException | AuthorizeException e) {
log.error(e.getMessage(), e);
}
}
}
}
}
protected void enrichItem(Context context, List<Record> rset, Item item) throws SQLException, AuthorizeException {
for (Record record : rset) {
for (String field : record.getFields()) {
try {
String[] tfield = Utils.tokenize(field);
List<MetadataValue> mdvs = itemService
.getMetadata(item, tfield[0], tfield[1], tfield[2], Item.ANY);
if (mdvs == null || mdvs.isEmpty()) {
for (Value value : record.getValues(field)) {
itemService.addMetadata(context, item, tfield[0], tfield[1], tfield[2], null,
value.getAsString());
}
} else {
external:
for (Value value : record.getValues(field)) {
boolean found = false;
for (MetadataValue mdv : mdvs) {
if (mdv.getValue().equals(value.getAsString())) {
found = true;
continue external;
}
}
if (!found) {
itemService.addMetadata(context, item, tfield[0], tfield[1], tfield[2], null,
value.getAsString());
}
}
}
} catch (SQLException e) {
log.error(e.getMessage(), e);
}
}
}
itemService.update(context, item);
}
private void process(Context context, String metadata, List<MetadataValue> currents) {
for (MetadataValue current : currents) {
process(context, metadata, current);
}
}
private void process(Context context, String metadata, MetadataValue current) {
String key = mappingIdentifier.get(metadata);
Set<String> identifiers = null;
if (!results.containsKey(key)) {
identifiers = new HashSet<String>();
} else {
identifiers = results.get(key);
}
identifiers.add(current.getValue());
results.put(key, identifiers);
}
public List<Record> convertFields(List<Record> recordSet, Map<String, String> fieldMap) {
List<Record> result = new ArrayList<Record>();
for (Record publication : recordSet) {
for (String fieldName : fieldMap.keySet()) {
String md = null;
if (fieldMap != null) {
md = fieldMap.get(fieldName);
}
if (StringUtils.isBlank(md)) {
continue;
} else {
md = md.trim();
}
if (publication.isMutable()) {
List<Value> values = publication.getValues(md);
publication.makeMutable().removeField(md);
publication.makeMutable().addField(fieldName, values);
}
}
result.add(publication);
}
return result;
}
}

View File

@@ -1,30 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.step;
import org.apache.logging.log4j.Logger;
import org.dspace.content.InProgressSubmission;
import org.dspace.core.Context;
import org.dspace.submit.AbstractProcessingStep;
public class SelectCollectionStep extends AbstractProcessingStep {
private static final Logger log = org.apache.logging.log4j.LogManager.getLogger(SelectCollectionStep.class);
@Override
public void doPreProcessing(Context context, InProgressSubmission wsi) {
// TODO Auto-generated method stub
}
@Override
public void doPostProcessing(Context context, InProgressSubmission wsi) {
// TODO Auto-generated method stub
}
}

View File

@@ -1,33 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.step;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.content.InProgressSubmission;
import org.dspace.core.Context;
import org.dspace.submit.AbstractProcessingStep;
public class StartSubmissionLookupStep extends AbstractProcessingStep {
/**
* log4j logger
*/
private static Logger log = LogManager.getLogger(StartSubmissionLookupStep.class);
@Override
public void doPreProcessing(Context context, InProgressSubmission wsi) {
// TODO Auto-generated method stub
}
@Override
public void doPostProcessing(Context context, InProgressSubmission wsi) {
// TODO Auto-generated method stub
}
}

View File

@@ -1,27 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.step;
import org.dspace.content.InProgressSubmission;
import org.dspace.core.Context;
import org.dspace.submit.AbstractProcessingStep;
public class VerifyStep extends AbstractProcessingStep {
@Override
public void doPreProcessing(Context context, InProgressSubmission wsi) {
// TODO Auto-generated method stub
}
@Override
public void doPostProcessing(Context context, InProgressSubmission wsi) {
// TODO Auto-generated method stub
}
}

View File

@@ -1,91 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.util;
import java.io.Serializable;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import gr.ekt.bte.core.DataLoader;
import gr.ekt.bte.core.MutableRecord;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.Value;
import org.dspace.submit.lookup.SubmissionLookupService;
/**
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public class ItemSubmissionLookupDTO implements Serializable {
private static final long serialVersionUID = 1;
private static final String MERGED_PUBLICATION_PROVIDER = "merged";
private static final String UNKNOWN_PROVIDER_STRING = "UNKNOWN-PROVIDER";
private List<Record> publications;
private String uuid;
public ItemSubmissionLookupDTO(List<Record> publications) {
this.uuid = UUID.randomUUID().toString();
this.publications = publications;
}
public List<Record> getPublications() {
return publications;
}
public Set<String> getProviders() {
Set<String> orderedProviders = new LinkedHashSet<String>();
for (Record p : publications) {
orderedProviders.add(SubmissionLookupService.getProviderName(p));
}
return orderedProviders;
}
public String getUUID() {
return uuid;
}
public Record getTotalPublication(List<DataLoader> providers) {
if (publications == null) {
return null;
} else if (publications.size() == 1) {
return publications.get(0);
} else {
MutableRecord pub = new SubmissionLookupPublication(
MERGED_PUBLICATION_PROVIDER);
// for (SubmissionLookupProvider prov : providers)
// {
for (Record p : publications) {
// if
// (!SubmissionLookupService.getProviderName(p).equals(prov.getShortName()))
// {
// continue;
// }
for (String field : p.getFields()) {
List<Value> values = p.getValues(field);
if (values != null && values.size() > 0) {
if (!pub.getFields().contains(field)) {
for (Value v : values) {
pub.addValue(field, v);
}
}
}
}
}
// }
return pub;
}
}
}

View File

@@ -1,45 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.util;
import java.io.Serializable;
import java.util.List;
import java.util.UUID;
/**
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public class SubmissionLookupDTO implements Serializable {
private static final long serialVersionUID = 1;
private String uuid;
private List<ItemSubmissionLookupDTO> items;
public SubmissionLookupDTO() {
this.uuid = UUID.randomUUID().toString();
}
public void setItems(List<ItemSubmissionLookupDTO> items) {
this.items = items;
}
public ItemSubmissionLookupDTO getLookupItem(String uuidLookup) {
if (items != null) {
for (ItemSubmissionLookupDTO item : items) {
if (item.getUUID().equals(uuidLookup)) {
return item;
}
}
}
return null;
}
}

View File

@@ -1,189 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.submit.util;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import gr.ekt.bte.core.MutableRecord;
import gr.ekt.bte.core.StringValue;
import gr.ekt.bte.core.Value;
import org.apache.commons.lang3.StringUtils;
import org.dspace.submit.lookup.SubmissionLookupDataLoader;
/**
* @author Andrea Bollini
* @author Kostas Stamatis
* @author Luigi Andrea Pascarelli
* @author Panagiotis Koutsourakis
*/
public class SubmissionLookupPublication implements MutableRecord, Serializable {
private String providerName;
private Map<String, List<String>> storage = new HashMap<String, List<String>>();
public SubmissionLookupPublication(String providerName) {
this.providerName = providerName;
}
// needed to serialize it with JSON
public Map<String, List<String>> getStorage() {
return storage;
}
@Override
public Set<String> getFields() {
return storage.keySet();
}
public List<String> remove(String md) {
return storage.remove(md);
}
public void add(String md, String nValue) {
if (StringUtils.isNotBlank(nValue)) {
List<String> tmp = storage.get(md);
if (tmp == null) {
tmp = new ArrayList<String>();
storage.put(md, tmp);
}
tmp.add(nValue);
}
}
public String getFirstValue(String md) {
List<String> tmp = storage.get(md);
if (tmp == null || tmp.size() == 0) {
return null;
}
return tmp.get(0);
}
public String getProviderName() {
return providerName;
}
public String getType() {
return getFirstValue(SubmissionLookupDataLoader.TYPE);
}
// BTE Record interface methods
@Override
public boolean hasField(String md) {
return storage.containsKey(md);
}
@Override
public List<Value> getValues(String md) {
List<String> stringValues = storage.get(md);
if (stringValues == null) {
return null;
}
List<Value> values = new ArrayList<Value>();
for (String value : stringValues) {
values.add(new StringValue(value));
}
return values;
}
@Override
public boolean isMutable() {
return true;
}
@Override
public MutableRecord makeMutable() {
return this;
}
@Override
public boolean addField(String md, List<Value> values) {
if (storage.containsKey(md)) {
List<String> stringValues = storage.get(md);
if (values != null) {
for (Value value : values) {
stringValues.add(value.getAsString());
}
}
} else {
List<String> tmp = new ArrayList<String>();
if (values != null) {
for (Value value : values) {
tmp.add(value.getAsString());
}
}
storage.put(md, tmp);
}
return true;
}
@Override
public boolean addValue(String md, Value value) {
if (storage.containsKey(md)) {
List<String> stringValues = storage.get(md);
stringValues.add(value.getAsString());
} else {
List<String> tmp = new ArrayList<String>();
tmp.add(value.getAsString());
storage.put(md, tmp);
}
return true;
}
@Override
public boolean removeField(String md) {
if (storage.containsKey(md)) {
storage.remove(md);
return true;
}
return false;
}
@Override
public boolean removeValue(String md, Value value) {
if (storage.containsKey(md)) {
List<String> stringValues = storage.get(md);
stringValues.remove(value.getAsString());
}
return true;
}
@Override
public boolean updateField(String md, List<Value> values) {
List<String> stringValues = new ArrayList<String>();
for (Value value : values) {
stringValues.add(value.getAsString());
}
storage.put(md, stringValues);
return true;
}
@Override
public boolean updateValue(String md, Value valueOld, Value valueNew) {
if (storage.containsKey(md)) {
List<String> stringValues = storage.get(md);
// copy into a fresh list to avoid modifying the list being iterated
List<String> newStringValues = new ArrayList<String>();
for (String s : stringValues) {
if (s.equals(valueOld.getAsString())) {
newStringValues.add(valueNew.getAsString());
} else {
newStringValues.add(s);
}
}
storage.put(md, newStringValues);
}
return true;
}
}

View File

@@ -19,7 +19,9 @@
<context:annotation-config/> <!-- allows us to use spring annotations in beans -->
<bean id="importService" class="org.dspace.importer.external.service.ImportService" lazy-init="false" autowire="byType" destroy-method="destroy">
<bean id="org.dspace.importer.external.service.ImportService"
class="org.dspace.importer.external.service.ImportService"
lazy-init="false" autowire="byType" destroy-method="destroy">
<property name="importSources">
<list>
<ref bean="pubmedImportService" />

View File

@@ -19,6 +19,7 @@
<submission-map>
<name-map collection-handle="default" submission-name="traditional"/>
<name-map collection-handle="123456789/language-test-1" submission-name="languagetestprocess"/>
<name-map collection-handle="123456789/extraction-test" submission-name="extractiontestprocess"/>
</submission-map>
@@ -81,20 +82,16 @@
<!--Step will be to select a Creative Commons License -->
<!-- Uncomment this step to allow the user to select a Creative Commons
license -->
<step-definition id="cclicense"> <heading>submit.progressbar.CClicense</heading>
<step-definition id="cclicense"> <heading>submit.progressbar.CClicense</heading>
<processing-class>org.dspace.app.rest.submit.step.CCLicenseStep</processing-class>
<type>cclicense</type> </step-definition>
<type>cclicense</type>
</step-definition>
<!--Step will be to Check for potential duplicate -->
<!-- <step-definition id="detect-duplicate"> <heading>submit.progressbar.detect.duplicate</heading>
<processing-class>org.dspace.submit.step.DetectPotentialDuplicate</processing-class>
<type>duplicate</type> </step-definition> -->
<!--Step will be to Verify/Review everything -->
<!-- <step-definition id="verify"> <heading>submit.progressbar.verify</heading> <processing-class>org.dspace.submit.step.VerifyStep</processing-class>
<type>verify</type> </step-definition> -->
<step-definition id="extractionstep">
<heading>submit.progressbar.ExtractMetadataStep</heading>
<processing-class>org.dspace.app.rest.submit.step.ExtractMetadataStep</processing-class>
<type>extract</type>
</step-definition>
<!-- Fake Steps to test parsing of all options -->
<!-- <step-definition mandatory="false"> <heading>fake.submission.readonly</heading>
@@ -148,18 +145,28 @@
<!--Step will be to Upload the item -->
<step id="upload"/>
<!-- <step id="upload-with-embargo"/> -->
<!-- <step id="detect-duplicate"/> -->
<!-- <step id="extractionstep"/> -->
<!--Step will be to Sign off on the License -->
<step id="license"/>
<step id="cclicense"/>
<!-- <step id="verify"/> -->
</submission-process>
<submission-process name="languagetestprocess">
<step id="collection"/>
<step id="languagetest"/>
</submission-process>
<submission-process name="extractiontestprocess">
<step id="collection"/>
<step id="traditionalpageone"/>
<step id="traditionalpagetwo"/>
<step id="upload"/>
<step id="extractionstep"/>
<step id="license"/>
<step id="cclicense"/>
</submission-process>
</submission-definitions>
</item-submission>

View File

@@ -53,5 +53,12 @@
<bean id="orcidRestConnector" class="org.dspace.external.OrcidRestConnector">
<constructor-arg value="${orcid.api.url}"/>
</bean>
<bean id="pubmedLiveImportDataProvider" class="org.dspace.external.provider.impl.LiveImportDataProvider">
<property name="metadataSource" ref="pubmedImportService"/>
<property name="sourceIdentifier" value="pubmed"/>
<property name="recordIdMetadata" value="dc.identifier.other"/>
</bean>
</beans>
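A rough sketch of what this wiring enables (illustrative only: the bean name lookup and the Optional-returning getExternalDataObject are assumptions about the ExternalDataProvider contract in this tree, so adjust to the actual interface):

import org.dspace.external.model.ExternalDataObject;
import org.dspace.external.provider.impl.LiveImportDataProvider;
import org.dspace.utils.DSpace;

public class PubmedProviderSketch {
    public static void main(String[] args) {
        // look up the provider configured above by its bean name (assumed)
        LiveImportDataProvider provider = new DSpace().getServiceManager()
            .getServiceByName("pubmedLiveImportDataProvider", LiveImportDataProvider.class);
        // query by record id (a PubMed ID); the id is stored in the metadata
        // field configured as recordIdMetadata (dc.identifier.other)
        provider.getExternalDataObject("18926410").ifPresent(obj ->
            obj.getMetadata().forEach(m ->
                System.out.println(m.getSchema() + "." + m.getElement() + " = " + m.getValue())));
    }
}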

View File

@@ -100,8 +100,10 @@ public abstract class AInprogressItemConverter<T extends InProgressSubmission,
for (ErrorRest error : stepProcessing.validate(submissionService, obj, stepConfig)) {
addError(witem.getErrors(), error);
}
witem.getSections()
.put(sections.getId(), stepProcessing.getData(submissionService, obj, stepConfig));
if (stepProcessing.hasDataSection()) {
witem.getSections()
.put(sections.getId(), stepProcessing.getData(submissionService, obj, stepConfig));
}
} else {
log.warn("The submission step class specified by '" + stepConfig.getProcessingClassName() +
"' does not extend the class org.dspace.app.rest.submit.AbstractRestProcessingStep!" +

View File

@@ -230,9 +230,13 @@ public class WorkspaceItemRestRepository extends DSpaceRestRepository<WorkspaceI
List<ErrorRest> errors = new ArrayList<ErrorRest>();
SubmissionConfig submissionConfig =
submissionConfigReader.getSubmissionConfigByName(wsi.getSubmissionDefinition().getName());
List<Object[]> stepInstancesAndConfigs = new ArrayList<Object[]>();
// we need to run the preProcess of all the appropriate steps and then move on to the
// upload and postProcess phases
// We will initialize each step class just once so that the same instance is used
// across all the phases, reducing initialization time as well
for (int i = 0; i < submissionConfig.getNumberOfSteps(); i++) {
SubmissionStepConfig stepConfig = submissionConfig.getStep(i);
/*
* First, load the step processing class (using the current
* class loader)
@@ -241,23 +245,34 @@ public class WorkspaceItemRestRepository extends DSpaceRestRepository<WorkspaceI
Class stepClass;
try {
stepClass = loader.loadClass(stepConfig.getProcessingClassName());
Object stepInstance = stepClass.newInstance();
if (UploadableStep.class.isAssignableFrom(stepClass)) {
UploadableStep uploadableStep = (UploadableStep) stepInstance;
uploadableStep.doPreProcessing(context, source);
ErrorRest err =
uploadableStep.upload(context, submissionService, stepConfig, source, file);
uploadableStep.doPostProcessing(context, source);
if (err != null) {
errors.add(err);
}
Object stepInstance = stepClass.newInstance();
stepInstancesAndConfigs.add(new Object[] {stepInstance, stepConfig});
}
} catch (Exception e) {
log.error(e.getMessage(), e);
}
}
for (Object[] stepInstanceAndCfg : stepInstancesAndConfigs) {
UploadableStep uploadableStep = (UploadableStep) stepInstanceAndCfg[0];
uploadableStep.doPreProcessing(context, source);
}
for (Object[] stepInstanceAndCfg : stepInstancesAndConfigs) {
UploadableStep uploadableStep = (UploadableStep) stepInstanceAndCfg[0];
ErrorRest err;
try {
err = uploadableStep.upload(context, submissionService, (SubmissionStepConfig) stepInstanceAndCfg[1],
source, file);
} catch (IOException e) {
throw new RuntimeException(e);
}
if (err != null) {
errors.add(err);
}
}
for (Object[] stepInstanceAndCfg : stepInstancesAndConfigs) {
UploadableStep uploadableStep = (UploadableStep) stepInstanceAndCfg[0];
uploadableStep.doPostProcessing(context, source);
}
wsi = converter.toRest(source, utils.obtainProjection());
@@ -295,50 +310,67 @@ public class WorkspaceItemRestRepository extends DSpaceRestRepository<WorkspaceI
boolean sectionExist = false;
SubmissionConfig submissionConfig = submissionConfigReader
.getSubmissionConfigByName(wsi.getSubmissionDefinition().getName());
for (int stepNum = 0; stepNum < submissionConfig.getNumberOfSteps(); stepNum++) {
SubmissionStepConfig stepConfig = submissionConfig.getStep(stepNum);
List<Object[]> stepInstancesAndConfigs = new ArrayList<Object[]>();
// we need to run the preProcess of all the appropriate steps and then move on to the
// doPatchProcessing and postProcess phases
// We will initialize the step classes just once so that the same instance is used
// across all the phases, reducing initialization time as well
for (int i = 0; i < submissionConfig.getNumberOfSteps(); i++) {
SubmissionStepConfig stepConfig = submissionConfig.getStep(i);
if (section.equals(stepConfig.getId())) {
sectionExist = true;
/*
* First, load the step processing class (using the current
* class loader)
*/
ClassLoader loader = this.getClass().getClassLoader();
Class stepClass;
try {
stepClass = loader.loadClass(stepConfig.getProcessingClassName());
}
/*
* First, load the step processing class (using the current
* class loader)
*/
ClassLoader loader = this.getClass().getClassLoader();
Class stepClass;
try {
stepClass = loader.loadClass(stepConfig.getProcessingClassName());
if (AbstractRestProcessingStep.class.isAssignableFrom(stepClass)) {
Object stepInstance = stepClass.newInstance();
if (stepInstance instanceof AbstractRestProcessingStep) {
// load the JSPStep interface for this step
AbstractRestProcessingStep stepProcessing =
(AbstractRestProcessingStep) stepClass.newInstance();
stepProcessing.doPreProcessing(context, source);
stepProcessing.doPatchProcessing(context,
getRequestService().getCurrentRequest(), source, op, stepConfig);
stepProcessing.doPostProcessing(context, source);
} else {
throw new DSpaceBadRequestException(
"The submission step class specified by '" + stepConfig.getProcessingClassName() +
"' does not extend the class org.dspace.submit.AbstractProcessingStep!" +
" Therefore it cannot be used by the Configurable Submission as the <processing-class>!");
}
} catch (UnprocessableEntityException e) {
throw e;
} catch (Exception e) {
log.error(e.getMessage(), e);
throw new PatchException("Error processing the patch request", e);
stepInstancesAndConfigs.add(new Object[] {stepInstance, stepConfig});
} else {
throw new DSpaceBadRequestException(
"The submission step class specified by '" + stepConfig.getProcessingClassName() +
"' does not extend the class org.dspace.app.rest.submit.AbstractRestProcessingStep!" +
" Therefore it cannot be used by the Configurable Submission as the <processing-class>!");
}
} catch (Exception e) {
log.error(e.getMessage(), e);
throw new PatchException("Error processing the patch request", e);
}
}
if (!sectionExist) {
throw new UnprocessableEntityException("The section with name " + section +
" does not exist in this submission!");
}
for (Object[] stepInstanceAndCfg : stepInstancesAndConfigs) {
AbstractRestProcessingStep step = (AbstractRestProcessingStep) stepInstanceAndCfg[0];
step.doPreProcessing(context, source);
}
for (Object[] stepInstanceAndCfg : stepInstancesAndConfigs) {
// only the step related to the involved section needs to be invoked
SubmissionStepConfig stepConfig = (SubmissionStepConfig) stepInstanceAndCfg[1];
if (!section.equals(stepConfig.getId())) {
continue;
}
AbstractRestProcessingStep step = (AbstractRestProcessingStep) stepInstanceAndCfg[0];
try {
step.doPatchProcessing(context, getRequestService().getCurrentRequest(), source, op,
stepConfig);
} catch (UnprocessableEntityException e) {
throw e;
} catch (Exception e) {
log.error(e.getMessage(), e);
throw new PatchException("Error processing the patch request", e);
}
}
for (Object[] stepInstanceAndCfg : stepInstancesAndConfigs) {
AbstractRestProcessingStep step = (AbstractRestProcessingStep) stepInstanceAndCfg[0];
step.doPostProcessing(context, source);
}
}
@PreAuthorize("hasPermission(#id, 'WORKSPACEITEM', 'DELETE')")
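Condensed, the patch flow above now runs in three phases; the sketch below is illustrative only (steps, sectionStep and sectionConfig are placeholders for the instances and configs collected above). Running the pre/post hooks for every step is what lets a listener-based step such as ExtractMetadataStep observe changes made by the patched section's own step:

for (AbstractRestProcessingStep step : steps) {
    step.doPreProcessing(context, source);      // e.g. ExtractMetadataStep snapshots the listened metadata
}
sectionStep.doPatchProcessing(context, getRequestService().getCurrentRequest(), source, op, sectionConfig);
for (AbstractRestProcessingStep step : steps) {
    step.doPostProcessing(context, source);     // e.g. ExtractMetadataStep diffs the snapshot and queries the provider
}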

View File

@@ -56,6 +56,19 @@ public interface AbstractRestProcessingStep extends ListenerProcessingStep {
public <T extends Serializable> T getData(SubmissionService submissionService, InProgressSubmission obj,
SubmissionStepConfig config) throws Exception;
/**
* Method to inform the converter that this step has its own section data. This
* can be overridden by steps that only process/validate data in other sections.
* In such a case the
* {@link #getData(SubmissionService, InProgressSubmission, SubmissionStepConfig)}
* method should return null as it will be ignored.
*
* @return true by default to indicate that the step has its own section data
*/
default public boolean hasDataSection() {
return true;
}
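For example (a sketch mirroring what ExtractMetadataStep does later in this commit), a step that only enriches metadata exposed by other sections can opt out of having a section of its own:

// sketch: a step without its own section data; getData is then ignored
@Override
public boolean hasDataSection() {
    return false;
}

@Override
public <T extends Serializable> T getData(SubmissionService submissionService, InProgressSubmission obj,
    SubmissionStepConfig config) throws Exception {
    return null;
}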
/**
* The method will expose the list of validation errors identified by the step. The default implementation will
* look up a {@link Validation} spring bean in the context with the same name as the step id

View File

@@ -25,7 +25,15 @@ import org.dspace.services.model.Request;
*
* @author Luigi Andrea Pascarelli (luigiandrea.pascarelli at 4science.it)
*/
public class CollectionStep extends org.dspace.submit.step.SelectCollectionStep implements AbstractRestProcessingStep {
public class CollectionStep implements AbstractRestProcessingStep {
@Override
public void doPreProcessing(Context context, InProgressSubmission wsi) {
}
@Override
public void doPostProcessing(Context context, InProgressSubmission wsi) {
}
@Override
public UUID getData(SubmissionService submissionService, InProgressSubmission obj, SubmissionStepConfig config) {

View File

@@ -31,6 +31,7 @@ import org.dspace.content.MetadataValue;
import org.dspace.core.Context;
import org.dspace.core.Utils;
import org.dspace.services.model.Request;
import org.dspace.submit.AbstractProcessingStep;
/**
* Describe step for DSpace Spring Rest. Expose and allow patching of the in progress submission metadata. It is
@@ -39,7 +40,7 @@ import org.dspace.services.model.Request;
* @author Luigi Andrea Pascarelli (luigiandrea.pascarelli at 4science.it)
* @author Andrea Bollini (andrea.bollini at 4science.it)
*/
public class DescribeStep extends org.dspace.submit.step.DescribeStep implements AbstractRestProcessingStep {
public class DescribeStep extends AbstractProcessingStep implements AbstractRestProcessingStep {
private static final Logger log = org.apache.logging.log4j.LogManager.getLogger(DescribeStep.class);
@@ -49,6 +50,14 @@ public class DescribeStep extends org.dspace.submit.step.DescribeStep implements
inputReader = new DCInputsReader();
}
@Override
public void doPreProcessing(Context context, InProgressSubmission wsi) {
}
@Override
public void doPostProcessing(Context context, InProgressSubmission wsi) {
}
@Override
public DataDescribe getData(SubmissionService submissionService, InProgressSubmission obj,
SubmissionStepConfig config) {

View File

@@ -9,105 +9,207 @@ package org.dspace.app.rest.submit.step;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringJoiner;
import gr.ekt.bte.core.Record;
import gr.ekt.bte.core.RecordSet;
import gr.ekt.bte.core.Value;
import gr.ekt.bte.dataloader.FileDataLoader;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.collections4.Equator;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Logger;
import org.dspace.app.rest.model.ErrorRest;
import org.dspace.app.rest.repository.WorkspaceItemRestRepository;
import org.dspace.app.rest.model.patch.Operation;
import org.dspace.app.rest.submit.AbstractRestProcessingStep;
import org.dspace.app.rest.submit.SubmissionService;
import org.dspace.app.rest.submit.UploadableStep;
import org.dspace.app.rest.utils.Utils;
import org.dspace.app.util.SubmissionStepConfig;
import org.dspace.content.InProgressSubmission;
import org.dspace.content.Item;
import org.dspace.content.MetadataValue;
import org.dspace.content.dto.MetadataValueDTO;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.ItemService;
import org.dspace.core.Context;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.dspace.submit.extraction.MetadataExtractor;
import org.dspace.submit.step.ExtractionStep;
import org.dspace.external.model.ExternalDataObject;
import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.metadatamapping.MetadatumDTO;
import org.dspace.importer.external.service.ImportService;
import org.dspace.services.model.Request;
import org.dspace.submit.listener.MetadataListener;
import org.dspace.utils.DSpace;
import org.springframework.web.multipart.MultipartFile;
/**
* This submission step allows to extract metadata from an uploaded file to enrich or initialize a submission. The
* processing is delegated to a list of extractor specialized by format (i.e. a Grobid extractor to get data from a PDF
* file, an extractor to get data from bibliographic file such as BibTeX, etc)
* This submission step extracts metadata from an uploaded file and/or uses
* provided identifiers/metadata to further enrich a submission.
*
* The processing of the file is delegated to the Import Service (see
* {@link ImportService}), which can be extended with Data Providers specialized
* by format (e.g. a Grobid extractor to get data from a PDF file, an extractor
* to get data from bibliographic files such as BibTeX, etc.)
*
* Some metadata are monitored by a listener (see {@link MetadataListener}) and,
* when changed, they are used to generate an identifier that is then used to
* query the External Data Provider associated with the specific listener.
*
* @author Luigi Andrea Pascarelli (luigiandrea.pascarelli at 4science.it)
* @author Andrea Bollini (andrea.bollini at 4science.it)
*/
public class ExtractMetadataStep extends ExtractionStep implements UploadableStep {
public class ExtractMetadataStep implements AbstractRestProcessingStep, UploadableStep {
private ItemService itemService = ContentServiceFactory.getInstance().getItemService();
private ImportService importService = new DSpace().getSingletonService(ImportService.class);
private MetadataListener listener = new DSpace().getSingletonService(MetadataListener.class);
// we need to use a thread local as we need to store the status of the item before the changes are performed
private ThreadLocal<Map<String, List<MetadataValue>>> metadataMap =
new ThreadLocal<Map<String, List<MetadataValue>>>();
private static final Logger log = org.apache.logging.log4j.LogManager.getLogger(ExtractMetadataStep.class);
@Override
public void doPreProcessing(Context context, InProgressSubmission wsi) {
Map<String, List<MetadataValue>> metadataMapValue = new HashMap<String, List<MetadataValue>>();
for (String metadata : listener.getMetadataToListen()) {
String[] tokenized = org.dspace.core.Utils.tokenize(metadata);
List<MetadataValue> mm = itemService.getMetadata(wsi.getItem(), tokenized[0], tokenized[1],
tokenized[2], Item.ANY);
if (mm != null && !mm.isEmpty()) {
metadataMapValue.put(metadata, mm);
} else {
metadataMapValue.put(metadata, new ArrayList<MetadataValue>());
}
}
metadataMap.set(metadataMapValue);
}
@Override
public void doPostProcessing(Context context, InProgressSubmission wsi) {
Map<String, List<MetadataValue>> metadataMapValue = metadataMap.get();
Set<String> changedMetadata = getChangedMetadata(wsi.getItem(), listener.getMetadataToListen(),
metadataMapValue);
// have the listened metadata changed?
try {
if (!changedMetadata.isEmpty()) {
ExternalDataObject obj = listener.getExternalDataObject(context, wsi.getItem(), changedMetadata);
if (obj != null) {
// add metadata to the item only if no values are already present
Set<String> alreadyFilledMetadata = new HashSet<String>();
for (MetadataValue mv : wsi.getItem().getMetadata()) {
alreadyFilledMetadata.add(mv.getMetadataField().toString('.'));
}
for (MetadataValueDTO metadataValue : obj.getMetadata()) {
StringJoiner joiner = new StringJoiner(".");
joiner.add(metadataValue.getSchema());
joiner.add(metadataValue.getElement());
if (StringUtils.isNoneBlank(metadataValue.getQualifier())) {
joiner.add(metadataValue.getQualifier());
}
if (!alreadyFilledMetadata.contains(joiner.toString())) {
itemService.addMetadata(context, wsi.getItem(), metadataValue.getSchema(),
metadataValue.getElement(), metadataValue.getQualifier(), null,
metadataValue.getValue());
}
}
}
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
private Set<String> getChangedMetadata(Item item, Set<String> listenedMetadata,
Map<String, List<MetadataValue>> previousValues) {
Set<String> changedMetadata = new HashSet<String>();
for (String metadata : listenedMetadata) {
List<MetadataValue> prevMetadata = previousValues.get(metadata);
List<MetadataValue> currMetadata = itemService.getMetadataByMetadataString(item, metadata);
if (prevMetadata != null) {
if (currMetadata != null) {
if (!CollectionUtils.isEqualCollection(prevMetadata, currMetadata, new Equator<MetadataValue>() {
@Override
public boolean equate(MetadataValue o1, MetadataValue o2) {
return StringUtils.equals(o1.getValue(), o2.getValue())
&& StringUtils.equals(o1.getAuthority(), o2.getAuthority());
}
@Override
public int hash(MetadataValue o) {
return o.getValue().hashCode()
+ (o.getAuthority() != null ? o.getAuthority().hashCode() : 0);
}
})) {
// one or more values have been changed in the listened metadata
changedMetadata.add(metadata);
}
} else if (prevMetadata.size() != 0) {
// a value has been removed from the listened metadata
changedMetadata.add(metadata);
}
} else if (currMetadata != null && currMetadata.size() != 0) {
// a value has been added to the listened metadata
changedMetadata.add(metadata);
}
}
return changedMetadata;
}
@Override
public ErrorRest upload(Context context, SubmissionService submissionService, SubmissionStepConfig stepConfig,
InProgressSubmission wsi, MultipartFile multipartFile)
throws IOException {
Item item = wsi.getItem();
File file = Utils.getFile(multipartFile, "extract-metadata-step", stepConfig.getId());
try {
List<MetadataExtractor> extractors =
DSpaceServicesFactory.getInstance().getServiceManager().getServicesByType(MetadataExtractor.class);
File file = null;
for (MetadataExtractor extractor : extractors) {
FileDataLoader dataLoader = extractor.getDataLoader();
RecordSet recordSet = null;
if (extractor.getExtensions()
.contains(FilenameUtils.getExtension(multipartFile.getOriginalFilename()))) {
if (file == null) {
file = Utils.getFile(multipartFile, "submissionlookup-loader", stepConfig.getId());
ImportRecord record = importService.getRecord(file, multipartFile.getOriginalFilename());
if (record != null) {
// add metadata to the item only if no values are already present
Set<String> alreadyFilledMetadata = new HashSet<String>();
for (MetadataValue mv : item.getMetadata()) {
alreadyFilledMetadata.add(mv.getMetadataField().toString('.'));
}
for (MetadatumDTO metadataValue : record.getValueList()) {
StringJoiner joiner = new StringJoiner(".");
joiner.add(metadataValue.getSchema());
joiner.add(metadataValue.getElement());
if (StringUtils.isNoneBlank(metadataValue.getQualifier())) {
joiner.add(metadataValue.getQualifier());
}
if (!alreadyFilledMetadata.contains(joiner.toString())) {
itemService.addMetadata(context, item, metadataValue.getSchema(),
metadataValue.getElement(), metadataValue.getQualifier(), null,
metadataValue.getValue());
}
FileDataLoader fdl = (FileDataLoader) dataLoader;
fdl.setFilename(Utils.getFileName(multipartFile));
recordSet = convertFields(dataLoader.getRecords(), bteBatchImportService.getOutputMap());
enrichItem(context, recordSet.getRecords(), item);
}
}
} catch (Exception e) {
log.error(e.getMessage(), e);
ErrorRest result = new ErrorRest();
result.setMessage(e.getMessage());
result.getPaths().add("/" + WorkspaceItemRestRepository.OPERATION_PATH_SECTIONS + "/" + stepConfig.getId());
return result;
log.error("Error processing data", e);
throw new RuntimeException(e);
} finally {
file.delete();
}
return null;
}
private RecordSet convertFields(RecordSet recordSet, Map<String, String> fieldMap) {
RecordSet result = new RecordSet();
for (Record publication : recordSet.getRecords()) {
for (String fieldName : fieldMap.keySet()) {
String md = null;
if (fieldMap != null) {
md = fieldMap.get(fieldName);
}
@Override
public boolean hasDataSection() {
return false;
}
if (StringUtils.isBlank(md)) {
continue;
} else {
md = md.trim();
}
@Override
public <T extends Serializable> T getData(SubmissionService submissionService, InProgressSubmission obj,
SubmissionStepConfig config) throws Exception {
return null;
}
if (publication.isMutable()) {
List<Value> values = publication.getValues(md);
publication.makeMutable().removeField(md);
publication.makeMutable().addField(fieldName, values);
}
}
result.addRecord(publication);
}
return result;
@Override
public void doPatchProcessing(Context context, Request currentRequest, InProgressSubmission source, Operation op,
SubmissionStepConfig stepConf) throws Exception {
}
}
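Both doPostProcessing and upload apply the same merge rule: externally supplied metadata never overwrite values the submitter has already entered. A minimal, self-contained sketch of that rule, with plain maps standing in for the Item metadata and the external record:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class MergeRuleSketch {
    public static void main(String[] args) {
        Map<String, List<String>> item = new HashMap<>();
        item.put("dc.title", new ArrayList<>(List.of("This is a test title")));

        Map<String, String> external = new HashMap<>();
        external.put("dc.title", "Transfer of peanut allergy from the donor to a lung transplant recipient.");
        external.put("dc.date.issued", "2008");

        for (Map.Entry<String, String> e : external.entrySet()) {
            // fill the field only when the submitter has not already provided a value
            if (!item.containsKey(e.getKey())) {
                item.put(e.getKey(), new ArrayList<>(List.of(e.getValue())));
            }
        }
        // dc.title keeps the submitter's value; dc.date.issued is filled in
        System.out.println(item);
    }
}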

View File

@@ -243,10 +243,10 @@ public class SubmissionDefinitionsControllerIT extends AbstractControllerIntegra
Matchers.containsString("page=1"), Matchers.containsString("size=1"))))
.andExpect(jsonPath("$._links.last.href", Matchers.allOf(
Matchers.containsString("/api/config/submissiondefinitions?"),
Matchers.containsString("page=1"), Matchers.containsString("size=1"))))
Matchers.containsString("page=2"), Matchers.containsString("size=1"))))
.andExpect(jsonPath("$.page.size", is(1)))
.andExpect(jsonPath("$.page.totalElements", is(2)))
.andExpect(jsonPath("$.page.totalPages", is(2)))
.andExpect(jsonPath("$.page.totalElements", is(3)))
.andExpect(jsonPath("$.page.totalPages", is(3)))
.andExpect(jsonPath("$.page.number", is(0)));
getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions")
@@ -261,16 +261,43 @@ public class SubmissionDefinitionsControllerIT extends AbstractControllerIntegra
.andExpect(jsonPath("$._links.prev.href", Matchers.allOf(
Matchers.containsString("/api/config/submissiondefinitions?"),
Matchers.containsString("page=0"), Matchers.containsString("size=1"))))
.andExpect(jsonPath("$._links.next.href", Matchers.allOf(
Matchers.containsString("/api/config/submissiondefinitions?"),
Matchers.containsString("page=2"), Matchers.containsString("size=1"))))
.andExpect(jsonPath("$._links.self.href", Matchers.allOf(
Matchers.containsString("/api/config/submissiondefinitions?"),
Matchers.containsString("page=1"), Matchers.containsString("size=1"))))
.andExpect(jsonPath("$._links.last.href", Matchers.allOf(
Matchers.containsString("/api/config/submissiondefinitions?"),
Matchers.containsString("page=1"), Matchers.containsString("size=1"))))
Matchers.containsString("page=2"), Matchers.containsString("size=1"))))
.andExpect(jsonPath("$.page.size", is(1)))
.andExpect(jsonPath("$.page.totalElements", is(2)))
.andExpect(jsonPath("$.page.totalPages", is(2)))
.andExpect(jsonPath("$.page.totalElements", is(3)))
.andExpect(jsonPath("$.page.totalPages", is(3)))
.andExpect(jsonPath("$.page.number", is(1)));
getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions")
.param("size", "1")
.param("page", "2"))
.andExpect(status().isOk())
.andExpect(content().contentType(contentType))
.andExpect(jsonPath("$._embedded.submissiondefinitions[0].id", is("extractiontestprocess")))
.andExpect(jsonPath("$._links.first.href", Matchers.allOf(
Matchers.containsString("/api/config/submissiondefinitions?"),
Matchers.containsString("page=0"), Matchers.containsString("size=1"))))
.andExpect(jsonPath("$._links.prev.href", Matchers.allOf(
Matchers.containsString("/api/config/submissiondefinitions?"),
Matchers.containsString("page=1"), Matchers.containsString("size=1"))))
.andExpect(jsonPath("$._links.next").doesNotExist())
.andExpect(jsonPath("$._links.self.href", Matchers.allOf(
Matchers.containsString("/api/config/submissiondefinitions?"),
Matchers.containsString("page=2"), Matchers.containsString("size=1"))))
.andExpect(jsonPath("$._links.last.href", Matchers.allOf(
Matchers.containsString("/api/config/submissiondefinitions?"),
Matchers.containsString("page=2"), Matchers.containsString("size=1"))))
.andExpect(jsonPath("$.page.size", is(1)))
.andExpect(jsonPath("$.page.totalElements", is(3)))
.andExpect(jsonPath("$.page.totalPages", is(3)))
.andExpect(jsonPath("$.page.number", is(2)));
}
}

View File

@@ -4113,6 +4113,224 @@ public class WorkspaceItemRestRepositoryIT extends AbstractControllerIntegration
.andExpect(jsonPath("$.sections.traditionalpageone['dc.title']").doesNotExist());
}
@Test
/**
* Test the metadata extraction step adding an identifier
*
* @throws Exception
*/
public void lookupPubmedMetadataTest() throws Exception {
context.turnOffAuthorisationSystem();
//** GIVEN **
parentCommunity = CommunityBuilder.createCommunity(context)
.withName("Parent Community")
.build();
Community child1 = CommunityBuilder.createSubCommunity(context, parentCommunity)
.withName("Sub Community")
.build();
Collection col1 = CollectionBuilder.createCollection(context, child1, "123456789/extraction-test")
.withName("Collection 1").build();
String authToken = getAuthToken(admin.getEmail(), password);
WorkspaceItem witem = WorkspaceItemBuilder.createWorkspaceItem(context, col1)
.build();
WorkspaceItem witem2 = WorkspaceItemBuilder.createWorkspaceItem(context, col1)
.withTitle("This is a test title")
.build();
context.restoreAuthSystemState();
// try to add the pmid identifier
List<Operation> addId = new ArrayList<Operation>();
// create a list of values to use in add operation
List<Map<String, String>> values = new ArrayList<Map<String, String>>();
Map<String, String> value = new HashMap<String, String>();
value.put("value", "18926410");
values.add(value);
addId.add(new AddOperation("/sections/traditionalpageone/dc.identifier.other", values));
String patchBody = getPatchContent(addId);
getClient(authToken).perform(patch("/api/submission/workspaceitems/" + witem.getID())
.content(patchBody)
.contentType(MediaType.APPLICATION_JSON_PATCH_JSON))
.andExpect(status().isOk())
// testing lookup
.andExpect(jsonPath("$.sections.traditionalpageone['dc.identifier.other'][0].value",
is("18926410")))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.title'][0].value",
is("Transfer of peanut allergy from the donor to a lung transplant recipient.")))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.date.issued'][0].value",
is(Matchers.notNullValue())))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.contributor.author'][0].value",
is(Matchers.notNullValue())))
.andExpect(jsonPath("$.sections.traditionalpagetwo['dc.description.abstract'][0].value",
is(Matchers.notNullValue())))
;
// verify that the patch changes have been persisted
getClient(authToken).perform(get("/api/submission/workspaceitems/" + witem.getID()))
.andExpect(status().isOk())
// testing lookup
.andExpect(jsonPath("$.sections.traditionalpageone['dc.identifier.other'][0].value",
is("18926410")))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.title'][0].value",
is("Transfer of peanut allergy from the donor to a lung transplant recipient.")))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.date.issued'][0].value",
is(Matchers.notNullValue())))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.contributor.author'][0].value",
is(Matchers.notNullValue())))
.andExpect(jsonPath("$.sections.traditionalpagetwo['dc.description.abstract'][0].value",
is(Matchers.notNullValue())))
;
// verify that adding a pmid to a wsitem that already has a title metadata will not alter the user input
getClient(authToken).perform(patch("/api/submission/workspaceitems/" + witem2.getID())
.content(patchBody)
.contentType(MediaType.APPLICATION_JSON_PATCH_JSON))
.andExpect(status().isOk())
// testing lookup
.andExpect(jsonPath("$.sections.traditionalpageone['dc.identifier.other'][0].value",
is("18926410")))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.title'][0].value",
is("This is a test title")))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.date.issued'][0].value",
is(Matchers.notNullValue())))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.contributor.author'][0].value",
is(Matchers.notNullValue())))
.andExpect(jsonPath("$.sections.traditionalpagetwo['dc.description.abstract'][0].value",
is(Matchers.notNullValue())))
;
// verify that we can remove metadata provided by pubmed
List<Operation> removeTitle = new ArrayList<Operation>();
removeTitle.add(new RemoveOperation("/sections/traditionalpageone/dc.title/0"));
String rmPatchBody = getPatchContent(removeTitle);
getClient(authToken).perform(patch("/api/submission/workspaceitems/" + witem2.getID())
.content(rmPatchBody)
.contentType(MediaType.APPLICATION_JSON_PATCH_JSON))
.andExpect(status().isOk())
// testing lookup
.andExpect(jsonPath("$.sections.traditionalpageone['dc.identifier.other'][0].value",
is("18926410")))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.title']").doesNotExist())
.andExpect(jsonPath("$.sections.traditionalpageone['dc.date.issued'][0].value",
is(Matchers.notNullValue())))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.contributor.author'][0].value",
is(Matchers.notNullValue())))
.andExpect(jsonPath("$.sections.traditionalpagetwo['dc.description.abstract'][0].value",
is(Matchers.notNullValue())))
;
// verify that if we add more values to the listened metadata the lookup is not triggered again
// (i.e. the title stays empty)
List<Operation> addId2 = new ArrayList<Operation>();
addId2.add(new AddOperation("/sections/traditionalpageone/dc.identifier.other/-", value));
patchBody = getPatchContent(addId2);
getClient(authToken).perform(patch("/api/submission/workspaceitems/" + witem2.getID())
.content(patchBody)
.contentType(MediaType.APPLICATION_JSON_PATCH_JSON))
.andExpect(status().isOk())
// testing lookup
.andExpect(jsonPath("$.sections.traditionalpageone['dc.identifier.other'][0].value",
is("18926410")))
// second copy of the added identifier
.andExpect(jsonPath("$.sections.traditionalpageone['dc.identifier.other'][1].value",
is("18926410")))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.title']").doesNotExist())
.andExpect(jsonPath("$.sections.traditionalpageone['dc.date.issued'][0].value",
is(Matchers.notNullValue())))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.contributor.author'][0].value",
is(Matchers.notNullValue())))
.andExpect(jsonPath("$.sections.traditionalpagetwo['dc.description.abstract'][0].value",
is(Matchers.notNullValue())))
;
// verify that the patch changes have been persisted
getClient(authToken).perform(get("/api/submission/workspaceitems/" + witem2.getID()))
.andExpect(status().isOk())
// testing lookup
.andExpect(jsonPath("$.sections.traditionalpageone['dc.identifier.other'][0].value",
is("18926410")))
// second copy of the added identifier
.andExpect(jsonPath("$.sections.traditionalpageone['dc.identifier.other'][1].value",
is("18926410")))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.title']").doesNotExist())
.andExpect(jsonPath("$.sections.traditionalpageone['dc.date.issued'][0].value",
is(Matchers.notNullValue())))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.contributor.author'][0].value",
is(Matchers.notNullValue())))
.andExpect(jsonPath("$.sections.traditionalpagetwo['dc.description.abstract'][0].value",
is(Matchers.notNullValue())))
;
}
@Test
public void uploadBibtexFileOnExistingSubmissionTest() throws Exception {
context.turnOffAuthorisationSystem();
//** GIVEN **
parentCommunity = CommunityBuilder.createCommunity(context)
.withName("Parent Community")
.build();
Community child1 = CommunityBuilder.createSubCommunity(context, parentCommunity)
.withName("Sub Community")
.build();
Collection col1 = CollectionBuilder.createCollection(context, child1, "123456789/extraction-test")
.withName("Collection 1").build();
String authToken = getAuthToken(admin.getEmail(), password);
WorkspaceItem witem = WorkspaceItemBuilder.createWorkspaceItem(context, col1)
.build();
WorkspaceItem witem2 = WorkspaceItemBuilder.createWorkspaceItem(context, col1)
.withTitle("This is a test title")
.build();
context.restoreAuthSystemState();
InputStream bibtex = getClass().getResourceAsStream("bibtex-test.bib");
final MockMultipartFile bibtexFile = new MockMultipartFile("file", "/local/path/bibtex-test.bib",
"application/x-bibtex", bibtex);
try {
// adding a bibtex file with a single entry should automatically put the metadata in the bibtex file into
// the item
getClient(authToken).perform(fileUpload("/api/submission/workspaceitems/" + witem.getID())
.file(bibtexFile))
.andExpect(status().isCreated())
.andExpect(jsonPath("$.sections.traditionalpageone['dc.title'][0].value",
is("My Article")))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.contributor.author'][0].value",
is("Nobody Jr")))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.date.issued'][0].value",
is("2006")))
.andExpect(jsonPath("$.sections.upload.files[0]"
+ ".metadata['dc.source'][0].value",
is("/local/path/bibtex-test.bib")))
.andExpect(jsonPath("$.sections.upload.files[0]"
+ ".metadata['dc.title'][0].value",
is("bibtex-test.bib")));
// do again over a submission that already has a title, the manual input should be preserved
getClient(authToken).perform(fileUpload("/api/submission/workspaceitems/" + witem2.getID())
.file(bibtexFile))
.andExpect(status().isCreated())
.andExpect(jsonPath("$.sections.traditionalpageone['dc.title'][0].value",
is("This is a test title")))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.contributor.author'][0].value",
is("Nobody Jr")))
.andExpect(jsonPath("$.sections.traditionalpageone['dc.date.issued'][0].value",
is("2006")))
.andExpect(jsonPath("$.sections.upload.files[0]"
+ ".metadata['dc.source'][0].value",
is("/local/path/bibtex-test.bib")))
.andExpect(jsonPath("$.sections.upload.files[0]"
+ ".metadata['dc.title'][0].value",
is("bibtex-test.bib")));
} finally {
bibtex.close();
}
}
@Test
public void patchAcceptLicenseWrongPathTest() throws Exception {
context.turnOffAuthorisationSystem();

View File

@@ -1,19 +0,0 @@
## Email sent to DSpace users when their BTE batch import fails.
##
## Parameters: {0} the export error
## {1} the URL to the feedback page
##
##
## See org.dspace.core.Email for information on the format of this file.
##
#set($subject = 'DSpace - The batch import was not completed.')
The batch import you initiated from the DSpace UI was not completed, due to the following reason:
${params[0]}
For more information you may contact your system administrator:
${params[1]}
The DSpace Team

View File

@@ -1,15 +0,0 @@
## Email sent to DSpace users when they successfully batch import items via BTE.
##
## Parameters: {0} the filepath to the mapfile created by the batch import
##
##
## See org.dspace.core.Email for information on the format of this file.
##
#set($subject = 'DSpace - Batch import successfully completed')
The batch item import you initiated from the DSpace UI has completed successfully.
You may find the mapfile for the import in the following path: ${params[0]}
The DSpace Team

View File

@@ -119,14 +119,15 @@
<processing-class>org.dspace.app.rest.submit.step.CCLicenseStep</processing-class>
<type>cclicense</type> </step-definition> -->
<!--Step will be to Check for potential duplicate -->
<!-- <step-definition id="detect-duplicate"> <heading>submit.progressbar.detect.duplicate</heading>
<processing-class>org.dspace.submit.step.DetectPotentialDuplicate</processing-class>
<type>duplicate</type> </step-definition> -->
<!--Step will be to Verify/Review everything -->
<!-- <step-definition id="verify"> <heading>submit.progressbar.verify</heading> <processing-class>org.dspace.submit.step.VerifyStep</processing-class>
<type>verify</type> </step-definition> -->
<!--Step will be to enrich the current submission by querying external providers or processing the uploaded file -->
<!-- Uncomment this step to enrich the current submission using information extracted
from uploaded files or metadata. Please also check config/spring/api/step-processing-listener.xml
for further configuration -->
<!-- <step-definition id="extractionstep">
<heading>submit.progressbar.ExtractMetadataStep</heading>
<processing-class>org.dspace.app.rest.submit.step.ExtractMetadataStep</processing-class>
<type>extract</type>
</step-definition> -->
<!-- Fake Steps to test parsing of all options -->
<!-- <step-definition mandatory="false"> <heading>fake.submission.readonly</heading>
@@ -201,7 +202,7 @@
<!--Step will be to Upload the item -->
<step id="upload"/>
<!-- <step id="upload-with-embargo"/> -->
<!-- <step id="detect-duplicate"/> -->
<!-- <step id="extractionstep"/> -->
<!--Step will be to select a Creative Commons License -->
<!-- Uncomment this step to allow the user to select a Creative Commons -->

View File

@@ -1,470 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-2.5.xsd">
<!-- **************************************************************************************************** -->
<!-- BTE Batch import Service -->
<!-- **************************************************************************************************** -->
<bean id="org.dspace.app.itemimport.BTEBatchImportService" class="org.dspace.app.itemimport.BTEBatchImportService">
<property name="dataLoaders">
<map>
<!-- Specify here any data loaders that you want to have access to in the command line batch import.
Key is the name that you need to specify in the "-i" option in the command line script when "-b"
option is used (which means BTE Batch import) -->
<entry key="crossrefXML" value-ref="crossRefFileDataLoader" />
<entry key="ciniiXML" value-ref="ciniiFileDataLoader" />
<entry key="bibtex" value-ref="bibTeXDataLoader" />
<entry key="ris" value-ref="risDataLoader" />
<entry key="endnote" value-ref="endnoteDataLoader" />
<entry key="csv" value-ref="csvDataLoader" />
<entry key="tsv" value-ref="tsvDataLoader" />
<entry key="oai" value-ref="oaipmhDataLoader" />
</map>
</property>
<!-- The map that will be used to map internal BTE keys to DSpace metadata fields -->
<property name="outputMap" ref="outputMap" />
<property name="transformationEngine" ref="batchImportTransformationEngine" />
</bean>
<!-- **************************************************************************************************** -->
<!-- BTE Batch import Transformation Engine -->
<!-- **************************************************************************************************** -->
<bean id="batchImportTransformationEngine" class="gr.ekt.bte.core.TransformationEngine">
<property name="workflow" ref="batchImportLinearWorkflow" />
</bean>
<!-- Specify here any filters or modifiers to run before the output -->
<bean id="batchImportLinearWorkflow" class="gr.ekt.bte.core.LinearWorkflow">
<property name="process">
<list>
<!-- Add here filters and modifiers -->
</list>
</property>
</bean>
<!-- **************************************************************************************************** -->
<!-- Submission Lookup Service -->
<!-- **************************************************************************************************** -->
<bean class="org.dspace.submit.lookup.SubmissionLookupService"
name="org.dspace.submit.lookup.SubmissionLookupService">
<property name="phase1TransformationEngine" ref="phase1TransformationEngine" />
<property name="phase2TransformationEngine" ref="phase2TransformationEngine" />
<!-- Uncomment the following property if you want specific fields to appear in the detail presentation
of a publication. Default values are the ones shown below -->
<!--
<property name="detailFields">
<list>
<value>title</value>
<value>authors</value>
<value>editors</value>
<value>translators</value>
<value>chairs</value>
<value>issued</value>
<value>abstract</value>
<value>doi</value>
<value>journal</value>
<value>volume</value>
<value>issue</value>
<value>publisher</value>
<value>jissn</value>
<value>jeissn</value>
<value>pisbn</value>
<value>eisbn</value>
<value>keywords</value>
<value>mesh</value>
<value>language</value>
<value>subtype</value>
<value>translators</value>
</list>
</property>
-->
</bean>
<!-- **************************************************************************************************** -->
<!-- Submission Lookup Transformation Engine (phase 1) -->
<!-- **************************************************************************************************** -->
<bean name="phase1TransformationEngine" class="gr.ekt.bte.core.TransformationEngine">
<property name="dataLoader" ref="multipleDataLoader"/>
<property name="workflow" ref="phase1LinearWorkflow"/>
<property name="outputGenerator" ref="org.dspace.submit.lookup.SubmissionLookupOutputGenerator"/>
</bean>
<bean name="multipleDataLoader" class="org.dspace.submit.lookup.MultipleSubmissionLookupDataLoader">
<property name="dataloadersMap">
<map>
<!-- Specify here any data loaders you want to include in the submission lookup process.
Dataloaders must either extend the "NetworkSubmissionLookupDataLoader" abstract class
or conform to "FileDataLoader" interface of BTE -->
<entry key="crossref" value-ref="crossRefOnlineDataLoader"/>
<entry key="cinii" value-ref="ciniiOnlineDataLoader"/>
<entry key="crossRefXML" value-ref="crossRefFileDataLoader"/>
<entry key="ciniiXML" value-ref="ciniiFileDataLoader"/>
<entry key="bibtex" value-ref="bibTeXDataLoader"/>
<entry key="ris" value-ref="risDataLoader"/>
<entry key="endnote" value-ref="endnoteDataLoader"/>
<entry key="csv" value-ref="csvDataLoader"/>
<entry key="tsv" value-ref="tsvDataLoader"/>
</map>
</property>
</bean>
<bean name="org.dspace.submit.lookup.SubmissionLookupOutputGenerator" class="org.dspace.submit.lookup.SubmissionLookupOutputGenerator"/>
<!-- Specify here any filters or modifiers to run before the output -->
<bean name="phase1LinearWorkflow" class="gr.ekt.bte.core.LinearWorkflow">
<property name="process">
<list>
<ref bean="removeLastDot"/>
</list>
</property>
</bean>
<!-- Remove the last dot in the specified field keys -->
<bean name="removeLastDot" class="org.dspace.submit.lookup.RemoveLastDotModifier">
<constructor-arg value="removeLastDot Modifier"/>
<property name="fieldKeys">
<list>
<value>title</value>
</list>
</property>
</bean>
<!-- **************************************************************************************************** -->
<!-- Submission Lookup Transformation Engine (phase 2) -->
<!-- **************************************************************************************************** -->
<bean name="phase2TransformationEngine" class="gr.ekt.bte.core.TransformationEngine">
<property name="dataLoader" ref="submissionItemDataLoader"/>
<property name="workflow" ref="phase2linearWorkflow"/>
<property name="outputGenerator" ref="org.dspace.submit.lookup.DSpaceWorkspaceItemOutputGenerator"/>
</bean>
<bean name="submissionItemDataLoader" class="org.dspace.submit.lookup.SubmissionItemDataLoader"/>
<!-- Specify here any filters or modifiers to run before the output -->
<bean name="phase2linearWorkflow" class="gr.ekt.bte.core.LinearWorkflow">
<property name="process">
<list>
<ref bean="fieldMergeModifier"/>
<ref bean="valueConcatenationModifier"/>
<ref bean="languageCodeModifier"/>
</list>
</property>
</bean>
<bean name="fieldMergeModifier" class="org.dspace.submit.lookup.FieldMergeModifier">
<property name="mergeFieldMap">
<map>
<entry key="allauthors">
<list>
<value>authors</value>
</list>
</entry>
<entry key="allkeywords">
<list>
<value>keywords</value>
<value>mesh</value>
</list>
</entry>
</map>
</property>
</bean>
<bean name="valueConcatenationModifier" class="org.dspace.submit.lookup.ValueConcatenationModifier">
<property name="field" value="allkeywords"/>
<property name="separator" value=";" />
<property name="whitespaceAfter" value="true" />
</bean>
<bean name="languageCodeModifier" class="org.dspace.submit.lookup.LanguageCodeModifier"/>
<bean name="org.dspace.submit.lookup.DSpaceWorkspaceItemOutputGenerator" class="org.dspace.submit.lookup.DSpaceWorkspaceItemOutputGenerator">
<property name="outputMap" ref="outputMap"/>
<property name="extraMetadataToKeep">
<list>
<value>dc.import.contributorauthor</value>
<value>dc.import.contributoreditor</value>
<value>dc.import.contributortranslator</value>
<!-- <value>dc.description.scopusurl</value>
<value>dc.description.scopuscitationcount</value>
<value>dc.description.scopuscitationurl</value>-->
</list>
</property>
</bean>
<!-- **************************************************************************************************** -->
<!-- DataLoader beans -->
<!-- **************************************************************************************************** -->
<!-- Each dataloader needs a mapping that defines how the input maps to
records. Internally every record organizes the data as key-list of values
pairs, and in order to be able to recall the values the keys should have
distinct names. Each data format has a way to address individual data, and
this is the key of the map. The value is the label that the record internally
associates with the specific data and is used in the output mapping as well. -->
<!-- The key of the map is just the BibTeX label (e.g. author, date, pages
etc.). The value is the label that the record internally associates with the
specific data. -->
<bean id="bibTeXDataLoader" class="gr.ekt.bteio.loaders.BibTeXDataLoader">
<property name="fieldMap">
<map>
<entry key="title" value="title" />
<entry key="author" value="authors" />
<entry key="journal" value="journal" />
<entry key="year" value="issued" />
<entry key="ISSN" value="jissn" />
</map>
</property>
</bean>
<!-- Each entry in a CSV is a row, and each column represents the same data
in each entry. For example the first column might record the item title,
the second the authors etc. The key of the field map is this number. Note
that the first column is number 0. The CSV data loader has the following
extra parameters that configure its behaviour:
- skipLines: a number that instructs the reader to ignore the first lines
in the input file. Default value: 0
- separator: a character that signifies how the values are separated.
Default value ','
- quoteChar: individual values could include the separator character. For
example if the separator is ',' and there is an abstract, there is a high
probability that it will contain commas. If the value is quoted using the
quoteChar then separator characters inside it will be ignored. Default
value '"'.
- valueSeparator: there are cases where a value is the concatenation of
multiple values, for instance a list of authors. In this case the
CSVDataLoader can split the CSV value into its individual values, if they
are separated with valueSeparator. This can be a full java regular
expression. Default value: null (the CSV value is not split by default). -->
<bean id="csvDataLoader" class="gr.ekt.bteio.loaders.CSVDataLoader">
<property name="fieldMap">
<map>
<entry key="0" value="title" />
<entry key="1" value="authors" />
<entry key="2" value="issued" />
<entry key="3" value="journal" />
<entry key="4" value="abstract" />
<entry key="5" value="jissn" />
<entry key="6" value="subtype" />
</map>
</property>
<property name="skipLines" value="1" />
</bean>
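<!-- With the fieldMap and skipLines above, a matching input file would look
like this hypothetical sample (the header row is skipped because skipLines
is 1):

title,authors,issued,journal,abstract,jissn,subtype
"A Sample Title","Doe, Jane",2014,"Journal of Examples","An abstract, with commas.",1234-5678,article

Column 0 is read as the title, column 1 as the authors, and so on; the quoted
abstract shows why quoteChar matters when a value contains the separator. -->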
<!-- A TSV file is exactly the same as a CSV file, except that the separator is
a tab instead of a comma. See the comments on the CSV data loader above. -->
<bean id="tsvDataLoader" class="gr.ekt.bteio.loaders.CSVDataLoader">
<property name="fieldMap">
<map>
<entry key="0" value="title" />
<entry key="1" value="authors" />
<entry key="2" value="issued" />
<entry key="3" value="journal" />
<entry key="4" value="abstract" />
<entry key="5" value="jissn" />
<entry key="6" value="subtype" />
</map>
</property>
<!-- This makes the CSV data loader able to load TSV data -->
<property name="separator" value="\u0009" />
<property name="skipLines" value="1" />
</bean>
<!-- The keys in the RIS data loader map have to be the RIS tags that
need to be loaded. -->
<bean id="risDataLoader" class="gr.ekt.bteio.loaders.RISDataLoader">
<property name="fieldMap">
<map>
<entry key="T1" value="title" />
<entry key="AU" value="authors" />
<entry key="SO" value="journal" />
<entry key="PY" value="issued" />
<entry key="SN" value="jissn" />
<entry key="PT" value="subtype" />
<entry key="AB" value="abstract" />
</map>
</property>
</bean>
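<!-- A sketch of a RIS record using the tags mapped above (hypothetical sample
data; TY and ER are the standard record delimiters):

TY  - JOUR
T1  - A Sample Title
AU  - Doe, Jane
SO  - Journal of Examples
PY  - 2014
SN  - 1234-5678
AB  - A short abstract.
ER  -

Only the tags listed in the fieldMap (T1, AU, SO, PY, SN, PT, AB) are loaded;
other tags in the record are ignored. -->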
<bean id="endnoteDataLoader" class="gr.ekt.bteio.loaders.EndnoteDataLoader">
<property name="fieldMap">
<map>
<entry key="TI" value="title" />
<entry key="AU" value="authors" />
<entry key="AB" value="abstract" />
<entry key="PY" value="issued" />
<entry key="SO" value="journal" />
</map>
</property>
</bean>
<!-- OAI-PMH Data Loader -->
<bean id="oaipmhDataLoader" class="gr.ekt.bteio.loaders.OAIPMHDataLoader">
<property name="fieldMap">
<map>
<entry key="title" value="title" />
<entry key="creator" value="authors" />
<entry key="description" value="abstract" />
<entry key="date" value="issued" />
<entry key="type" value="subtype" />
</map>
</property>
<property name="prefix" value="oai_dc" />
<property name="serverAddress"
value="http://ebooks.serrelib.gr/serrelib-oai/request" />
</bean>
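<!-- This loader harvests Dublin Core records over OAI-PMH. With the settings
above it would issue standard OAI-PMH requests of the form (assumed,
illustrative URL):

http://ebooks.serrelib.gr/serrelib-oai/request?verb=ListRecords&metadataPrefix=oai_dc

and map the dc:title, dc:creator, dc:description, dc:date and dc:type elements
of each harvested record to the internal labels above. -->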
<!-- CrossRef Data Loaders -->
<bean id="crossRefOnlineDataLoader" class="org.dspace.submit.lookup.CrossRefOnlineDataLoader">
<property name="searchProvider" value="false" />
<!-- For the CrossRef service you need to obtain an API key from CrossRef.
Once you have it, add it to the following configuration value.
-->
<property name="apiKey" value="" />
<!-- Uncomment the following line if you want to set the maximum number of
results returned by the CrossRef free-text (by author, title, date) search.
Default value is 10.
-->
<!-- <property name="maxResults" value="10" /> -->
<property name="fieldMap" ref="crossrefInputMap" />
</bean>
<bean id="crossRefFileDataLoader" class="org.dspace.submit.lookup.CrossRefFileDataLoader">
<property name="fieldMap" ref="crossrefInputMap" />
</bean>
<bean name="crossrefInputMap" class="java.util.HashMap" scope="prototype">
<constructor-arg>
<map key-type="java.lang.String" value-type="java.lang.String">
<entry key="journalTitle" value="journal" />
<entry key="doi" value="doi" />
<entry key="authors" value="authors" />
<entry key="printISSN" value="jissn" />
<entry key="electronicISSN" value="jeissn" />
<entry key="year" value="issued" />
<entry key="articleTitle" value="title" />
<entry key="volume" value="volume" />
<entry key="issue" value="issue" />
<entry key="firstPage" value="firstpage" />
<entry key="lastPage" value="lastpage" />
<entry key="printISBN" value="pisbn" />
<entry key="electronicISBN" value="eisbn" />
<entry key="editionNumber" value="editionnumber" />
<entry key="seriesTitle" value="seriestitle" />
<entry key="volumeTitle" value="volumetitle" />
<entry key="editors" value="editors" />
<entry key="translators" value="translators" />
<entry key="chairs" value="chairs" />
<entry key="doyType" value="subtype" />
<!-- Not used -->
<!--
<entry key="publicationType" value="" />
-->
</map>
</constructor-arg>
</bean>
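<!-- As a hedged illustration of the flow: given a DOI such as the hypothetical
10.1000/sample.123, the online loader fetches the matching CrossRef record and
stores articleTitle under the internal label "title", journalTitle under
"journal", printISSN under "jissn", and so on, as declared in crossrefInputMap. -->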
<!-- CiNii -->
<bean id="ciniiOnlineDataLoader" class="org.dspace.submit.lookup.CiNiiOnlineDataLoader">
<property name="searchProvider" value="false" />
<!-- For the CiNii service you need to obtain an Application ID from NII.
Once you have it, add it to the following configuration value.
For details, see http://ci.nii.ac.jp/info/en/api/developer.html
-->
<property name="appId" value="" />
<!-- Uncomment the following line if you want to set the maximum number of
results returned by the CiNii free-text (by author, title, date) search.
Default value is 10.
-->
<!-- <property name="maxResults" value="10" /> -->
<property name="fieldMap" ref="ciniiInputMap" />
</bean>
<bean id="ciniiFileDataLoader" class="org.dspace.submit.lookup.CiNiiFileDataLoader">
<property name="fieldMap" ref="ciniiInputMap" />
</bean>
<bean name="ciniiInputMap" class="java.util.HashMap" scope="prototype">
<constructor-arg>
<map key-type="java.lang.String" value-type="java.lang.String">
<entry key="naid" value="naid" />
<entry key="ncid" value="ncid" />
<entry key="issn" value="jissn" />
<entry key="journal" value="journal" />
<entry key="title" value="title" />
<entry key="issued" value="issued" />
<entry key="volume" value="volume" />
<entry key="issue" value="issue" />
<entry key="spage" value="firstpage" />
<entry key="epage" value="lastpage" />
<entry key="language" value="language" />
<entry key="description" value="abstract" />
<entry key="subjects" value="keywords" />
<entry key="authors" value="authors" />
<entry key="publisher" value="publisher" />
</map>
</constructor-arg>
</bean>
<!-- **************************************************************************************************** -->
<!-- Output Mapping -->
<!-- **************************************************************************************************** -->
<!-- The output generator needs a configuration that tells it how to map
internal records to DSpace metadata fields; the following map specifies this
relationship. The value must be the label that the record internally
associates with the specific data, as specified in the data loader beans.
The key is in the format <schema>.<element>[.<qualifier>] and specifies the
DSpace metadata field that the value will map to. -->
<bean name="outputMap" class="java.util.HashMap" scope="prototype">
<constructor-arg>
<map key-type="java.lang.String" value-type="java.lang.String">
<entry value="jissn" key="dc.identifier.issn" />
<entry value="pisbn" key="dc.identifier.isbn" />
<entry value="journal" key="dc.source" />
<entry value="title" key="dc.title" />
<entry value="issued" key="dc.date.issued" />
<entry value="language" key="dc.language.iso" />
<entry value="subtype" key="dc.type" />
<entry value="authors" key="dc.contributor.author" />
<entry value="editors" key="dc.contributor.editor" />
<entry value="translators" key="dc.contributor.other" />
<entry value="chairs" key="dc.contributor.other" />
<entry value="abstract" key="dc.description.abstract" />
<entry value="allkeywords" key="dc.subject" />
<entry value="doi" key="dc.identifier" />
<entry value="publisher" key="dc.publisher" />
<!-- Not used: to use these entries, new metadata fields would need to be declared in the DSpace registry -->
<!--
<entry value="url" key="" />
<entry value="note" key="" />
<entry value="fulltextUrl" key="" />
<entry value="authorsWithAffiliation" key="" />
<entry value="publicationStatus" key="" />
<entry value="jeissn" key="" />
<entry value="volume" key="" />
<entry value="issue" key="" />
<entry value="firstpage" key="" />
<entry value="lastpage" key="" />
<entry value="eisbn" key="" />
<entry value="editionnumber" key="" />
<entry value="seriestitle" key="" />
<entry value="volumetitle" key="" />
<entry value="titleAlternative" key="" />
<entry value="authorAlternative" key="" />
<entry value="ncid" key="" />
<entry value="naid" key="" />
-->
</map>
</constructor-arg>
</bean>
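<!-- End to end, a hypothetical record whose internal labels hold
title = "A Sample Title", issued = "2014" and jissn = "1234-5678" would
therefore be written to a workspace item as dc.title = "A Sample Title",
dc.date.issued = "2014" and dc.identifier.issn = "1234-5678". -->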
</beans>

View File

@@ -1,30 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-2.5.xsd">
<bean id="bibtex" class="org.dspace.submit.extraction.MetadataExtractor">
<property name="dataLoader" ref="bibTeXDataLoader"/>
<property name="configurationService" ref="org.dspace.services.ConfigurationService"/>
<property name="extensions">
<list>
<value>bibtex</value>
<value>bib</value>
</list>
</property>
</bean>
<!-- <bean id="grobid" class="org.dspace.submit.extraction.MetadataExtractor">
<property name="dataLoader" ref="grobidDataLoader"/>
<property name="configurationService" ref="org.dspace.services.ConfigurationService"/>
<property name="extensions">
<list>
<value>pdf</value>
</list>
</property>
</bean>
-->
</beans>

View File

@@ -4,19 +4,16 @@
xsi:schemaLocation="http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-2.5.xsd">
<bean id="lookupListener" class="org.dspace.submit.listener.MetadataListener">
<property name="configurationService" ref="org.dspace.services.ConfigurationService"/>
<property name="metadata">
<bean id="org.dspace.submit.listener.MetadataListener" class="org.dspace.submit.listener.SimpleMetadataListener">
<property name="externalDataProvidersMap">
<map>
<entry key="dc.identifier.doi" value="doi"/>
<entry key="dc.identifier.other">
<list>
<ref bean="pubmedLiveImportDataProvider"/>
</list>
</entry>
</map>
</property>
<property name="dataloadersMap">
<map>
<entry key="crossref" value-ref="crossRefOnlineDataLoader"/>
<entry key="cinii" value-ref="ciniiOnlineDataLoader"/>
</map>
</property>
</bean>
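<!-- A hedged note on the replacement wiring: with SimpleMetadataListener, when
a submitter enters a value in one of the configured metadata fields (here
dc.identifier.other), the external data providers listed for that field (here
pubmedLiveImportDataProvider) are queried with that value so the matching
record can be offered to enrich the submission. -->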
</beans>