DS-4316 Support indirect entity refs during csv import

This commit is contained in:
Andrew Wood
2019-08-01 09:22:58 -04:00
parent f9886529f5
commit 7220cecfa6
2 changed files with 242 additions and 22 deletions

View File

@@ -170,6 +170,9 @@ public class DSpaceCSV implements Serializable {
if ("collection".equals(element)) {
// Store the heading
headings.add(element);
} else if ("rowName".equals(element)) {
// Store the heading
headings.add(element);
} else if ("action".equals(element)) { // Store the action
// Store the heading
headings.add(element);

View File

@@ -14,11 +14,14 @@ import java.io.InputStreamReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import javax.annotation.Nullable;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
@@ -37,6 +40,7 @@ import org.dspace.content.DSpaceObject;
import org.dspace.content.Entity;
import org.dspace.content.EntityType;
import org.dspace.content.Item;
import org.dspace.content.MetadataField;
import org.dspace.content.MetadataSchemaEnum;
import org.dspace.content.MetadataValue;
import org.dspace.content.Relationship;
@@ -49,6 +53,8 @@ import org.dspace.content.service.EntityService;
import org.dspace.content.service.EntityTypeService;
import org.dspace.content.service.InstallItemService;
import org.dspace.content.service.ItemService;
import org.dspace.content.service.MetadataFieldService;
import org.dspace.content.service.MetadataValueService;
import org.dspace.content.service.RelationshipService;
import org.dspace.content.service.RelationshipTypeService;
import org.dspace.content.service.WorkspaceItemService;
@@ -60,7 +66,6 @@ import org.dspace.eperson.EPerson;
import org.dspace.eperson.factory.EPersonServiceFactory;
import org.dspace.handle.factory.HandleServiceFactory;
import org.dspace.handle.service.HandleService;
import org.dspace.util.UUIDUtils;
import org.dspace.workflow.WorkflowItem;
import org.dspace.workflow.WorkflowService;
import org.dspace.workflow.factory.WorkflowServiceFactory;
@@ -100,6 +105,20 @@ public class MetadataImport {
*/
protected static final String AC_PREFIX = "authority.controlled.";
/**
* Map of field:value to csv row number, used to resolve indirect entity references.
* <p>
* Each key is a column name, a colon, and one of the values found in that column
* (for example a metadata field or the special "rowName" column); the mapped set holds
* the numbers of every row processed so far that carried that value.
*
* @see #populateRefAndRowMap(DSpaceCSVLine, int, UUID)
*/
protected HashMap<String, Set<Integer>> csvRefMap = new HashMap<>();
/**
* Map of csv row number to UUID, used to resolve indirect entity references.
* <p>
* A row only gets an entry when the UUID of its item is known at the time the row is recorded;
* rows without a known UUID are absent from this map.
*
* @see #populateRefAndRowMap(DSpaceCSVLine, int, UUID)
*/
protected HashMap<Integer, UUID> csvRowMap = new HashMap<>();
/**
* Logger
*/
@@ -165,7 +184,10 @@ public class MetadataImport {
c.setMode(Context.Mode.BATCH_EDIT);
// Process each change
int rowCount = 1;
for (DSpaceCSVLine line : toImport) {
//Resolve references to other items
line = resolveEntityRefs(line);
// Get the DSpace item to compare with
UUID id = line.getID();
@@ -278,7 +300,7 @@ public class MetadataImport {
BulkEditChange whatHasChanged = new BulkEditChange();
for (String md : line.keys()) {
// Get the values we already have
if (!"id".equals(md)) {
if (!"id".equals(md) && !"rowName".equals(md)) {
// Get the values from the CSV
String[] fromCSV = line.get(md).toArray(new String[line.get(md).size()]);
@@ -416,6 +438,9 @@ public class MetadataImport {
c.uncacheEntity(wfItem);
c.uncacheEntity(item);
}
populateRefAndRowMap(line, rowCount, item == null ? null : item.getID());
// keep track of current rows processed
rowCount++;
}
c.setMode(originalMode);
@@ -438,7 +463,7 @@ public class MetadataImport {
* @throws AuthorizeException If something goes wrong
*/
private void handleRelationshipMetadataValueFromBulkEditMetadataValue(Item item, BulkEditMetadataValue dcv)
throws SQLException, AuthorizeException {
throws SQLException, AuthorizeException, MetadataImportException {
LinkedList<String> values = new LinkedList<>();
values.add(dcv.getValue());
LinkedList<String> authorities = new LinkedList<>();
@@ -464,7 +489,7 @@ public class MetadataImport {
*/
protected void compare(Item item, String[] fromCSV, boolean change,
String md, BulkEditChange changes, DSpaceCSVLine line)
throws SQLException, AuthorizeException {
throws SQLException, AuthorizeException, MetadataImportException {
// Log what metadata element we're looking at
String all = "";
for (String part : fromCSV) {
@@ -475,7 +500,7 @@ public class MetadataImport {
"item_id=" + item.getID() + ",fromCSV=" + all));
// Don't compare collections or actions
if (("collection".equals(md)) || ("action".equals(md))) {
if (("collection".equals(md)) || ("action".equals(md)) || ("rowName".equals(md))) {
return;
}
@@ -675,7 +700,8 @@ public class MetadataImport {
*/
private void handleRelationMetadata(Context c, Item item, String schema, String element, String qualifier,
String language, List<String> values, List<String> authorities,
List<Integer> confidences) throws SQLException, AuthorizeException {
List<Integer> confidences) throws SQLException, AuthorizeException,
MetadataImportException {
if (StringUtils.equals(element, "type") && StringUtils.isBlank(qualifier)) {
handleRelationTypeMetadata(c, item, schema, element, qualifier, language, values, authorities, confidences);
@@ -688,6 +714,28 @@ public class MetadataImport {
}
/**
 * Gets an existing entity from a reference.
 *
 * @param context   the context to use.
 * @param reference the reference to resolve; any form accepted by
 *                  {@link #resolveEntityRef(Context, String)}.
 * @return the entity, which is guaranteed to exist and to wrap a non-null item.
 * @throws MetadataImportException if the reference is badly formed or refers to a non-existing item.
 */
private Entity getEntity(Context context, String reference) throws MetadataImportException {
    UUID uuid = resolveEntityRef(context, reference);
    // At this point, we have a uuid, so we can get an entity
    try {
        Entity entity = entityService.findByItemId(context, uuid);
        if (entity == null || entity.getItem() == null) {
            // Raised as the checked import exception (rather than an unchecked
            // IllegalArgumentException) so that a dangling reference is reported through
            // the error path this method documents and its callers declare.
            throw new MetadataImportException("No item found in repository with uuid: " + uuid);
        }
        return entity;
    } catch (SQLException sqle) {
        throw new MetadataImportException("Unable to find entity using reference: " + reference, sqle);
    }
}
/**
* This method takes the item, element and values to determine what relationships should be built
* for these parameters and calls on the method to construct them
@@ -699,18 +747,15 @@ public class MetadataImport {
* @throws AuthorizeException If something goes wrong
*/
private void handleRelationOtherMetadata(Context c, Item item, String element, String value)
throws SQLException, AuthorizeException {
throws SQLException, AuthorizeException, MetadataImportException {
if (value.isEmpty()) {
return;
}
Entity entity = entityService.findByItemId(c, item.getID());
boolean left = false;
List<RelationshipType> acceptableRelationshipTypes = new LinkedList<>();
String url = handleService.resolveToURL(c, value);
UUID uuid = UUIDUtils.fromString(value);
if (uuid == null && StringUtils.isNotBlank(url)) {
return;
}
Entity relationEntity = entityService.findByItemId(c, uuid);
Entity relationEntity = getEntity(c, value);
List<RelationshipType> leftRelationshipTypesForEntity = entityService.getLeftRelationshipTypes(c, entity);
List<RelationshipType> rightRelationshipTypesForEntity = entityService.getRightRelationshipTypes(c, entity);
@@ -739,30 +784,30 @@ public class MetadataImport {
}
//There is exactly one
buildRelationObject(c, item, value, left, acceptableRelationshipTypes.get(0));
buildRelationObject(c, item, relationEntity.getItem(), left, acceptableRelationshipTypes.get(0));
}
/**
* This method creates the relationship for the item and stores it in the database
* @param c The relevant DSpace context
* @param item The item for which this relationship will be constructed
* @param value The value for the relationship
* @param otherItem The item for the relationship
* @param left A boolean indicating whether the item is the leftItem or the rightItem
* @param acceptedRelationshipType The acceptable relationship type
* @throws SQLException If something goes wrong
* @throws AuthorizeException If something goes wrong
*/
private void buildRelationObject(Context c, Item item, String value, boolean left,
private void buildRelationObject(Context c, Item item, Item otherItem, boolean left,
RelationshipType acceptedRelationshipType)
throws SQLException, AuthorizeException {
Item leftItem = null;
Item rightItem = null;
if (left) {
leftItem = item;
rightItem = itemService.findByIdOrLegacyId(c, value);
rightItem = otherItem;
} else {
rightItem = item;
leftItem = itemService.findByIdOrLegacyId(c, value);
leftItem = otherItem;
}
RelationshipType relationshipType = acceptedRelationshipType;
int leftPlace = relationshipService.findLeftPlaceByLeftItem(c, leftItem) + 1;
@@ -1374,6 +1419,8 @@ public class MetadataImport {
"notify - when adding new items using a workflow, send notification emails");
options.addOption("t", "template", false,
"template - when adding new items, use the collection template (if it exists)");
options.addOption("v", "validate-only", false,
"validate - just validate the csv, don't run the import");
options.addOption("h", "help", false, "help");
// Parse the command line arguments
@@ -1471,7 +1518,9 @@ public class MetadataImport {
MetadataImport importer = new MetadataImport(c, csv);
List<BulkEditChange> changes;
if (!line.hasOption('s')) {
boolean validateOnly = line.hasOption('v');
if (!line.hasOption('s') || validateOnly) {
// See what has changed
try {
changes = importer.runImport(false, useWorkflow, workflowNotify, useTemplate);
@@ -1485,7 +1534,7 @@ public class MetadataImport {
int changeCounter = displayChanges(changes, false);
// If there were changes, ask if we should execute them
if (changeCounter > 0) {
if (!validateOnly && changeCounter > 0) {
try {
// Ask the user if they want to make the changes
System.out.println("\n" + changeCounter + " item(s) will be changed\n");
@@ -1510,7 +1559,7 @@ public class MetadataImport {
try {
// If required, make the change
if (change) {
if (change && !validateOnly) {
try {
// Make the changes
changes = importer.runImport(true, useWorkflow, workflowNotify, useTemplate);
@@ -1537,4 +1586,172 @@ public class MetadataImport {
System.exit(1);
}
}
/**
 * Gets a copy of the given csv line with all entity references resolved to UUID strings.
 * <p>
 * Values of columns whose first dot-separated segment is "relation" (case-insensitive) are
 * replaced by the string form of the UUID they resolve to; values of every other column are
 * copied unchanged. Columns that carry no values are kept in the copy as empty columns.
 *
 * @param line the csv line to process.
 * @return a copy, with all references resolved.
 * @throws MetadataImportException if there is an error resolving any entity reference.
 */
public DSpaceCSVLine resolveEntityRefs(DSpaceCSVLine line) throws MetadataImportException {
    DSpaceCSVLine newLine = new DSpaceCSVLine(line.getID());
    for (String key : line.keys()) {
        List<String> values = line.get(key);
        if (values == null || values.isEmpty()) {
            // An empty column must neither crash the copy (the previous get(0) threw
            // IndexOutOfBoundsException here) nor silently vanish from it.
            // NOTE(review): relies on DSpaceCSVLine.add(key, null) registering the column
            // without storing a value - confirm against DSpaceCSVLine.
            newLine.add(key, null);
            continue;
        }
        boolean isRelation = key.split("\\.")[0].equalsIgnoreCase("relation");
        for (String value : values) {
            // Only relation columns hold entity references that need resolving
            newLine.add(key, isRelation ? resolveEntityRef(c, value).toString() : value);
        }
    }
    return newLine;
}
/**
 * Records the given csv line in the csvRefMap and csvRowMap indexes.
 *
 * The csvRowMap maps a row number to the UUID of that row's item; an entry is written only
 * when a UUID is supplied.
 *
 * The csvRefMap maps "column:value" to the set of row numbers carrying that value. A column is
 * indexed when it is the special "rowName" column (compared case-insensitively), or when its
 * name contains a dot and its first dot-separated segment is not "relation". Together the two
 * maps are used to resolve indirect entity references within the same CSV.
 *
 * @param line      the csv line.
 * @param rowNumber the row number.
 * @param uuid      the uuid of the item, which may be null if it has not been minted yet.
 */
private void populateRefAndRowMap(DSpaceCSVLine line, int rowNumber, @Nullable UUID uuid) {
    if (uuid != null) {
        csvRowMap.put(rowNumber, uuid);
    }
    for (String column : line.keys()) {
        boolean isRowName = column.equalsIgnoreCase("rowName");
        boolean isNonRelationField = column.contains(".")
            && !column.split("\\.")[0].equalsIgnoreCase("relation");
        if (!isNonRelationField && !isRowName) {
            // Not a column that can be the target of an indirect reference
            continue;
        }
        for (String cellValue : line.get(column)) {
            csvRefMap.computeIfAbsent(column + ":" + cellValue, unused -> new HashSet<>())
                     .add(rowNumber);
        }
    }
}
/**
 * Gets the UUID of the item indicated by the given reference, which may be a direct UUID string, a row reference
 * of the form rowName:VALUE, or a metadata value reference of the form schema.element[.qualifier]:VALUE.
 *
 * The reference may refer to a previously-processed item in the CSV or an item in the database.
 * A metadata value reference is looked up in both places; a rowName reference only in the CSV.
 * Exactly one distinct item must match overall (the same item matching in both places counts once).
 *
 * @param context the context to use.
 * @param reference the reference.
 * @return the uuid. For a CSV row whose item UUID is not known yet, this is the row's stand-in UUID
 *         as produced by {@link #getUUIDForRow(int)}.
 * @throws MetadataImportException if the reference is malformed, matches nothing, or is ambiguous
 *                                 (refers to multiple items).
 */
private UUID resolveEntityRef(Context context, String reference) throws MetadataImportException {
    if (!reference.contains(":")) {
        // No "field:" prefix, so assume it's a UUID
        try {
            return UUID.fromString(reference);
        } catch (IllegalArgumentException e) {
            throw new MetadataImportException("Not a UUID or indirect entity reference: '" + reference + "'");
        }
    }

    // Match from the database, if any (only metadata value references are looked up there)
    UUID uuid = null;
    if (!reference.startsWith("rowName:")) {
        MetadataValueService metadataValueService = ContentServiceFactory.getInstance().getMetadataValueService();
        MetadataFieldService metadataFieldService =
            ContentServiceFactory.getInstance().getMetadataFieldService();
        int i = reference.indexOf(":");
        String mfValue = reference.substring(i + 1);
        String[] mf = reference.substring(0, i).split("\\.");
        if (mf.length < 2) {
            throw new MetadataImportException("Bad metadata field in reference: '" + reference
                                                  + "' (expected syntax is schema.element[.qualifier])");
        }
        String schema = mf[0];
        String element = mf[1];
        String qualifier = mf.length == 2 ? null : mf[2];
        try {
            MetadataField mfo = metadataFieldService.findByElement(context, schema, element, qualifier);
            // NOTE(review): findByElement presumably yields null for an unregistered field; in that
            // case there can be no database match, so skip the value lookup instead of handing a
            // null field to the value service.
            if (mfo != null) {
                Iterator<MetadataValue> mdv = metadataValueService.findByFieldAndValue(context, mfo, mfValue);
                if (mdv.hasNext()) {
                    MetadataValue mdvVal = mdv.next();
                    uuid = mdvVal.getDSpaceObject().getID();
                    if (mdv.hasNext()) {
                        throw new MetadataImportException(
                            "Ambiguous reference; multiple matches in db: " + reference);
                    }
                }
            }
        } catch (SQLException e) {
            throw new MetadataImportException("Error looking up item by metadata reference: " + reference, e);
        }
    }

    // Reconcile the database match with matches among the csv rows processed so far
    Set<UUID> csvUUIDs = getMatchingCSVUUIDs(reference);
    if (csvUUIDs.size() > 1) {
        throw new MetadataImportException("Ambiguous reference; multiple matches in csv: " + reference);
    } else if (csvUUIDs.size() == 1) {
        UUID csvUUID = csvUUIDs.iterator().next();
        if (csvUUID.equals(uuid)) {
            return uuid; // one match from csv and db (same item)
        } else if (uuid != null) {
            throw new MetadataImportException("Ambiguous reference; multiple matches in db and csv: " + reference);
        } else {
            return csvUUID; // one match from csv
        }
    } else { // size == 0
        if (uuid == null) {
            throw new MetadataImportException("No matches found for reference: " + reference);
        } else {
            return uuid; // one match from db
        }
    }
}
/**
 * Collects the UUIDs of the already-processed csv rows indexed under the given reference.
 *
 * @param mdValueRef the metadataValue reference to search for, used as-is as the index key.
 * @return the set of matching lines as UUIDs; empty when no processed row is indexed under the reference.
 */
private Set<UUID> getMatchingCSVUUIDs(String mdValueRef) {
    Set<UUID> matches = new HashSet<>();
    if (!csvRefMap.containsKey(mdValueRef)) {
        return matches;
    }
    Iterator<Integer> rowNumbers = csvRefMap.get(mdValueRef).iterator();
    while (rowNumbers.hasNext()) {
        matches.add(getUUIDForRow(rowNumbers.next()));
    }
    return matches;
}
/**
 * Looks up the UUID recorded for a csv row.
 * When the row has an entry in the row map, that entry is returned; otherwise a stand-in UUID
 * is built whose most significant bits are zero and whose least significant bits are the row number.
 *
 * @param rowNum the row number.
 * @return the recorded UUID of the row's item, or the stand-in UUID derived from the row number.
 */
private UUID getUUIDForRow(int rowNum) {
    return csvRowMap.containsKey(rowNum)
        ? csvRowMap.get(rowNum)
        : new UUID(0, rowNum);
}
}