[DS-1084] authority field in Batch Metadata Editor (squashed)

This commit is contained in:
Ivan Masár
2012-08-08 13:58:06 +02:00
parent 0a62de7a2f
commit 462011ffe6
3 changed files with 230 additions and 39 deletions

View File

@@ -54,6 +54,13 @@ public class DSpaceCSV implements Serializable
/** The field separator in an escaped form for using in regexs */
protected static String escapedFieldSeparator;
/** The authority separator (defaults to dobule colon '::') */
protected static String authoritySeparator;
/** The authority separator in an escaped form for using in regexs */
protected static String escapedAuthoritySeparator;
/** Whether to export all metadata such as handles and provenance information */
private boolean exportAll;
@@ -226,6 +233,9 @@ public class DSpaceCSV implements Serializable
// Set the field separator
setFieldSeparator();
// Set the authority separator
setAuthoritySeparator();
// Create the headings
headings = new ArrayList<String>();
@@ -340,6 +350,32 @@ public class DSpaceCSV implements Serializable
escapedFieldSeparator = match.replaceAll("\\\\$1");
}
/**
* Set the authority separator for value with authority data.
*
* Is set in dspace.cfg as bulkedit.authorityseparator
*
* If not set, defaults to double colon '::'
*/
private void setAuthoritySeparator()
{
// Get the value separator
authoritySeparator = ConfigurationManager.getProperty("bulkedit", "authorityseparator");
if ((authoritySeparator != null) && (!"".equals(authoritySeparator.trim())))
{
authoritySeparator = authoritySeparator.trim();
}
else
{
authoritySeparator = "::";
}
// Now store the escaped version
Pattern spchars = Pattern.compile("([\\\\*+\\[\\](){}\\$.?\\^|])");
Matcher match = spchars.matcher(authoritySeparator);
escapedAuthoritySeparator = match.replaceAll("\\\\$1");
}
/**
* Add a DSpace item to the CSV file
*
@@ -390,7 +426,13 @@ public class DSpaceCSV implements Serializable
// Store the item
if (exportAll || okToExport(value))
{
line.add(key, value.value);
// Add authority and confidence if authority is not null
String mdValue = value.value;
if (value.authority != null && !"".equals(value.authority))
{
mdValue += authoritySeparator + value.authority + authoritySeparator + value.confidence;
}
line.add(key, mdValue);
if (!headings.contains(key))
{
headings.add(key);

View File

@@ -11,6 +11,7 @@ import org.apache.commons.cli.*;
import org.apache.log4j.Logger;
import org.dspace.content.*;
import org.dspace.content.authority.Choices;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Context;
import org.dspace.core.Constants;
@@ -27,7 +28,10 @@ import java.io.InputStreamReader;
import java.io.BufferedReader;
import java.io.IOException;
import java.sql.SQLException;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
* Metadata importer to allow the batch import of metadata from a file
@@ -45,10 +49,19 @@ public class MetadataImport
/** The lines to import */
List<DSpaceCSVLine> toImport;
/** The authority controlled fields */
private static Set<String> authorityControlled;
static
{
setAuthorizedMetadataFields();
}
/** The prefix of the authority controlled field */
private static final String AC_PREFIX = "authority.controlled.";
/** Logger */
private static final Logger log = Logger.getLogger(MetadataImport.class);
/**
* Create an instance of the metadata importer. Requires a context and an array of CSV lines
* to examine.
@@ -133,6 +146,18 @@ public class MetadataImport
{
// Get the values from the CSV
String[] fromCSV = line.get(md).toArray(new String[line.get(md).size()]);
// Remove authority unless the md is not authority controlled
if (!isAuthorityControlledField(md))
{
for (int i=0; i<fromCSV.length; i++)
{
int pos = fromCSV[i].indexOf(DSpaceCSV.authoritySeparator);
if (pos > -1)
{
fromCSV[i] = fromCSV[i].substring(0, pos);
}
}
}
// Compare
compare(item, fromCSV, change, md, whatHasChanged);
@@ -222,6 +247,19 @@ public class MetadataImport
// Get the values from the CSV
String[] fromCSV = line.get(md).toArray(new String[line.get(md).size()]);
// Remove authority unless the md is not authority controlled
if (!isAuthorityControlledField(md))
{
for (int i=0; i<fromCSV.length; i++)
{
int pos = fromCSV[i].indexOf(DSpaceCSV.authoritySeparator);
if (pos > -1)
{
fromCSV[i] = fromCSV[i].substring(0, pos);
}
}
}
// Add all the values from the CSV line
add(fromCSV, md, whatHasChanged);
}
@@ -298,7 +336,9 @@ public class MetadataImport
dcv.element,
dcv.qualifier,
dcv.language,
dcv.value);
dcv.value,
dcv.authority,
dcv.confidence);
}
// Should the workflow be used?
@@ -396,6 +436,7 @@ public class MetadataImport
String[] bits = md.split("\\[");
language = bits[1].substring(0, bits[1].length() - 1);
}
String[] bits = md.split("\\.");
String schema = bits[0];
String element = bits[1];
@@ -427,41 +468,21 @@ public class MetadataImport
int i = 0;
for (DCValue dcv : current)
{
dcvalues[i] = dcv.value;
if (dcv.authority == null || !isAuthorityControlledField(md))
{
dcvalues[i] = dcv.value;
}
else
{
dcvalues[i] = dcv.value + DSpaceCSV.authoritySeparator + dcv.authority;
dcvalues[i] += DSpaceCSV.authoritySeparator + (dcv.confidence != -1 ? dcv.confidence : Choices.CF_ACCEPTED);
}
i++;
log.debug(LogManager.getHeader(c, "metadata_import",
"item_id=" + item.getID() + ",fromCSV=" + all +
",found=" + dcv.value));
}
// Compare from csv->current
for (String value : dcvalues)
{
// Look to see if it should be removed
DCValue dcv = new DCValue();
dcv.schema = schema;
dcv.element = element;
dcv.qualifier = qualifier;
dcv.language = language;
dcv.value = value;
if ((value != null) && (!"".equals(value)) && (!contains(value, fromCSV)))
{
// Remove it
log.debug(LogManager.getHeader(c, "metadata_import",
"item_id=" + item.getID() + ",fromCSV=" + all +
",removing_schema=" + schema +
",removing_element=" + element +
",removing_qualifier=" + qualifier +
",removing_language=" + language));
changes.registerRemove(dcv);
}
else
{
// Keep it
changes.registerConstant(dcv);
}
}
// Compare from current->csv
for (String value : fromCSV)
{
@@ -471,11 +492,65 @@ public class MetadataImport
dcv.element = element;
dcv.qualifier = qualifier;
dcv.language = language;
dcv.value = value;
if (value == null || value.indexOf(DSpaceCSV.authoritySeparator) < 0)
{
dcv.value = value;
dcv.authority = null;
dcv.confidence = Choices.CF_UNSET;
}
else
{
String[] parts = value.split(DSpaceCSV.escapedAuthoritySeparator);
dcv.value = parts[0];
dcv.authority = parts[1];
dcv.confidence = (parts.length > 2 ? Integer.valueOf(parts[2]) : Choices.CF_ACCEPTED);
}
if ((value != null) && (!"".equals(value)) && (!contains(value, dcvalues)))
{
changes.registerAdd(dcv);
}
else
{
// Keep it
changes.registerConstant(dcv);
}
}
// Compare from csv->current
for (String value : dcvalues)
{
// Look to see if it should be removed
DCValue dcv = new DCValue();
dcv.schema = schema;
dcv.element = element;
dcv.qualifier = qualifier;
dcv.language = language;
if (value == null || value.indexOf(DSpaceCSV.authoritySeparator) < 0)
{
dcv.value = value;
dcv.authority = null;
dcv.confidence = Choices.CF_UNSET;
}
else
{
String[] parts = value.split(DSpaceCSV.escapedAuthoritySeparator);
dcv.value = parts[0];
dcv.authority = parts[1];
dcv.confidence = (parts.length > 2 ? Integer.valueOf(parts[2]) : Choices.CF_ACCEPTED);
}
if ((value != null) && (!"".equals(value)) && (!contains(value, fromCSV)))
{
// Remove it
log.debug(LogManager.getHeader(c, "metadata_import",
"item_id=" + item.getID() + ",fromCSV=" + all +
",removing_schema=" + schema +
",removing_element=" + element +
",removing_qualifier=" + qualifier +
",removing_language=" + language));
changes.registerRemove(dcv);
}
}
// Update the item if it has changed
@@ -485,6 +560,8 @@ public class MetadataImport
// Get the complete list of what values should now be in that element
List<DCValue> list = changes.getComplete();
List<String> values = new ArrayList<String>();
List<String> authorities = new ArrayList<String>();
List<Integer> confidences = new ArrayList<Integer>();
for (DCValue value : list)
{
if ((qualifier == null) && (language == null))
@@ -495,6 +572,8 @@ public class MetadataImport
(value.language == null))
{
values.add(value.value);
authorities.add(value.authority);
confidences.add(value.confidence);
}
}
else if (qualifier == null)
@@ -505,6 +584,8 @@ public class MetadataImport
(value.qualifier == null))
{
values.add(value.value);
authorities.add(value.authority);
confidences.add(value.confidence);
}
}
else if (language == null)
@@ -515,6 +596,8 @@ public class MetadataImport
(value.language == null))
{
values.add(value.value);
authorities.add(value.authority);
confidences.add(value.confidence);
}
}
else
@@ -525,6 +608,8 @@ public class MetadataImport
(language.equals(value.language)))
{
values.add(value.value);
authorities.add(value.authority);
confidences.add(value.confidence);
}
}
}
@@ -532,7 +617,13 @@ public class MetadataImport
// Set those values
item.clearMetadata(schema, element, qualifier, language);
String[] theValues = values.toArray(new String[values.size()]);
item.addMetadata(schema, element, qualifier, language, theValues);
String[] theAuthorities = authorities.toArray(new String[authorities.size()]);
int[] theConfidences = new int[confidences.size()];
for (int k=0; k< confidences.size(); k++)
{
theConfidences[k] = confidences.get(k).intValue();
}
item.addMetadata(schema, element, qualifier, language, theValues, theAuthorities, theConfidences);
item.update();
}
}
@@ -745,7 +836,19 @@ public class MetadataImport
dcv.element = element;
dcv.qualifier = qualifier;
dcv.language = language;
dcv.value = value;
if (value == null || value.indexOf(DSpaceCSV.authoritySeparator) < 0)
{
dcv.value = value;
dcv.authority = null;
dcv.confidence = Choices.CF_UNSET;
}
else
{
String[] parts = value.split(DSpaceCSV.escapedAuthoritySeparator);
dcv.value = parts[0];
dcv.authority = parts[1];
dcv.confidence = (parts.length > 2 ? Integer.valueOf(parts[2]) : Choices.CF_ACCEPTED);
}
// Add it
if ((value != null) && (!"".equals(value)))
@@ -980,7 +1083,13 @@ public class MetadataImport
{
System.out.print(" + Added (" + md + "): ");
}
System.out.println(dcv.value);
System.out.print(dcv.value);
if (isAuthorityControlledField(md))
{
System.out.print(", authority = " + dcv.authority);
System.out.print(", confidence = " + dcv.confidence);
}
System.out.println("");
}
// Show removals
@@ -997,17 +1106,54 @@ public class MetadataImport
}
if (!changed)
{
System.out.println(" - Remove (" + md + "): " + dcv.value);
System.out.print(" - Remove (" + md + "): ");
}
else
{
System.out.println(" - Removed (" + md + "): " + dcv.value);
System.out.print(" - Removed (" + md + "): ");
}
System.out.print(dcv.value);
if (isAuthorityControlledField(md))
{
System.out.print(", authority = " + dcv.authority);
System.out.print(", confidence = " + dcv.confidence);
}
System.out.println("");
}
}
return changeCounter;
}
/**
* is the field is defined as authority controlled
*
*/
private static boolean isAuthorityControlledField(String md)
{
int pos = md.indexOf("[");
String mdf = (pos > -1 ? md.substring(0, pos) : md);
return authorityControlled.contains(mdf);
}
/**
* Set authority controlled fields
*
*/
private static void setAuthorizedMetadataFields()
{
authorityControlled = new HashSet<String>();
Enumeration propertyNames = ConfigurationManager.getProperties().propertyNames();
while(propertyNames.hasMoreElements())
{
String key = ((String) propertyNames.nextElement()).trim();
if (key.startsWith(AC_PREFIX)
&& ConfigurationManager.getBooleanProperty(key, false))
{
authorityControlled.add(key.substring(AC_PREFIX.length()));
}
}
}
/**
* main method to run the metadata exporter
*