DS-982 various usability improvements to curation system

git-svn-id: http://scm.dspace.org/svn/repo/dspace/trunk@6628 9c30dcfa-912a-0410-8fc2-9e0234be79fd
This commit is contained in:
Richard Rodgers
2011-08-31 18:27:59 +00:00
parent c568f381d6
commit f16e309240
6 changed files with 746 additions and 254 deletions

View File

@@ -9,12 +9,16 @@ package org.dspace.curate;
import java.io.IOException; import java.io.IOException;
import java.sql.SQLException; import java.sql.SQLException;
import java.util.Properties;
import org.apache.log4j.Logger;
import org.dspace.content.Collection; import org.dspace.content.Collection;
import org.dspace.content.Community; import org.dspace.content.Community;
import org.dspace.content.DSpaceObject; import org.dspace.content.DSpaceObject;
import org.dspace.content.Item; import org.dspace.content.Item;
import org.dspace.content.ItemIterator; import org.dspace.content.ItemIterator;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants; import org.dspace.core.Constants;
import org.dspace.core.Context; import org.dspace.core.Context;
import org.dspace.handle.HandleManager; import org.dspace.handle.HandleManager;
@@ -31,6 +35,10 @@ public abstract class AbstractCurationTask implements CurationTask
protected Curator curator = null; protected Curator curator = null;
// curator-assigned taskId // curator-assigned taskId
protected String taskId = null; protected String taskId = null;
// optional task configuration properties
private Properties taskProps = null;
// logger
private static Logger log = Logger.getLogger(AbstractCurationTask.class);
@Override @Override
public void init(Curator curator, String taskId) throws IOException public void init(Curator curator, String taskId) throws IOException
@@ -150,4 +158,125 @@ public abstract class AbstractCurationTask implements CurationTask
{ {
curator.setResult(taskId, result); curator.setResult(taskId, result);
} }
/**
 * Looks up a task configuration property by name.
 * <p>
 * The property set is built lazily on first use: the task id is split on
 * '.' and, for each successively longer dotted prefix, the properties that
 * <code>ConfigurationManager.getProperties</code> returns for that prefix are
 * merged in, so values from longer prefixes overwrite values from shorter
 * ones. A warning is logged if nothing at all was found.
 *
 * @param name
 *        the property name
 * @return the property value, or <code>null</code> if no properties are
 *         defined or there is no value for the passed name
 */
protected String taskProperty(String name)
{
    if (taskProps != null)
    {
        // already loaded on an earlier call
        return taskProps.getProperty(name);
    }
    taskProps = new Properties();
    String[] segments = taskId.split("\\.");
    StringBuilder moduleName = new StringBuilder();
    for (int i = 0; i < segments.length; i++)
    {
        if (i > 0)
        {
            moduleName.append(".");
        }
        moduleName.append(segments[i]);
        // merge in whatever is configured under this prefix, if anything
        Properties found = ConfigurationManager.getProperties(moduleName.toString());
        if (found != null)
        {
            taskProps.putAll(found);
        }
    }
    // warn if *no* properties found
    if (taskProps.size() == 0)
    {
        log.warn("Warning: No configuration properties found for task: " + taskId);
    }
    return taskProps.getProperty(name);
}
/**
 * Reads a task configuration property and interprets it as an
 * <code>int</code>.
 *
 * @param name
 *        the property name
 * @param defaultValue
 *        value to return when the property is absent or is not a
 *        parsable integer
 * @return the parsed property value, or <code>defaultValue</code>; a
 *         warning is logged when a value is present but malformed
 */
protected int taskIntProperty(String name, int defaultValue)
{
    String raw = taskProperty(name);
    if (raw == null)
    {
        return defaultValue;
    }
    try
    {
        return Integer.parseInt(raw.trim());
    }
    catch (NumberFormatException nfE)
    {
        log.warn("Warning: Number format error in module: " + taskId + " property: " + name);
        return defaultValue;
    }
}
/**
 * Reads a task configuration property and interprets it as a
 * <code>long</code>.
 *
 * @param name
 *        the property name
 * @param defaultValue
 *        value to return when the property is absent or is not a
 *        parsable long
 * @return the parsed property value, or <code>defaultValue</code>; a
 *         warning is logged when a value is present but malformed
 */
protected long taskLongProperty(String name, long defaultValue)
{
    String raw = taskProperty(name);
    if (raw == null)
    {
        return defaultValue;
    }
    long parsed;
    try
    {
        parsed = Long.parseLong(raw.trim());
    }
    catch (NumberFormatException nfE)
    {
        log.warn("Warning: Number format error in module: " + taskId + " property: " + name);
        parsed = defaultValue;
    }
    return parsed;
}
/**
 * Reads a task configuration property and interprets it as a boolean.
 * <p>
 * A present value is <code>true</code> only if, after trimming, it equals
 * "true" or "yes" ignoring case; any other present value yields
 * <code>false</code>. The default is used only when the property is absent.
 *
 * @param name
 *        the property name
 * @param defaultValue
 *        value to return when the property is absent
 * @return the interpreted property value, or <code>defaultValue</code>
 */
protected boolean taskBooleanProperty(String name, boolean defaultValue)
{
    String raw = taskProperty(name);
    if (raw == null)
    {
        return defaultValue;
    }
    String flag = raw.trim();
    return "true".equalsIgnoreCase(flag) || "yes".equalsIgnoreCase(flag);
}
} }

View File

@@ -17,7 +17,7 @@ import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options; import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser; import org.apache.commons.cli.PosixParser;
import org.dspace.content.Community; import org.dspace.content.Site;
import org.dspace.core.Context; import org.dspace.core.Context;
import org.dspace.core.PluginManager; import org.dspace.core.PluginManager;
import org.dspace.eperson.EPerson; import org.dspace.eperson.EPerson;
@@ -48,6 +48,10 @@ public class CurationCli
"email address of curating eperson"); "email address of curating eperson");
options.addOption("r", "reporter", true, options.addOption("r", "reporter", true,
"reporter to manage results - use '-' to report to console. If absent, no reporting"); "reporter to manage results - use '-' to report to console. If absent, no reporting");
options.addOption("l", "limit", true,
"maximum number of objects allowed in context cache. If absent, no limit");
options.addOption("s", "scope", true,
"transaction scope to impose: use 'object', 'curation', or 'open'. If absent, 'open' applies");
options.addOption("v", "verbose", false, options.addOption("v", "verbose", false,
"report activity to stdout"); "report activity to stdout");
options.addOption("h", "help", false, "help"); options.addOption("h", "help", false, "help");
@@ -60,6 +64,8 @@ public class CurationCli
String taskQueueName = null; String taskQueueName = null;
String ePersonName = null; String ePersonName = null;
String reporterName = null; String reporterName = null;
String limit = null;
String scope = null;
boolean verbose = false; boolean verbose = false;
if (line.hasOption('h')) if (line.hasOption('h'))
@@ -104,6 +110,16 @@ public class CurationCli
{ // report file { // report file
reporterName = line.getOptionValue('r'); reporterName = line.getOptionValue('r');
} }
if (line.hasOption('l'))
{ // cache limit
limit = line.getOptionValue('l');
}
if (line.hasOption('s'))
{ // transaction scope
scope = line.getOptionValue('s');
}
if (line.hasOption('v')) if (line.hasOption('v'))
{ // verbose { // verbose
@@ -122,6 +138,18 @@ public class CurationCli
System.out.println("A curation task or queue must be specified (-h for help)"); System.out.println("A curation task or queue must be specified (-h for help)");
System.exit(1); System.exit(1);
} }
// Validate the cache limit. Integer.parseInt throws NumberFormatException
// on non-numeric input, so that case must be reported as a usage error
// rather than left to escape as a stack trace.
if (limit != null)
{
    boolean validLimit;
    try
    {
        validLimit = Integer.parseInt(limit) > 0;
    }
    catch (NumberFormatException nfE)
    {
        validLimit = false;
    }
    if (! validLimit)
    {
        System.out.println("Cache limit '" + limit + "' must be a positive integer");
        System.exit(1);
    }
}
// Validate the transaction scope. Enum.valueOf never returns null - it
// throws IllegalArgumentException for an unknown name - so the failure
// has to be caught, not null-checked.
if (scope != null)
{
    try
    {
        Curator.TxScope.valueOf(scope.toUpperCase());
    }
    catch (IllegalArgumentException iaE)
    {
        System.out.println("Bad transaction scope '" + scope + "': only 'object', 'curation' or 'open' recognized");
        System.exit(1);
    }
}
Context c = new Context(); Context c = new Context();
if (ePersonName != null) if (ePersonName != null)
@@ -144,6 +172,15 @@ public class CurationCli
{ {
curator.setReporter(reporterName); curator.setReporter(reporterName);
} }
if (limit != null)
{
curator.setCacheLimit(Integer.parseInt(limit));
}
if (scope != null)
{
Curator.TxScope txScope = Curator.TxScope.valueOf(scope.toUpperCase());
curator.setTransactionScope(txScope);
}
// we are operating in batch mode, if anyone cares. // we are operating in batch mode, if anyone cares.
curator.setInvoked(Curator.Invoked.BATCH); curator.setInvoked(Curator.Invoked.BATCH);
// load curation tasks // load curation tasks
@@ -197,15 +234,8 @@ public class CurationCli
} }
if ("all".equals(idName)) if ("all".equals(idName))
{ {
// run on all top-level communities // run on whole Site
for (Community comm : Community.findAllTop(c)) curator.curate(c, Site.getSiteHandle());
{
if (verbose)
{
System.out.println("Curating community: " + comm.getHandle());
}
curator.curate(comm);
}
} }
else else
{ {

View File

@@ -52,16 +52,21 @@ public class Curator
// invocation modes - used by Suspendable tasks // invocation modes - used by Suspendable tasks
public static enum Invoked { INTERACTIVE, BATCH, ANY }; public static enum Invoked { INTERACTIVE, BATCH, ANY };
// transaction scopes
public static enum TxScope { OBJECT, CURATION, OPEN };
private static Logger log = Logger.getLogger(Curator.class); private static Logger log = Logger.getLogger(Curator.class);
private static final ThreadLocal<Integer> performer = new ThreadLocal<Integer>(); private static final ThreadLocal<Context> curationCtx = new ThreadLocal<Context>();
private Map<String, TaskRunner> trMap = new HashMap<String, TaskRunner>(); private Map<String, TaskRunner> trMap = new HashMap<String, TaskRunner>();
private List<String> perfList = new ArrayList<String>(); private List<String> perfList = new ArrayList<String>();
private TaskQueue taskQ = null; private TaskQueue taskQ = null;
private String reporter = null; private String reporter = null;
private Invoked iMode = null; private Invoked iMode = null;
private TaskResolver resolver = new TaskResolver();
private int cacheLimit = Integer.MAX_VALUE;
private TxScope txScope = TxScope.OPEN;
/** /**
* No-arg constructor * No-arg constructor
@@ -79,13 +84,13 @@ public class Curator
*/ */
public Curator addTask(String taskName) public Curator addTask(String taskName)
{ {
CurationTask task = TaskResolver.resolveTask(taskName); ResolvedTask task = resolver.resolveTask(taskName);
if (task != null) if (task != null)
{ {
try try
{ {
task.init(this, taskName); task.init(this);
trMap.put(taskName, new TaskRunner(task, taskName)); trMap.put(taskName, new TaskRunner(task));
// performance order currently FIFO - to be revisited // performance order currently FIFO - to be revisited
perfList.add(taskName); perfList.add(taskName);
} }
@@ -111,8 +116,7 @@ public class Curator
{ {
return perfList.contains(taskName); return perfList.contains(taskName);
} }
/** /**
* Removes a task from the set to be performed. * Removes a task from the set to be performed.
* *
@@ -150,6 +154,33 @@ public class Curator
this.reporter = reporter; this.reporter = reporter;
return this; return this;
} }
/**
 * Sets an upper limit for the number of objects in the context cache
 * used in a curation, if the context is accessible. Note that for many
 * forms of invocation the context is not accessible. When the limit is
 * reached, the context cache is emptied. The default is no limit.
 *
 * @param limit
 *        the maximum number of cached objects to allow
 * @return this curator, to allow call chaining
 */
public Curator setCacheLimit(int limit)
{
    this.cacheLimit = limit;
    return this;
}
/**
 * Defines the transactional scope of curator executions.
 * <ul>
 * <li>'open' (the default): the framework performs no commits during
 * curation.</li>
 * <li>'curation': a single commit occurs after the entire performance is
 * complete.</li>
 * <li>'object': a commit occurs for each object (e.g. item) encountered
 * in a given execution.</li>
 * </ul>
 *
 * @param scope
 *        the transaction scope to apply
 * @return this curator, to allow call chaining
 */
public Curator setTransactionScope(TxScope scope)
{
    this.txScope = scope;
    return this;
}
/** /**
* Performs all configured tasks upon object identified by id. If * Performs all configured tasks upon object identified by id. If
@@ -168,12 +199,8 @@ public class Curator
} }
try try
{ {
//Save the currently authenticated user's ID to the current Task thread //Save the context on current execution thread
//(Allows individual tasks to retrieve current user info via currentPerformer() method) curationCtx.set(c);
if(c.getCurrentUser()!=null)
{
performer.set(Integer.valueOf(c.getCurrentUser().getID()));
}
DSpaceObject dso = HandleManager.resolveToObject(c, id); DSpaceObject dso = HandleManager.resolveToObject(c, id);
if (dso != null) if (dso != null)
@@ -187,6 +214,14 @@ public class Curator
trMap.get(taskName).run(c, id); trMap.get(taskName).run(c, id);
} }
} }
// if curation scoped, commit transaction
if (txScope.equals(TxScope.CURATION)) {
Context ctx = curationCtx.get();
if (ctx != null)
{
ctx.commit();
}
}
} }
catch (SQLException sqlE) catch (SQLException sqlE)
{ {
@@ -194,7 +229,7 @@ public class Curator
} }
finally finally
{ {
performer.remove(); curationCtx.remove();
} }
} }
@@ -221,8 +256,7 @@ public class Curator
{ {
TaskRunner tr = trMap.get(taskName); TaskRunner tr = trMap.get(taskName);
// do we need to iterate over the object ? // do we need to iterate over the object ?
if (type == Constants.ITEM || if (type == Constants.ITEM || tr.task.isDistributive())
tr.task.getClass().isAnnotationPresent(Distributive.class))
{ {
tr.run(dso); tr.run(dso);
} }
@@ -329,59 +363,21 @@ public class Curator
tr.setResult(result); tr.setResult(result);
} }
} }
/**
* Returns the Eperson ID of the Context currently in use in performing a task,
* if known, else <code>null</code>.
* <P>
* In many circumstances, this value will be null:
* when the curator is not in the perform method, when curation
* is invoked with a DSO (the context is 'hidden').
* <P>
* The primary intended use for this method is to ensure individual tasks,
* which may need to create a new Context, can also properly initialize that
* Context with an EPerson ID (to ensure proper access rights exist in that Context).
* <P>
* Current performer information is also used when executing Site-Wide tasks
* (see Curator.doSite() method).
*/
public static Integer currentPerformer()
{
return performer.get();
}
/** /**
* Returns a Context object which is "authenticated" as the current * Returns the context object used in the current curation performance.
* EPerson performer (see 'currentPerformer()' method). This is primarily a * This is primarily a utility method to allow tasks access to the context when necessary.
* utility method to allow tasks access to an authenticated Context when
* necessary.
* <P> * <P>
* If the 'currentPerformer()' is null or not set, then this just returns * If the context is null or not set, then this just returns
* a brand new Context object representing an Anonymous User. * a brand new Context object representing an Anonymous User.
* *
* @return authenticated Context object (or anonymous Context if currentPerformer() is null) * @return curation Context object (or anonymous Context if curation is null)
*/ */
public static Context authenticatedContext() public static Context curationContext() throws SQLException
throws SQLException
{ {
//Create a new context // Return curation context or new context if undefined
Context ctx = new Context(); Context curCtx = curationCtx.get();
return (curCtx != null) ? curCtx : new Context();
Integer epersonID = currentPerformer();
//If a Curator 'performer' ID is set
if(epersonID!=null)
{
//parse the performer's User ID & set as the currently authenticated user in Context
EPerson autenticatedUser = EPerson.find(ctx, epersonID.intValue());
ctx.setCurrentUser(autenticatedUser);
}
else
{
//otherwise, no-op. This is the equivalent of an ANONYMOUS USER Context
}
return ctx;
} }
/** /**
@@ -407,19 +403,15 @@ public class Curator
Context ctx = null; Context ctx = null;
try try
{ {
ctx = curationContext();
// Site-wide Tasks really should have an EPerson performer associated with them, // Site-wide Tasks really should have an EPerson performer associated with them,
// otherwise they are run as an "anonymous" user with limited access rights. // otherwise they are run as an "anonymous" user with limited access rights.
if(Curator.currentPerformer()==null) if(ctx.getCurrentUser()==null)
{ {
log.warn("You are running one or more Site-Wide curation tasks in ANONYMOUS USER mode," + log.warn("You are running one or more Site-Wide curation tasks in ANONYMOUS USER mode," +
" as there is no EPerson 'performer' associated with this task. To associate an EPerson 'performer' " + " as there is no EPerson 'performer' associated with this task. To associate an EPerson 'performer' " +
" you should ensure tasks are called via the Curator.curate(Context, ID) method."); " you should ensure tasks are called via the Curator.curate(Context, ID) method.");
} }
else
{
// Create a new Context for this Sitewide task, authenticated as the current task performer.
ctx = Curator.authenticatedContext();
}
//Run task for the Site object itself //Run task for the Site object itself
if (! tr.run(site)) if (! tr.run(site))
@@ -519,21 +511,43 @@ public class Curator
} }
return true; return true;
} }
/**
 * Record a 'visit' to a DSpace object and enforce any policies set
 * on this curator: commit the curation context when the transaction
 * scope is 'object', and empty the context cache once it has grown to
 * the configured cache limit. Does nothing when no curation context is
 * bound to the current thread.
 *
 * @param dso
 *        the object just visited; not currently consulted, and callers
 *        may pass <code>null</code>
 * @throws IOException
 *         if a context operation fails with an SQLException (which is
 *         preserved as the cause)
 */
private void visit(DSpaceObject dso) throws IOException
{
    Context curCtx = curationCtx.get();
    if (curCtx == null)
    {
        // context not accessible for this form of invocation
        return;
    }
    try
    {
        // identity comparison is null-safe should the scope be unset
        if (txScope == TxScope.OBJECT)
        {
            curCtx.commit();
        }
        // Compare with '>=' rather than testing for an exact multiple: the
        // cache can grow by more than one object between visits, and a
        // modulo test would both skip past the limit and divide by zero
        // for a limit of 0.
        if (curCtx.getCacheSize() >= cacheLimit)
        {
            curCtx.clearCache();
        }
    }
    catch (SQLException sqlE)
    {
        // keep the original exception as the cause for diagnosis
        throw new IOException(sqlE.getMessage(), sqlE);
    }
}
private class TaskRunner private class TaskRunner
{ {
CurationTask task = null; ResolvedTask task = null;
String taskName = null;
int statusCode = CURATE_UNSET; int statusCode = CURATE_UNSET;
String result = null; String result = null;
Invoked mode = null;
int[] codes = null;
public TaskRunner(CurationTask task, String name) public TaskRunner(ResolvedTask task)
{ {
this.task = task; this.task = task;
taskName = name;
parseAnnotations(task.getClass());
} }
public boolean run(DSpaceObject dso) throws IOException public boolean run(DSpaceObject dso) throws IOException
@@ -547,13 +561,13 @@ public class Curator
statusCode = task.perform(dso); statusCode = task.perform(dso);
String id = (dso.getHandle() != null) ? dso.getHandle() : "workflow item: " + dso.getID(); String id = (dso.getHandle() != null) ? dso.getHandle() : "workflow item: " + dso.getID();
log.info(logMessage(id)); log.info(logMessage(id));
visit(dso);
return ! suspend(statusCode); return ! suspend(statusCode);
} }
catch(IOException ioe) catch(IOException ioe)
{ {
//log error & pass exception upwards //log error & pass exception upwards
log.error("Error executing curation task '" + taskName + "'", ioe); log.error("Error executing curation task '" + task.getName() + "'", ioe);
throw ioe; throw ioe;
} }
} }
@@ -568,13 +582,13 @@ public class Curator
} }
statusCode = task.perform(c, id); statusCode = task.perform(c, id);
log.info(logMessage(id)); log.info(logMessage(id));
visit(null);
return ! suspend(statusCode); return ! suspend(statusCode);
} }
catch(IOException ioe) catch(IOException ioe)
{ {
//log error & pass exception upwards //log error & pass exception upwards
log.error("Error executing curation task '" + taskName + "'", ioe); log.error("Error executing curation task '" + task.getName() + "'", ioe);
throw ioe; throw ioe;
} }
} }
@@ -584,21 +598,12 @@ public class Curator
this.result = result; this.result = result;
} }
private void parseAnnotations(Class tClass)
{
Suspendable suspendAnn = (Suspendable)tClass.getAnnotation(Suspendable.class);
if (suspendAnn != null)
{
mode = suspendAnn.invoked();
codes = suspendAnn.statusCodes();
}
}
private boolean suspend(int code) private boolean suspend(int code)
{ {
Invoked mode = task.getMode();
if (mode != null && (mode.equals(Invoked.ANY) || mode.equals(iMode))) if (mode != null && (mode.equals(Invoked.ANY) || mode.equals(iMode)))
{ {
for (int i : codes) for (int i : task.getCodes())
{ {
if (code == i) if (code == i)
{ {
@@ -617,7 +622,7 @@ public class Curator
private String logMessage(String id) private String logMessage(String id)
{ {
StringBuilder mb = new StringBuilder(); StringBuilder mb = new StringBuilder();
mb.append("Curation task: ").append(taskName). mb.append("Curation task: ").append(task.getName()).
append(" performed on: ").append(id). append(" performed on: ").append(id).
append(" with status: ").append(statusCode); append(" with status: ").append(statusCode);
if (result != null) if (result != null)

View File

@@ -0,0 +1,147 @@
/*
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://dspace.org/license/
*/
package org.dspace.curate;
import java.io.IOException;
import org.dspace.content.DSpaceObject;
import org.dspace.core.Context;
/**
* ResolvedTask wraps an implementation of one of the CurationTask or
* ScriptedTask interfaces and provides for uniform invocation based on
* CurationTask methods.
*
* @author richardrodgers
*/
public class ResolvedTask
{
    // wrapped objects - exactly one of these is set, depending on constructor
    private final CurationTask cTask;
    private final ScriptedTask sTask;
    // local name of task
    private final String taskName;
    // annotation data
    private final boolean distributive;
    private final boolean mutative;
    private final Curator.Invoked mode;
    private final int[] codes;

    /**
     * Wraps a Java curation task, reading the Distributive, Mutative and
     * Suspendable annotations from its implementation class.
     *
     * @param taskName local name of the task
     * @param cTask the task implementation to wrap
     */
    protected ResolvedTask(String taskName, CurationTask cTask)
    {
        this.taskName = taskName;
        this.cTask = cTask;
        this.sTask = null;
        // process annotations
        Class<? extends CurationTask> ctClass = cTask.getClass();
        distributive = ctClass.isAnnotationPresent(Distributive.class);
        mutative = ctClass.isAnnotationPresent(Mutative.class);
        Suspendable suspendAnno = ctClass.getAnnotation(Suspendable.class);
        if (suspendAnno != null)
        {
            mode = suspendAnno.invoked();
            codes = suspendAnno.statusCodes();
        }
        else
        {
            mode = null;
            codes = null;
        }
    }

    /**
     * Wraps a scripted task. No annotation data is derived for scripted
     * tasks, so the task is reported as neither distributive nor mutative
     * and has no suspension mode or codes.
     *
     * @param taskName local name of the task
     * @param sTask the scripted task implementation to wrap
     */
    protected ResolvedTask(String taskName, ScriptedTask sTask)
    {
        this.taskName = taskName;
        this.cTask = null;
        this.sTask = sTask;
        // annotation processing TBD
        distributive = false;
        mutative = false;
        mode = null;
        codes = null;
    }

    /**
     * Initialize task - parameters inform the task of its invoking curator.
     * Since the curator can provide services to the task, this represents
     * curation DI.
     *
     * @param curator the Curator controlling this task
     * @throws IOException if the wrapped task's init fails
     */
    public void init(Curator curator) throws IOException
    {
        if (unscripted())
        {
            cTask.init(curator, taskName);
        }
        else
        {
            sTask.init(curator, taskName);
        }
    }

    /**
     * Perform the curation task upon passed DSO
     *
     * @param dso the DSpace object
     * @return status code
     * @throws IOException if the wrapped task fails
     */
    public int perform(DSpaceObject dso) throws IOException
    {
        return (unscripted()) ? cTask.perform(dso) : sTask.performDso(dso);
    }

    /**
     * Perform the curation task for passed id
     *
     * @param ctx DSpace context object
     * @param id persistent ID for DSpace object
     * @return status code
     * @throws IOException if the wrapped task fails
     */
    public int perform(Context ctx, String id) throws IOException
    {
        return (unscripted()) ? cTask.perform(ctx, id) : sTask.performId(ctx, id);
    }

    /**
     * Returns local name of task
     *
     * @return the local name of the task
     */
    public String getName()
    {
        return taskName;
    }

    /**
     * Returns whether task should be distributed through containers
     *
     * @return true if the wrapped task class carries the Distributive annotation
     */
    public boolean isDistributive()
    {
        return distributive;
    }

    /**
     * Returns whether task alters (mutates) its target objects
     *
     * @return true if the wrapped task class carries the Mutative annotation
     */
    public boolean isMutative()
    {
        return mutative;
    }

    /**
     * Returns the invocation mode from the task's Suspendable annotation.
     *
     * @return the mode, or <code>null</code> if the task has no Suspendable
     *         annotation (always the case for scripted tasks)
     */
    public Curator.Invoked getMode()
    {
        return mode;
    }

    /**
     * Returns the status codes from the task's Suspendable annotation.
     *
     * @return a copy of the codes, or <code>null</code> if the task has no
     *         Suspendable annotation (always the case for scripted tasks)
     */
    public int[] getCodes()
    {
        // hand out a copy so callers cannot alter this task's codes
        return (codes != null) ? codes.clone() : null;
    }

    // true when this wraps a Java CurationTask rather than a ScriptedTask
    private boolean unscripted()
    {
        return sTask == null;
    }
}

View File

@@ -0,0 +1,56 @@
/*
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://dspace.org/license/
*/
package org.dspace.curate;
import java.io.IOException;
import org.dspace.content.DSpaceObject;
import org.dspace.core.Context;
/**
* ScriptedTask describes a rather generic ability to perform an operation
* upon a DSpace object. Its semantics are identical to the CurationTask interface,
* but it is designed to be implemented in scripting languages, rather than
* Java. For this reason, the 'perform' methods are renamed to accommodate
* languages (like Ruby) that lack method overloading.
*
* @author richardrodgers
*/
public interface ScriptedTask
{
/**
* Initialize task - parameters inform the task of its invoking curator.
* Since the curator can provide services to the task, this represents
* curation DI.
*
* @param curator the Curator controlling this task
* @param taskId identifier task should use in invoking services
* @throws IOException
*/
public void init(Curator curator, String taskId) throws IOException;
/**
* Perform the curation task upon passed DSO
*
* @param dso the DSpace object
* @return status code
* @throws IOException
*/
public int performDso(DSpaceObject dso) throws IOException;
/**
* Perform the curation task for passed id
*
* @param ctx DSpace context object
* @param id persistent ID for DSpace object
* @return status code
* @throws IOException
*/
public int performId(Context ctx, String id) throws IOException;
}

View File

@@ -1,154 +1,279 @@
/** /**
* The contents of this file are subject to the license and copyright * The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source * detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at * tree and available online at
* *
* http://www.dspace.org/license/ * http://www.dspace.org/license/
*/ */
package org.dspace.curate; package org.dspace.curate;
import java.io.File; import java.io.File;
import java.io.FileReader; import java.io.BufferedReader;
import java.io.FileNotFoundException; import java.io.FileReader;
import java.io.IOException; import java.io.FileWriter;
import java.io.Reader; import java.io.FileNotFoundException;
import java.util.HashMap; import java.io.IOException;
import java.util.Map; import java.io.Reader;
import java.io.Writer;
import javax.script.ScriptEngine; import java.util.Properties;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException; import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import org.apache.log4j.Logger; import javax.script.ScriptException;
import org.dspace.core.ConfigurationManager; import org.apache.log4j.Logger;
import org.dspace.core.PluginManager;
import org.dspace.core.ConfigurationManager;
/** import org.dspace.core.PluginManager;
* TaskResolver takes a logical name of a curation task and delivers a
* suitable implementation object. Supported implementation types include: /**
* (1) Classpath-local Java classes configured and loaded via PluginManager. * TaskResolver takes a logical name of a curation task and attempts to deliver
* (2) Local script-based tasks, viz. coded in any scripting language whose * a suitable implementation object. Supported implementation types include:
* runtimes are accessible via the JSR-223 scripting API. This really amounts * (1) Classpath-local Java classes configured and loaded via PluginManager.
* to the family of dynamic JVM languages: JRuby, Jython, Groovy, Javascript, etc * (2) Local script-based tasks, viz. coded in any scripting language whose
* Note that the requisite jars and other resources for these languages must be * runtimes are accessible via the JSR-223 scripting API. This really amounts
* installed in the DSpace instance for them to be used here. * to the family of dynamic JVM languages: JRuby, Jython, Groovy, Javascript, etc
* Further work may involve remote URL-loadable code, etc. * Note that the requisite jars and other resources for these languages must be
* * installed in the DSpace instance for them to be used here.
* Scripted tasks are configured in dspace/config/modules/curate.cfg with the * Further work may involve remote URL-loadable code, etc.
* property "script.tasks" with value syntax: *
* <task-desc> = taskName, * Scripted tasks are managed in a directory configured with the
* <task-desc> = taskName * dspace/config/modules/curate.cfg property "script.dir". A catalog of
* where task-desc is a descriptor of the form: * scripted tasks named 'task.catalog" is kept in this directory.
* <engine>:<relfilePath>:<implClassName> * Each task has a 'descriptor' property with value syntax:
* An example property value: * <engine>|<relFilePath>|<implClassCtor>
* * An example property:
* ruby:rubytask.rb:LinkChecker = linkchecker *
* * linkchecker = ruby|rubytask.rb|LinkChecker.new
* This descriptor means that the 'ruby' script engine will be created, *
* a script file named 'rubytask.rb' in the directory <taskbase>/ruby/rubtask.rb will be loaded * This descriptor means that a 'ruby' script engine will be created,
* and the resolver will expect that a class 'LinkChecker' will be defined in that script file. * a script file named 'rubytask.rb' in the directory <script.dir> will be
* * loaded and the resolver will expect an evaluation of 'LinkChecker.new' will
* @author richardrodgers * provide a correct implementation object.
*/ *
* Script files may embed their descriptors to facilitate deployment.
public class TaskResolver * To accomplish this, a script must include the descriptor string with syntax:
{ * $td=<descriptor> somewhere on a comment line. for example:
// logging service *
private static Logger log = Logger.getLogger(TaskResolver.class); * # My descriptor $td=ruby|rubytask.rb|LinkChecker.new
*
// base directory of task scripts * For portability, the <relFilePath> component may be omitted in this context.
private static String scriptDir = ConfigurationManager.getProperty("curate", "script.dir"); * Thus, $td=ruby||LinkChecker.new will be expanded to a descriptor
* with the name of the embedding file.
// map of task script descriptions, keyed by logical task name *
private static Map<String, String> scriptMap = new HashMap<String, String>(); * @author richardrodgers
*/
static
{ public class TaskResolver
// build map of task descriptors {
loadDescriptors(); // logging service
} private static Logger log = Logger.getLogger(TaskResolver.class);
private TaskResolver() // base directory of task scripts & catalog name
{ private static final String CATALOG = "task.catalog";
} private static final String scriptDir = ConfigurationManager.getProperty("curate", "script.dir");
/** // catalog of script tasks
* Loads the map of script descriptors private Properties catalog;
*/
public static void loadDescriptors() public TaskResolver()
{ {
scriptMap.clear(); }
String propVal = ConfigurationManager.getProperty("curate", "script.tasks");
if (propVal != null) /**
{ * Installs a task script. Succeeds only if script:
for (String desc : propVal.split(",")) * (1) exists in the configured script directory and
{ * (2) contains a recognizable descriptor in a comment line.
String[] parts = desc.split("="); * If script lacks a descriptor, it may still be installed
scriptMap.put(parts[1].trim(), parts[0].trim()); * by manually invoking <code>addDescriptor</code>.
} *
} * @param taskName
} * logical name of task to associate with script
* @param fileName
/** * name of file containing task script
* Returns a task implementation for a given task name, * @return true if script installed, false if installation failed
* or <code>null</code> if no implementation could be obtained. */
*/ public boolean installScript(String taskName, String fileName)
public static CurationTask resolveTask(String taskName) {
{ // Can we locate the file in the script directory?
CurationTask task = (CurationTask)PluginManager.getNamedPlugin("curate", CurationTask.class, taskName); File script = new File(scriptDir, fileName);
if (task == null) if (script.exists())
{ {
// maybe it is implemented by a script? BufferedReader reader = null;
String scriptDesc = scriptMap.get(taskName); try
if (scriptDesc != null) {
{ reader = new BufferedReader(new FileReader(script));
String[] descParts = scriptDesc.split(":"); String line = null;
// first descriptor token is name ('alias') of scripting engine, while((line = reader.readLine()) != null)
// which is also the subdirectory where script file kept {
ScriptEngineManager mgr = new ScriptEngineManager(); if (line.startsWith("#") && line.indexOf("$td=") > 0)
ScriptEngine engine = mgr.getEngineByName(descParts[0]); {
if (engine != null) String desc = line.substring(line.indexOf("$td=") + 4);
{ // insert relFilePath if missing
// see if we can locate the script file and load it String[] tokens = desc.split("\\|");
// the second token is the relative path to the file if (tokens[1].length() == 0)
File script = new File(scriptDir, descParts[1]); {
if (script.exists()) desc = tokens[0] + "|" + fileName + "|" + tokens[2];
{ }
try addDescriptor(taskName, desc);
{ return true;
Reader reader = new FileReader(script); }
engine.eval(reader); }
reader.close(); }
// third token is name of class implementing catch(IOException ioE)
// CurationTask interface - add ".new" to ask for an instance {
String implInst = descParts[2] + ".new"; log.error("Error reading task script: " + fileName);
task = (CurationTask)engine.eval(implInst); }
} finally
catch (FileNotFoundException fnfE) {
{ if (reader != null)
log.error("Script: '" + script.getName() + "' not found for task: " + taskName); {
} try
catch (IOException ioE) {
{ reader.close();
log.error("Error loading script: '" + script.getName() + "'"); }
} catch(IOException ioE)
catch (ScriptException scE) {
{ log.error("Error closing task script: " + fileName);
log.error("Error evaluating script: '" + script.getName() + "' msg: " + scE.getMessage()); }
} }
} }
else }
{ else
log.error("No script: '" + script.getName() + "' found for task: " + taskName); {
} log.error("Task script: " + fileName + "not found in: " + scriptDir);
} }
else return false;
{ }
log.error("Script engine: '" + descParts[0] + "' is not installed");
} /**
} * Adds a task descriptor property and flushes catalog to disk.
} *
return task; * @param taskName
} * logical task name
} * @param descriptor
* descriptor for task
*/
public void addDescriptor(String taskName, String descriptor)
{
    // Registers (or replaces) the descriptor for taskName in the in-memory
    // catalog, then rewrites the entire catalog file in the script directory.
    // I/O failures are logged rather than thrown; the in-memory entry is
    // kept even when the file could not be written.
    loadCatalog();
    catalog.put(taskName, descriptor);
    Writer writer = null;
    try
    {
        // FileWriter is opened without append, so the file is replaced
        writer = new FileWriter(new File(scriptDir, CATALOG));
        catalog.store(writer, "do not edit");
    }
    catch(IOException ioE)
    {
        // hand the exception to the logger so the cause is not lost
        log.error("Error saving scripted task catalog: " + CATALOG, ioE);
    }
    finally
    {
        if (writer != null)
        {
            try
            {
                writer.close();
            }
            catch (IOException ioE)
            {
                log.error("Error closing scripted task catalog: " + CATALOG, ioE);
            }
        }
    }
}
/**
 * Returns a task implementation for a given task name,
 * or <code>null</code> if no implementation could be obtained.
 * The name is first looked up as a named plugin; failing that, the
 * scripted task catalog is consulted for a descriptor of the form
 * <code>engine|relFilePath|constructorExpression</code>.
 * All failures are logged and reported to the caller as <code>null</code>.
 *
 * @param taskName
 *        logical task name
 * @return a ResolvedTask wrapping the plugin or scripted task, or
 *         <code>null</code> if the task could not be resolved
 */
public ResolvedTask resolveTask(String taskName)
{
    CurationTask ctask = (CurationTask)PluginManager.getNamedPlugin("curate", CurationTask.class, taskName);
    if (ctask != null)
    {
        return new ResolvedTask(taskName, ctask);
    }
    // maybe it is implemented by a script?
    loadCatalog();
    String scriptDesc = catalog.getProperty(taskName);
    if (scriptDesc == null)
    {
        return null;
    }
    String[] tokens = scriptDesc.split("\\|");
    // split() drops trailing empty tokens, so a truncated descriptor
    // must be rejected here rather than failing on an array index below
    if (tokens.length < 3)
    {
        log.error("Malformed descriptor: '" + scriptDesc + "' for task: " + taskName);
        return null;
    }
    // first descriptor token is name ('alias') of scripting engine
    ScriptEngineManager mgr = new ScriptEngineManager();
    ScriptEngine engine = mgr.getEngineByName(tokens[0]);
    if (engine == null)
    {
        log.error("Script engine: '" + tokens[0] + "' is not installed");
        return null;
    }
    // see if we can locate the script file and load it
    // the second token is the relative path to the file
    File script = new File(scriptDir, tokens[1]);
    if (! script.exists())
    {
        log.error("No script: '" + script.getName() + "' found for task: " + taskName);
        return null;
    }
    Reader reader = null;
    try
    {
        reader = new FileReader(script);
        engine.eval(reader);
        // third token is the constructor expression for the class
        // implementing CurationTask interface
        Object impl = engine.eval(tokens[2]);
        if (impl instanceof ScriptedTask)
        {
            return new ResolvedTask(taskName, (ScriptedTask)impl);
        }
        log.error("Script: '" + script.getName() + "' did not produce a scripted task for: " + taskName);
    }
    catch (FileNotFoundException fnfE)
    {
        log.error("Script: '" + script.getName() + "' not found for task: " + taskName);
    }
    catch (ScriptException scE)
    {
        log.error("Error evaluating script: '" + script.getName() + "' msg: " + scE.getMessage(), scE);
    }
    finally
    {
        // close on every path, including a failed evaluation
        if (reader != null)
        {
            try
            {
                reader.close();
            }
            catch (IOException ioE)
            {
                log.error("Error loading script: '" + script.getName() + "'", ioE);
            }
        }
    }
    return null;
}
/**
 * Loads catalog of descriptors for tasks if not already loaded.
 * A missing catalog file, or one that cannot be read, leaves the
 * catalog empty (or partially filled); read errors are logged, not thrown.
 */
private void loadCatalog()
{
    if (catalog != null)
    {
        return;
    }
    catalog = new Properties();
    File catalogFile = new File(scriptDir, CATALOG);
    if (! catalogFile.exists())
    {
        return;
    }
    Reader reader = null;
    try
    {
        reader = new FileReader(catalogFile);
        catalog.load(reader);
    }
    catch(IOException ioE)
    {
        log.error("Error loading scripted task catalog: " + CATALOG, ioE);
    }
    finally
    {
        // release the file handle even when load() fails part-way
        if (reader != null)
        {
            try
            {
                reader.close();
            }
            catch (IOException ioE)
            {
                log.error("Error loading scripted task catalog: " + CATALOG, ioE);
            }
        }
    }
}
}