diff --git a/dspace-api/src/main/java/org/dspace/curate/AbstractCurationTask.java b/dspace-api/src/main/java/org/dspace/curate/AbstractCurationTask.java index 5dc88703e4..d58076ce23 100644 --- a/dspace-api/src/main/java/org/dspace/curate/AbstractCurationTask.java +++ b/dspace-api/src/main/java/org/dspace/curate/AbstractCurationTask.java @@ -9,12 +9,16 @@ package org.dspace.curate; import java.io.IOException; import java.sql.SQLException; +import java.util.Properties; + +import org.apache.log4j.Logger; import org.dspace.content.Collection; import org.dspace.content.Community; import org.dspace.content.DSpaceObject; import org.dspace.content.Item; import org.dspace.content.ItemIterator; +import org.dspace.core.ConfigurationManager; import org.dspace.core.Constants; import org.dspace.core.Context; import org.dspace.handle.HandleManager; @@ -31,6 +35,10 @@ public abstract class AbstractCurationTask implements CurationTask protected Curator curator = null; // curator-assigned taskId protected String taskId = null; + // optional task configuration properties + private Properties taskProps = null; + // logger + private static Logger log = Logger.getLogger(AbstractCurationTask.class); @Override public void init(Curator curator, String taskId) throws IOException @@ -150,4 +158,125 @@ public abstract class AbstractCurationTask implements CurationTask { curator.setResult(taskId, result); } + + /** + * Returns task configuration property value for passed name, else + * null if no properties defined or no value for passed key. 
+ * + * @param name + * the property name + * @return value + * the property value, or null + * + */ + protected String taskProperty(String name) + { + if (taskProps == null) + { + // load properties + taskProps = new Properties(); + StringBuilder modName = new StringBuilder(); + for (String segment : taskId.split("\\.")) + { + // load property segments if present + modName.append(segment); + Properties modProps = ConfigurationManager.getProperties(modName.toString()); + if (modProps != null) + { + taskProps.putAll(modProps); + } + modName.append("."); + } + // warn if *no* properties found + if (taskProps.size() == 0) + { + log.warn("Warning: No configuration properties found for task: " + taskId); + } + } + return taskProps.getProperty(name); + } + + /** + * Returns task configuration integer property value for passed name, else + * passed default value if no properties defined or no value for passed key. + * + * @param name + * the property name + * @param defaultValue value + * the default value + * @return value + * the property value, or default value + * + */ + protected int taskIntProperty(String name, int defaultValue) + { + int intVal = defaultValue; + String strVal = taskProperty(name); + if (strVal != null) + { + try + { + intVal = Integer.parseInt(strVal.trim()); + } + catch(NumberFormatException nfE) + { + log.warn("Warning: Number format error in module: " + taskId + " property: " + name); + } + } + return intVal; + } + + /** + * Returns task configuration long property value for passed name, else + * passed default value if no properties defined or no value for passed key. 
+ * + * @param name + * the property name + * @param defaultValue value + * the default value + * @return value + * the property value, or default + * + */ + protected long taskLongProperty(String name, long defaultValue) + { + long longVal = defaultValue; + String strVal = taskProperty(name); + if (strVal != null) + { + try + { + longVal = Long.parseLong(strVal.trim()); + } + catch(NumberFormatException nfE) + { + log.warn("Warning: Number format error in module: " + taskId + " property: " + name); + } + } + return longVal; + } + + /** + * Returns task configuration boolean property value for passed name, else + * passed default value if no properties defined or no value for passed key. + * + * @param name + * the property name + * @param defaultValue value + * the default value + * @return value + * the property value, or default + * + */ + protected boolean taskBooleanProperty(String name, boolean defaultValue) + { + String strVal = taskProperty(name); + if (strVal != null) + { + strVal = strVal.trim(); + return strVal.equalsIgnoreCase("true") || + strVal.equalsIgnoreCase("yes"); + } + return defaultValue; + } } diff --git a/dspace-api/src/main/java/org/dspace/curate/CurationCli.java b/dspace-api/src/main/java/org/dspace/curate/CurationCli.java index 4459f2f99b..af9f8e0309 100644 --- a/dspace-api/src/main/java/org/dspace/curate/CurationCli.java +++ b/dspace-api/src/main/java/org/dspace/curate/CurationCli.java @@ -17,7 +17,7 @@ import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Options; import org.apache.commons.cli.PosixParser; -import org.dspace.content.Community; +import org.dspace.content.Site; import org.dspace.core.Context; import org.dspace.core.PluginManager; import org.dspace.eperson.EPerson; @@ -48,6 +48,10 @@ public class CurationCli "email address of curating eperson"); options.addOption("r", "reporter", true, "reporter to manage results - use '-' to report to console. 
If absent, no reporting"); + options.addOption("l", "limit", true, + "maximum number of objects allowed in context cache. If absent, no limit"); + options.addOption("s", "scope", true, + "transaction scope to impose: use 'object', 'curation', or 'open'. If absent, 'open' applies"); options.addOption("v", "verbose", false, "report activity to stdout"); options.addOption("h", "help", false, "help"); @@ -60,6 +64,8 @@ public class CurationCli String taskQueueName = null; String ePersonName = null; String reporterName = null; + String limit = null; + String scope = null; boolean verbose = false; if (line.hasOption('h')) @@ -104,6 +110,16 @@ public class CurationCli { // report file reporterName = line.getOptionValue('r'); } + + if (line.hasOption('l')) + { // cache limit + limit = line.getOptionValue('l'); + } + + if (line.hasOption('s')) + { // transaction scope + scope = line.getOptionValue('s'); + } if (line.hasOption('v')) { // verbose @@ -122,6 +138,18 @@ public class CurationCli System.out.println("A curation task or queue must be specified (-h for help)"); System.exit(1); } + + if (limit != null && !limit.matches("0*[1-9][0-9]{0,8}")) // digits only: the parseInt below can then neither throw nor overflow + { + System.out.println("Cache limit '" + limit + "' must be a positive integer"); + System.exit(1); + } + + if (scope != null && !scope.matches("(?i)(object|curation|open)")) // Enum.valueOf throws on unknown names, it never returns null + { + System.out.println("Bad transaction scope '" + scope + "': only 'object', 'curation' or 'open' recognized"); + System.exit(1); + } Context c = new Context(); if (ePersonName != null) @@ -144,6 +172,15 @@ public class CurationCli { curator.setReporter(reporterName); } + if (limit != null) + { + curator.setCacheLimit(Integer.parseInt(limit)); + } + if (scope != null) + { + Curator.TxScope txScope = Curator.TxScope.valueOf(scope.toUpperCase(java.util.Locale.ROOT)); + curator.setTransactionScope(txScope); + } // we are operating in batch mode, if anyone cares.
curator.setInvoked(Curator.Invoked.BATCH); // load curation tasks @@ -197,15 +234,8 @@ public class CurationCli } if ("all".equals(idName)) { - // run on all top-level communities - for (Community comm : Community.findAllTop(c)) - { - if (verbose) - { - System.out.println("Curating community: " + comm.getHandle()); - } - curator.curate(comm); - } + // run on whole Site + curator.curate(c, Site.getSiteHandle()); } else { diff --git a/dspace-api/src/main/java/org/dspace/curate/Curator.java b/dspace-api/src/main/java/org/dspace/curate/Curator.java index 5a3f943d65..0345b4e7bc 100644 --- a/dspace-api/src/main/java/org/dspace/curate/Curator.java +++ b/dspace-api/src/main/java/org/dspace/curate/Curator.java @@ -52,16 +52,21 @@ public class Curator // invocation modes - used by Suspendable tasks public static enum Invoked { INTERACTIVE, BATCH, ANY }; + // transaction scopes + public static enum TxScope { OBJECT, CURATION, OPEN }; private static Logger log = Logger.getLogger(Curator.class); - private static final ThreadLocal performer = new ThreadLocal(); + private static final ThreadLocal curationCtx = new ThreadLocal(); private Map trMap = new HashMap(); private List perfList = new ArrayList(); private TaskQueue taskQ = null; private String reporter = null; private Invoked iMode = null; + private TaskResolver resolver = new TaskResolver(); + private int cacheLimit = Integer.MAX_VALUE; + private TxScope txScope = TxScope.OPEN; /** * No-arg constructor @@ -79,13 +84,13 @@ public class Curator */ public Curator addTask(String taskName) { - CurationTask task = TaskResolver.resolveTask(taskName); + ResolvedTask task = resolver.resolveTask(taskName); if (task != null) { try { - task.init(this, taskName); - trMap.put(taskName, new TaskRunner(task, taskName)); + task.init(this); + trMap.put(taskName, new TaskRunner(task)); // performance order currently FIFO - to be revisited perfList.add(taskName); } @@ -111,8 +116,7 @@ public class Curator { return perfList.contains(taskName); 
} - - + /** * Removes a task from the set to be performed. * @@ -150,6 +154,33 @@ public class Curator this.reporter = reporter; return this; } + + /** + * Sets an upper limit for the number of objects in the context cache + * used in a curation, if context accessible. Note that for many forms of + * invocation, the context is not accessible. If limit is reached, + * context cache will be emptied. The default is no limit. + */ + public Curator setCacheLimit(int limit) + { + cacheLimit = limit; + return this; + } + + /** + * Defines the transactional scope of curator executions. + * The default is 'open' meaning that no commits are + * performed by the framework during curation. A scope of + * 'curation' means that a single commit will occur after the + * entire performance is complete, and a scope of 'object' + * will commit for each object (e.g. item) encountered in + * a given execution. + */ + public Curator setTransactionScope(TxScope scope) + { + txScope = scope; + return this; + } /** * Performs all configured tasks upon object identified by id. 
If @@ -168,12 +199,8 @@ public class Curator } try { - //Save the currently authenticated user's ID to the current Task thread - //(Allows individual tasks to retrieve current user info via currentPerformer() method) - if(c.getCurrentUser()!=null) - { - performer.set(Integer.valueOf(c.getCurrentUser().getID())); - } + //Save the context on current execution thread + curationCtx.set(c); DSpaceObject dso = HandleManager.resolveToObject(c, id); if (dso != null) @@ -187,6 +214,14 @@ public class Curator trMap.get(taskName).run(c, id); } } + // if curation scoped, commit transaction + if (txScope.equals(TxScope.CURATION)) { + Context ctx = curationCtx.get(); + if (ctx != null) + { + ctx.commit(); + } + } } catch (SQLException sqlE) { @@ -194,7 +229,7 @@ public class Curator } finally { - performer.remove(); + curationCtx.remove(); } } @@ -221,8 +256,7 @@ public class Curator { TaskRunner tr = trMap.get(taskName); // do we need to iterate over the object ? - if (type == Constants.ITEM || - tr.task.getClass().isAnnotationPresent(Distributive.class)) + if (type == Constants.ITEM || tr.task.isDistributive()) { tr.run(dso); } @@ -329,59 +363,21 @@ public class Curator tr.setResult(result); } } - - /** - * Returns the Eperson ID of the Context currently in use in performing a task, - * if known, else null. - *

- * In many circumstances, this value will be null: - * when the curator is not in the perform method, when curation - * is invoked with a DSO (the context is 'hidden'). - *

- * The primary intended use for this method is to ensure individual tasks, - * which may need to create a new Context, can also properly initialize that - * Context with an EPerson ID (to ensure proper access rights exist in that Context). - *

- * Current performer information is also used when executing Site-Wide tasks - * (see Curator.doSite() method). - */ - public static Integer currentPerformer() - { - return performer.get(); - } /** - * Returns a Context object which is "authenticated" as the current - * EPerson performer (see 'currentPerformer()' method). This is primarily a - * utility method to allow tasks access to an authenticated Context when - * necessary. + * Returns the context object used in the current curation performance. + * This is primarily a utility method to allow tasks access to the context when necessary. *

- * If the 'currentPerformer()' is null or not set, then this just returns + * If the context is null or not set, then this just returns * a brand new Context object representing an Anonymous User. * - * @return authenticated Context object (or anonymous Context if currentPerformer() is null) + * @return curation Context object (or anonymous Context if curation is null) */ - public static Context authenticatedContext() - throws SQLException + public static Context curationContext() throws SQLException { - //Create a new context - Context ctx = new Context(); - - Integer epersonID = currentPerformer(); - - //If a Curator 'performer' ID is set - if(epersonID!=null) - { - //parse the performer's User ID & set as the currently authenticated user in Context - EPerson autenticatedUser = EPerson.find(ctx, epersonID.intValue()); - ctx.setCurrentUser(autenticatedUser); - } - else - { - //otherwise, no-op. This is the equivalent of an ANONYMOUS USER Context - } - - return ctx; + // Return curation context or new context if undefined + Context curCtx = curationCtx.get(); + return (curCtx != null) ? curCtx : new Context(); } /** @@ -407,19 +403,15 @@ public class Curator Context ctx = null; try { + ctx = curationContext(); // Site-wide Tasks really should have an EPerson performer associated with them, // otherwise they are run as an "anonymous" user with limited access rights. - if(Curator.currentPerformer()==null) + if(ctx.getCurrentUser()==null) { log.warn("You are running one or more Site-Wide curation tasks in ANONYMOUS USER mode," + " as there is no EPerson 'performer' associated with this task. To associate an EPerson 'performer' " + " you should ensure tasks are called via the Curator.curate(Context, ID) method."); } - else - { - // Create a new Context for this Sitewide task, authenticated as the current task performer. - ctx = Curator.authenticatedContext(); - } //Run task for the Site object itself if (! 
tr.run(site)) @@ -519,21 +511,43 @@ public class Curator } return true; } + + /** + * Record a 'visit' to a DSpace object and enforce any policies set + * on this curator. + */ + private void visit(DSpaceObject dso) throws IOException + { + Context curCtx = curationCtx.get(); + if (curCtx != null) + { + try + { + if (txScope.equals(TxScope.OBJECT)) + { + curCtx.commit(); + } + if (curCtx.getCacheSize() >= cacheLimit) // treat the limit as an upper bound, not an exact multiple + { + curCtx.clearCache(); + } + } + catch (SQLException sqlE) + { + throw new IOException(sqlE.getMessage(), sqlE); + } + } + } private class TaskRunner { - CurationTask task = null; - String taskName = null; + ResolvedTask task = null; int statusCode = CURATE_UNSET; String result = null; - Invoked mode = null; - int[] codes = null; - public TaskRunner(CurationTask task, String name) + public TaskRunner(ResolvedTask task) { this.task = task; - taskName = name; - parseAnnotations(task.getClass()); } public boolean run(DSpaceObject dso) throws IOException @@ -547,13 +561,13 @@ public class Curator statusCode = task.perform(dso); String id = (dso.getHandle() != null) ? dso.getHandle() : "workflow item: " + dso.getID(); log.info(logMessage(id)); - + visit(dso); return ! suspend(statusCode); } catch(IOException ioe) { //log error & pass exception upwards - log.error("Error executing curation task '" + taskName + "'", ioe); + log.error("Error executing curation task '" + task.getName() + "'", ioe); throw ioe; } } @@ -568,13 +582,13 @@ public class Curator } statusCode = task.perform(c, id); log.info(logMessage(id)); - + visit(null); return !
suspend(statusCode); } catch(IOException ioe) { //log error & pass exception upwards - log.error("Error executing curation task '" + taskName + "'", ioe); + log.error("Error executing curation task '" + task.getName() + "'", ioe); throw ioe; } } @@ -584,21 +598,12 @@ public class Curator this.result = result; } - private void parseAnnotations(Class tClass) - { - Suspendable suspendAnn = (Suspendable)tClass.getAnnotation(Suspendable.class); - if (suspendAnn != null) - { - mode = suspendAnn.invoked(); - codes = suspendAnn.statusCodes(); - } - } - private boolean suspend(int code) { + Invoked mode = task.getMode(); if (mode != null && (mode.equals(Invoked.ANY) || mode.equals(iMode))) { - for (int i : codes) + for (int i : task.getCodes()) { if (code == i) { @@ -617,7 +622,7 @@ public class Curator private String logMessage(String id) { StringBuilder mb = new StringBuilder(); - mb.append("Curation task: ").append(taskName). + mb.append("Curation task: ").append(task.getName()). append(" performed on: ").append(id). append(" with status: ").append(statusCode); if (result != null) diff --git a/dspace-api/src/main/java/org/dspace/curate/ResolvedTask.java b/dspace-api/src/main/java/org/dspace/curate/ResolvedTask.java new file mode 100644 index 0000000000..d6f7fc058b --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/curate/ResolvedTask.java @@ -0,0 +1,147 @@ +/* + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://dspace.org/license/ + */ + +package org.dspace.curate; + +import java.io.IOException; + +import org.dspace.content.DSpaceObject; +import org.dspace.core.Context; + +/** + * ResolvedTask wraps an implementation of one of the CurationTask or + * ScriptedTask interfaces and provides for uniform invocation based on + * CurationTask methods. 
+ * + * @author richardrodgers + */ +public class ResolvedTask +{ + // wrapped objects + private CurationTask cTask; + private ScriptedTask sTask; + // local name of task + private String taskName; + // annotation data + private boolean distributive = false; + private boolean mutative = false; + private Curator.Invoked mode = null; + private int[] codes = null; + + + protected ResolvedTask(String taskName, CurationTask cTask) + { + this.taskName = taskName; + this.cTask = cTask; + // process annotations + Class ctClass = cTask.getClass(); + distributive = ctClass.isAnnotationPresent(Distributive.class); + mutative = ctClass.isAnnotationPresent(Mutative.class); + Suspendable suspendAnno = (Suspendable)ctClass.getAnnotation(Suspendable.class); + if (suspendAnno != null) + { + mode = suspendAnno.invoked(); + codes = suspendAnno.statusCodes(); + } + } + + protected ResolvedTask(String taskName, ScriptedTask sTask) + { + this.taskName = taskName; + this.sTask = sTask; + // annotation processing TBD + } + + /** + * Initialize task - parameters inform the task of its invoking curator. + * Since the curator can provide services to the task, this represents + * curation DI. + * + * @param curator the Curator controlling this task + * @throws IOException + */ + public void init(Curator curator) throws IOException + { + if (unscripted()) + { + cTask.init(curator, taskName); + } + else + { + sTask.init(curator, taskName); + } + } + + /** + * Perform the curation task upon passed DSO + * + * @param dso the DSpace object + * @return status code + * @throws IOException + */ + public int perform(DSpaceObject dso) throws IOException + { + return (unscripted()) ?
cTask.perform(dso) : sTask.performDso(dso); + } + + /** + * Perform the curation task for passed id + * + * @param ctx DSpace context object + * @param id persistent ID for DSpace object + * @return status code + * @throws IOException + */ + public int perform(Context ctx, String id) throws IOException + { + return (unscripted()) ? cTask.perform(ctx, id) : sTask.performId(ctx, id); + } + + /** + * Returns local name of task + * @return name + * the local name of the task + */ + public String getName() + { + return taskName; + } + + /** + * Returns whether task should be distributed through containers + * + */ + public boolean isDistributive() + { + return distributive; + } + + /** + * Returns whether task alters (mutates) its target objects + * + */ + public boolean isMutative() + { + return mutative; + } + + public Curator.Invoked getMode() + { + return mode; + } + + public int[] getCodes() + { + return codes; + } + + private boolean unscripted() + { + return sTask == null; + } +} diff --git a/dspace-api/src/main/java/org/dspace/curate/ScriptedTask.java b/dspace-api/src/main/java/org/dspace/curate/ScriptedTask.java new file mode 100644 index 0000000000..4f5947861a --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/curate/ScriptedTask.java @@ -0,0 +1,56 @@ +/* + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://dspace.org/license/ + */ + +package org.dspace.curate; + +import java.io.IOException; + +import org.dspace.content.DSpaceObject; +import org.dspace.core.Context; + +/** + * ScriptedTask describes a rather generic ability to perform an operation + * upon a DSpace object. Its semantics are identical to the CurationTask interface, + * but it is designed to be implemented in scripting languages, rather than + * Java.
For this reason, the 'perform' methods are renamed to accommodate + * languages (like Ruby) that lack method overloading. + * + * @author richardrodgers + */ +public interface ScriptedTask +{ + /** + * Initialize task - parameters inform the task of its invoking curator. + * Since the curator can provide services to the task, this represents + * curation DI. + * + * @param curator the Curator controlling this task + * @param taskId identifier task should use in invoking services + * @throws IOException + */ + public void init(Curator curator, String taskId) throws IOException; + + /** + * Perform the curation task upon passed DSO + * + * @param dso the DSpace object + * @return status code + * @throws IOException + */ + public int performDso(DSpaceObject dso) throws IOException; + + /** + * Perform the curation task for passed id + * + * @param ctx DSpace context object + * @param id persistent ID for DSpace object + * @return status code + * @throws IOException + */ + public int performId(Context ctx, String id) throws IOException; +} diff --git a/dspace-api/src/main/java/org/dspace/curate/TaskResolver.java b/dspace-api/src/main/java/org/dspace/curate/TaskResolver.java index 5a1abcb6be..52d3a0dd5c 100644 --- a/dspace-api/src/main/java/org/dspace/curate/TaskResolver.java +++ b/dspace-api/src/main/java/org/dspace/curate/TaskResolver.java @@ -1,154 +1,279 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ -package org.dspace.curate; - -import java.io.File; -import java.io.FileReader; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.Reader; -import java.util.HashMap; -import java.util.Map; - -import javax.script.ScriptEngine; -import javax.script.ScriptEngineManager; -import javax.script.ScriptException; - -import org.apache.log4j.Logger; - -import
org.dspace.core.ConfigurationManager; -import org.dspace.core.PluginManager; - -/** - * TaskResolver takes a logical name of a curation task and delivers a - * suitable implementation object. Supported implementation types include: - * (1) Classpath-local Java classes configured and loaded via PluginManager. - * (2) Local script-based tasks, viz. coded in any scripting language whose - * runtimes are accessible via the JSR-223 scripting API. This really amounts - * to the family of dynamic JVM languages: JRuby, Jython, Groovy, Javascript, etc - * Note that the requisite jars and other resources for these languages must be - * installed in the DSpace instance for them to be used here. - * Further work may involve remote URL-loadable code, etc. - * - * Scripted tasks are configured in dspace/config/modules/curate.cfg with the - * property "script.tasks" with value syntax: - * = taskName, - * = taskName - * where task-desc is a descriptor of the form: - * :: - * An example property value: - * - * ruby:rubytask.rb:LinkChecker = linkchecker - * - * This descriptor means that the 'ruby' script engine will be created, - * a script file named 'rubytask.rb' in the directory /ruby/rubtask.rb will be loaded - * and the resolver will expect that a class 'LinkChecker' will be defined in that script file. 
- * - * @author richardrodgers - */ - -public class TaskResolver -{ - // logging service - private static Logger log = Logger.getLogger(TaskResolver.class); - - // base directory of task scripts - private static String scriptDir = ConfigurationManager.getProperty("curate", "script.dir"); - - // map of task script descriptions, keyed by logical task name - private static Map scriptMap = new HashMap(); - - static - { - // build map of task descriptors - loadDescriptors(); - } - - private TaskResolver() - { - } - - /** - * Loads the map of script descriptors - */ - public static void loadDescriptors() - { - scriptMap.clear(); - String propVal = ConfigurationManager.getProperty("curate", "script.tasks"); - if (propVal != null) - { - for (String desc : propVal.split(",")) - { - String[] parts = desc.split("="); - scriptMap.put(parts[1].trim(), parts[0].trim()); - } - } - } - - /** - * Returns a task implementation for a given task name, - * or null if no implementation could be obtained. - */ - public static CurationTask resolveTask(String taskName) - { - CurationTask task = (CurationTask)PluginManager.getNamedPlugin("curate", CurationTask.class, taskName); - if (task == null) - { - // maybe it is implemented by a script? 
- String scriptDesc = scriptMap.get(taskName); - if (scriptDesc != null) - { - String[] descParts = scriptDesc.split(":"); - // first descriptor token is name ('alias') of scripting engine, - // which is also the subdirectory where script file kept - ScriptEngineManager mgr = new ScriptEngineManager(); - ScriptEngine engine = mgr.getEngineByName(descParts[0]); - if (engine != null) - { - // see if we can locate the script file and load it - // the second token is the relative path to the file - File script = new File(scriptDir, descParts[1]); - if (script.exists()) - { - try - { - Reader reader = new FileReader(script); - engine.eval(reader); - reader.close(); - // third token is name of class implementing - // CurationTask interface - add ".new" to ask for an instance - String implInst = descParts[2] + ".new"; - task = (CurationTask)engine.eval(implInst); - } - catch (FileNotFoundException fnfE) - { - log.error("Script: '" + script.getName() + "' not found for task: " + taskName); - } - catch (IOException ioE) - { - log.error("Error loading script: '" + script.getName() + "'"); - } - catch (ScriptException scE) - { - log.error("Error evaluating script: '" + script.getName() + "' msg: " + scE.getMessage()); - } - } - else - { - log.error("No script: '" + script.getName() + "' found for task: " + taskName); - } - } - else - { - log.error("Script engine: '" + descParts[0] + "' is not installed"); - } - } - } - return task; - } -} +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.curate; + +import java.io.File; +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.Reader; +import java.io.Writer; +import java.util.Properties; + +import javax.script.ScriptEngine; 
+import javax.script.ScriptEngineManager; +import javax.script.ScriptException; + +import org.apache.log4j.Logger; + +import org.dspace.core.ConfigurationManager; +import org.dspace.core.PluginManager; + +/** + * TaskResolver takes a logical name of a curation task and attempts to deliver + * a suitable implementation object. Supported implementation types include: + * (1) Classpath-local Java classes configured and loaded via PluginManager. + * (2) Local script-based tasks, viz. coded in any scripting language whose + * runtimes are accessible via the JSR-223 scripting API. This really amounts + * to the family of dynamic JVM languages: JRuby, Jython, Groovy, Javascript, etc. + * Note that the requisite jars and other resources for these languages must be + * installed in the DSpace instance for them to be used here. + * Further work may involve remote URL-loadable code, etc. + * + * Scripted tasks are managed in a directory configured with the + * dspace/config/modules/curate.cfg property "script.dir". A catalog of + * scripted tasks named 'task.catalog' is kept in this directory. + * Each task has a 'descriptor' property with value syntax: + * {@code <engine>|<relFilePath>|<implClassCtor>} + * An example property: + * + * linkchecker = ruby|rubytask.rb|LinkChecker.new + * + * This descriptor means that a 'ruby' script engine will be created, + * a script file named 'rubytask.rb' in the script directory will be + * loaded and the resolver will expect that an evaluation of 'LinkChecker.new' will + * provide a correct implementation object. + * + * Script files may embed their descriptors to facilitate deployment. + * To accomplish this, a script must include the descriptor string with syntax: + * {@code $td=<descriptor>} somewhere on a comment line. For example: + * + * # My descriptor $td=ruby|rubytask.rb|LinkChecker.new + * + * For portability, the {@code <relFilePath>} component may be omitted in this context. + * Thus, $td=ruby||LinkChecker.new will be expanded to a descriptor + * with the name of the embedding file.
+ * + * @author richardrodgers + */ + +public class TaskResolver +{ + // logging service + private static Logger log = Logger.getLogger(TaskResolver.class); + + // base directory of task scripts & catalog name + private static final String CATALOG = "task.catalog"; + private static final String scriptDir = ConfigurationManager.getProperty("curate", "script.dir"); + + // catalog of script tasks + private Properties catalog; + + public TaskResolver() + { + } + + /** + * Installs a task script. Succeeds only if script: + * (1) exists in the configured script directory and + * (2) contains a recognizable descriptor in a comment line. + * If script lacks a descriptor, it may still be installed + * by manually invoking addDescriptor. + * + * @param taskName + * logical name of task to associate with script + * @param fileName + * name of file containing task script + * @return true if script installed, false if installation failed + */ + public boolean installScript(String taskName, String fileName) + { + // Can we locate the file in the script directory? 
+ File script = new File(scriptDir, fileName); + if (script.exists()) + { + BufferedReader reader = null; + try + { + reader = new BufferedReader(new FileReader(script)); + String line = null; + while((line = reader.readLine()) != null) + { + if (line.startsWith("#") && line.indexOf("$td=") > 0) + { + String desc = line.substring(line.indexOf("$td=") + 4).trim(); + // insert relFilePath if missing + String[] tokens = desc.split("\\|"); + if (tokens[1].length() == 0) + { + desc = tokens[0] + "|" + fileName + "|" + tokens[2]; + } + addDescriptor(taskName, desc); + return true; + } + } + } + catch(IOException ioE) + { + log.error("Error reading task script: " + fileName, ioE); + } + finally + { + if (reader != null) + { + try + { + reader.close(); + } + catch(IOException ioE) + { + log.error("Error closing task script: " + fileName, ioE); + } + } + } + } + else + { + log.error("Task script: " + fileName + " not found in: " + scriptDir); + } + return false; + } + + /** + * Adds a task descriptor property and flushes catalog to disk. + * + * @param taskName + * logical task name + * @param descriptor + * descriptor for task + */ + public void addDescriptor(String taskName, String descriptor) + { + loadCatalog(); + catalog.put(taskName, descriptor); + Writer writer = null; + try + { + writer = new FileWriter(new File(scriptDir, CATALOG)); + catalog.store(writer, "do not edit"); + } + catch(IOException ioE) + { + log.error("Error saving scripted task catalog: " + CATALOG, ioE); + } + finally + { + if (writer != null) + { + try + { + writer.close(); + } + catch (IOException ioE) + { + log.error("Error closing scripted task catalog: " + CATALOG, ioE); + } + } + } + } + + /** + * Returns a task implementation for a given task name, + * or null if no implementation could be obtained.
+ * + * @param taskName + * logical task name + * @return task + * a ResolvedTask wrapping the CurationTask or ScriptedTask implementation + */ + public ResolvedTask resolveTask(String taskName) + { + CurationTask ctask = (CurationTask)PluginManager.getNamedPlugin("curate", CurationTask.class, taskName); + if (ctask != null) + { + return new ResolvedTask(taskName, ctask); + } + // maybe it is implemented by a script? + loadCatalog(); + String scriptDesc = catalog.getProperty(taskName); + if (scriptDesc != null) + { + String[] tokens = scriptDesc.split("\\|"); + // first descriptor token is name ('alias') of scripting engine + ScriptEngineManager mgr = new ScriptEngineManager(); + ScriptEngine engine = mgr.getEngineByName(tokens[0]); + if (engine != null) + { + // see if we can locate the script file and load it + // the second token is the relative path to the file + File script = new File(scriptDir, tokens[1]); + if (script.exists()) + { + try + { + Reader reader = new FileReader(script); + try { engine.eval(reader); } + finally { reader.close(); } + // third token is the constructor expression for the class + // implementing ScriptedTask interface + ScriptedTask stask = (ScriptedTask)engine.eval(tokens[2]); + return new ResolvedTask(taskName, stask); + } + catch (FileNotFoundException fnfE) + { + log.error("Script: '" + script.getName() + "' not found for task: " + taskName); + } + catch (IOException ioE) + { + log.error("Error loading script: '" + script.getName() + "'", ioE); + } + catch (ScriptException scE) + { + log.error("Error evaluating script: '" + script.getName() + "' msg: " + scE.getMessage(), scE); + } + } + else + { + log.error("No script: '" + script.getName() + "' found for task: " + taskName); + } + } + else + { + log.error("Script engine: '" + tokens[0] + "' is not installed"); + } + } + return null; + } + + /** + * Loads catalog of descriptors for tasks if not already loaded + */ + private void loadCatalog() + { + if (catalog == null) + { + catalog = new Properties(); + File catalogFile = new
File(scriptDir, CATALOG); + if (catalogFile.exists()) + { + try + { + Reader reader = new FileReader(catalogFile); + try { catalog.load(reader); } + finally { reader.close(); } + } + catch(IOException ioE) + { + log.error("Error loading scripted task catalog: " + CATALOG, ioE); + } + } + } + } +}