mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-13 13:03:11 +00:00
Enhancements - new cmdline scope restrictions for operation:
By community/collection/item - patch#1340327 (Scott Yeadon)
By maximum number processed - patch#1228949 (Claudia Jurgen)
Performance tweaks (Richard Rodgers)
git-svn-id: http://scm.dspace.org/svn/repo/trunk@1352 9c30dcfa-912a-0410-8fc2-9e0234be79fd
This commit is contained in:
@@ -40,35 +40,32 @@
|
|||||||
|
|
||||||
package org.dspace.app.mediafilter;
|
package org.dspace.app.mediafilter;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileReader;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.StringTokenizer;
|
|
||||||
|
|
||||||
import org.apache.commons.cli.CommandLine;
|
import org.apache.commons.cli.CommandLine;
|
||||||
import org.apache.commons.cli.CommandLineParser;
|
import org.apache.commons.cli.CommandLineParser;
|
||||||
import org.apache.commons.cli.HelpFormatter;
|
import org.apache.commons.cli.HelpFormatter;
|
||||||
import org.apache.commons.cli.Options;
|
import org.apache.commons.cli.Options;
|
||||||
import org.apache.commons.cli.PosixParser;
|
import org.apache.commons.cli.PosixParser;
|
||||||
import org.dspace.content.Bitstream;
|
import org.dspace.content.Bitstream;
|
||||||
import org.dspace.content.BitstreamFormat;
|
|
||||||
import org.dspace.content.Bundle;
|
import org.dspace.content.Bundle;
|
||||||
|
import org.dspace.content.Collection;
|
||||||
|
import org.dspace.content.Community;
|
||||||
|
import org.dspace.content.DSpaceObject;
|
||||||
import org.dspace.content.Item;
|
import org.dspace.content.Item;
|
||||||
import org.dspace.content.ItemIterator;
|
import org.dspace.content.ItemIterator;
|
||||||
import org.dspace.core.ConfigurationManager;
|
import org.dspace.core.Constants;
|
||||||
import org.dspace.core.Context;
|
import org.dspace.core.Context;
|
||||||
import org.dspace.core.PluginManager;
|
import org.dspace.core.PluginManager;
|
||||||
|
import org.dspace.handle.HandleManager;
|
||||||
import org.dspace.search.DSIndexer;
|
import org.dspace.search.DSIndexer;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* MediaFilterManager is the class that invokes the media filters over the
|
* MediaFilterManager is the class that invokes the media filters over the
|
||||||
* repository's content. a few command line flags affect the operation of the
|
* repository's content. a few command line flags affect the operation of the
|
||||||
* MFM: -v verbose outputs all extracted text to SDTDOUT -f force forces all
|
* MFM: -v verbose outputs all extracted text to STDOUT; -f force forces all
|
||||||
* bitstreams to be processed, even if they have been before -n noindex does not
|
* bitstreams to be processed, even if they have been before; -n noindex does not
|
||||||
* recreate index after processing bitstreams
|
* recreate index after processing bitstreams; -i [identifier] limits processing
|
||||||
*
|
* scope to a community, collection or item; and -m [max] limits processing to a
|
||||||
|
* maximum number of items.
|
||||||
*/
|
*/
|
||||||
public class MediaFilterManager
|
public class MediaFilterManager
|
||||||
{
|
{
|
||||||
@@ -78,6 +75,12 @@ public class MediaFilterManager
|
|||||||
|
|
||||||
public static boolean isForce = false; // default to not forced
|
public static boolean isForce = false; // default to not forced
|
||||||
|
|
||||||
|
public static String identifier = null; // object scope limiter
|
||||||
|
|
||||||
|
public static int max2Process = Integer.MAX_VALUE; // maximum number to process
|
||||||
|
|
||||||
|
public static int processed = 0; // number processed
|
||||||
|
|
||||||
public static void main(String[] argv) throws Exception
|
public static void main(String[] argv) throws Exception
|
||||||
{
|
{
|
||||||
// set headless for non-gui workstations
|
// set headless for non-gui workstations
|
||||||
@@ -94,6 +97,10 @@ public class MediaFilterManager
|
|||||||
"force all bitstreams to be processed");
|
"force all bitstreams to be processed");
|
||||||
options.addOption("n", "noindex", false,
|
options.addOption("n", "noindex", false,
|
||||||
"do NOT re-create search index after filtering bitstreams");
|
"do NOT re-create search index after filtering bitstreams");
|
||||||
|
options.addOption("i", "identifier", true,
|
||||||
|
"ONLY process bitstreams belonging to identifier");
|
||||||
|
options.addOption("m", "maximum", true,
|
||||||
|
"process no more than maximum items");
|
||||||
options.addOption("h", "help", false, "help");
|
options.addOption("h", "help", false, "help");
|
||||||
|
|
||||||
CommandLine line = parser.parse(options, argv);
|
CommandLine line = parser.parse(options, argv);
|
||||||
@@ -121,6 +128,22 @@ public class MediaFilterManager
|
|||||||
isForce = true;
|
isForce = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (line.hasOption('i'))
|
||||||
|
{
|
||||||
|
identifier = line.getOptionValue('i');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (line.hasOption('m'))
|
||||||
|
{
|
||||||
|
max2Process = Integer.parseInt(line.getOptionValue('m'));
|
||||||
|
if (max2Process <= 1)
|
||||||
|
{
|
||||||
|
System.out.println("Invalid maximum value '" +
|
||||||
|
line.getOptionValue('m') + "' - ignoring");
|
||||||
|
max2Process = Integer.MAX_VALUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Context c = null;
|
Context c = null;
|
||||||
|
|
||||||
try
|
try
|
||||||
@@ -131,7 +154,32 @@ public class MediaFilterManager
|
|||||||
c.setIgnoreAuthorization(true);
|
c.setIgnoreAuthorization(true);
|
||||||
|
|
||||||
// now apply the filters
|
// now apply the filters
|
||||||
|
if (identifier == null)
|
||||||
|
{
|
||||||
applyFiltersAllItems(c);
|
applyFiltersAllItems(c);
|
||||||
|
}
|
||||||
|
else // restrict application scope to identifier
|
||||||
|
{
|
||||||
|
DSpaceObject dso = HandleManager.resolveToObject(c, identifier);
|
||||||
|
if (dso == null)
|
||||||
|
{
|
||||||
|
throw new IllegalArgumentException("Cannot resolve "
|
||||||
|
+ identifier + " to a DSpace object");
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (dso.getType())
|
||||||
|
{
|
||||||
|
case Constants.COMMUNITY:
|
||||||
|
applyFiltersCommunity(c, (Community)dso);
|
||||||
|
break;
|
||||||
|
case Constants.COLLECTION:
|
||||||
|
applyFiltersCollection(c, (Collection)dso);
|
||||||
|
break;
|
||||||
|
case Constants.ITEM:
|
||||||
|
applyFiltersItem(c, (Item)dso);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// create search index?
|
// create search index?
|
||||||
if (createIndex)
|
if (createIndex)
|
||||||
@@ -155,41 +203,74 @@ public class MediaFilterManager
|
|||||||
public static void applyFiltersAllItems(Context c) throws Exception
|
public static void applyFiltersAllItems(Context c) throws Exception
|
||||||
{
|
{
|
||||||
ItemIterator i = Item.findAll(c);
|
ItemIterator i = Item.findAll(c);
|
||||||
|
while (i.hasNext() && processed <= max2Process)
|
||||||
while (i.hasNext())
|
|
||||||
{
|
{
|
||||||
Item myItem = i.next();
|
applyFiltersItem(c, i.next());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
filterItem(c, myItem);
|
public static void applyFiltersCommunity(Context c, Community community)
|
||||||
|
throws Exception
|
||||||
|
{
|
||||||
|
Community[] subcommunities = community.getSubcommunities();
|
||||||
|
for (int i = 0; i < subcommunities.length; i++)
|
||||||
|
{
|
||||||
|
applyFiltersCommunity(c, subcommunities[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
Collection[] collections = community.getCollections();
|
||||||
|
for (int j = 0; j < collections.length; j++)
|
||||||
|
{
|
||||||
|
applyFiltersCollection(c, collections[j]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void applyFiltersCollection(Context c, Collection collection)
|
||||||
|
throws Exception
|
||||||
|
{
|
||||||
|
ItemIterator i = collection.getItems();
|
||||||
|
while (i.hasNext() && processed <= max2Process)
|
||||||
|
{
|
||||||
|
applyFiltersItem(c, i.next());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void applyFiltersItem(Context c, Item item) throws Exception
|
||||||
|
{
|
||||||
|
if (filterItem(c, item))
|
||||||
|
{
|
||||||
// commit changes after each filtered item
|
// commit changes after each filtered item
|
||||||
c.commit();
|
c.commit();
|
||||||
|
// increment processed count
|
||||||
|
++processed;
|
||||||
}
|
}
|
||||||
|
// clear item objects from context cache
|
||||||
|
item.decache();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* iterate through the item's bitstreams in the ORIGINAL bundle, applying
|
* iterate through the item's bitstreams in the ORIGINAL bundle, applying
|
||||||
* filters if possible
|
* filters if possible
|
||||||
|
*
|
||||||
|
* @return true if any bitstreams processed,
|
||||||
|
* false if none
|
||||||
*/
|
*/
|
||||||
public static void filterItem(Context c, Item myItem) throws Exception
|
public static boolean filterItem(Context c, Item myItem) throws Exception
|
||||||
{
|
{
|
||||||
// get 'original' bundles
|
// get 'original' bundles
|
||||||
Bundle[] myBundles = myItem.getBundles();
|
Bundle[] myBundles = myItem.getBundles("ORIGINAL");
|
||||||
|
boolean done = false;
|
||||||
for (int i = 0; i < myBundles.length; i++)
|
for (int i = 0; i < myBundles.length; i++)
|
||||||
{
|
|
||||||
// could have multiple 'ORIGINAL' bundles (hmm, probably not)
|
|
||||||
if ("ORIGINAL".equals(myBundles[i].getName()))
|
|
||||||
{
|
{
|
||||||
// now look at all of the bitstreams
|
// now look at all of the bitstreams
|
||||||
Bitstream[] myBitstreams = myBundles[i].getBitstreams();
|
Bitstream[] myBitstreams = myBundles[i].getBitstreams();
|
||||||
|
|
||||||
for (int k = 0; k < myBitstreams.length; k++)
|
for (int k = 0; k < myBitstreams.length; k++)
|
||||||
{
|
{
|
||||||
filterBitstream(c, myItem, myBitstreams[k]);
|
done |= filterBitstream(c, myItem, myBitstreams[k]);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return done;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -198,8 +279,11 @@ public class MediaFilterManager
|
|||||||
* An exception will be thrown if the media filter class cannot be
|
* An exception will be thrown if the media filter class cannot be
|
||||||
* instantiated, exceptions from filtering will be logged to STDOUT and
|
* instantiated, exceptions from filtering will be logged to STDOUT and
|
||||||
* swallowed.
|
* swallowed.
|
||||||
|
*
|
||||||
|
* @return true if bitstream processed,
|
||||||
|
* false if no applicable filter or already processed
|
||||||
*/
|
*/
|
||||||
public static void filterBitstream(Context c, Item myItem,
|
public static boolean filterBitstream(Context c, Item myItem,
|
||||||
Bitstream myBitstream) throws Exception
|
Bitstream myBitstream) throws Exception
|
||||||
{
|
{
|
||||||
// do we have a filter for that format?
|
// do we have a filter for that format?
|
||||||
@@ -215,6 +299,7 @@ public class MediaFilterManager
|
|||||||
{
|
{
|
||||||
myItem.update(); // Make sure new bitstream has a sequence
|
myItem.update(); // Make sure new bitstream has a sequence
|
||||||
// number
|
// number
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (Exception e)
|
catch (Exception e)
|
||||||
@@ -224,5 +309,6 @@ public class MediaFilterManager
|
|||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user