mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-13 21:13:19 +00:00
[DS-739] Batch processing mode extended to itemimport, filter media. Configurable option for batch size added
git-svn-id: http://scm.dspace.org/svn/repo/dspace/trunk@5726 9c30dcfa-912a-0410-8fc2-9e0234be79fd
This commit is contained in:
@@ -75,6 +75,7 @@ import org.dspace.core.Context;
|
||||
import org.dspace.eperson.EPerson;
|
||||
import org.dspace.eperson.Group;
|
||||
import org.dspace.handle.HandleManager;
|
||||
import org.dspace.search.DSIndexer;
|
||||
import org.dspace.workflow.WorkflowManager;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.NamedNodeMap;
|
||||
@@ -135,6 +136,10 @@ public class ItemImport
|
||||
|
||||
|
||||
public static void main(String[] argv) throws Exception
|
||||
{
|
||||
DSIndexer.setBatchProcessingMode(true);
|
||||
|
||||
try
|
||||
{
|
||||
// create an options object and populate it
|
||||
CommandLineParser parser = new PosixParser();
|
||||
@@ -566,6 +571,11 @@ public class ItemImport
|
||||
}
|
||||
System.exit(status);
|
||||
}
|
||||
finally
|
||||
{
|
||||
DSIndexer.setBatchProcessingMode(false);
|
||||
}
|
||||
}
|
||||
|
||||
private void addItems(Context c, Collection[] mycollections,
|
||||
String sourceDir, String mapFile, boolean template) throws Exception
|
||||
|
@@ -393,8 +393,16 @@ public class MediaFilterManager
|
||||
{
|
||||
System.out.println("Updating search index:");
|
||||
}
|
||||
DSIndexer.setBatchProcessingMode(true);
|
||||
try
|
||||
{
|
||||
DSIndexer.updateIndex(c);
|
||||
}
|
||||
finally
|
||||
{
|
||||
DSIndexer.setBatchProcessingMode(false);
|
||||
}
|
||||
}
|
||||
|
||||
c.complete();
|
||||
c = null;
|
||||
|
@@ -119,6 +119,9 @@ public class DSIndexer
|
||||
|
||||
private static final long WRITE_LOCK_TIMEOUT = 30000 /* 30 sec */;
|
||||
|
||||
private static int batchFlushAfterDocuments = ConfigurationManager.getIntProperty("search.batch.documents", 20);
|
||||
private static boolean batchProcessingMode = false;
|
||||
|
||||
// Class to hold the index configuration (one instance per config line)
|
||||
private static class IndexConfig
|
||||
{
|
||||
@@ -248,6 +251,15 @@ public class DSIndexer
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Switches batch processing mode on or off by storing the value in the
 * static batchProcessingMode flag. Turning the mode off also calls
 * flushIndexingTaskQueue().
 *
 * @param mode true to enable batch processing mode, false to disable it
 */
public static void setBatchProcessingMode(boolean mode)
|
||||
{
|
||||
batchProcessingMode = mode;
|
||||
// Leaving batch mode: flush the queue (presumably so that tasks still
// waiting in it are written out rather than left pending - confirm).
if (mode == false)
|
||||
{
|
||||
flushIndexingTaskQueue();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* If the handle for the "dso" already exists in the index, and
|
||||
* the "dso" has a lastModified timestamp that is newer than
|
||||
@@ -277,29 +289,28 @@ public class DSIndexer
|
||||
*/
|
||||
public static void indexContent(Context context, DSpaceObject dso, boolean force) throws SQLException
|
||||
{
|
||||
String handle = dso.getHandle();
|
||||
IndexingAction action = null;
|
||||
IndexWriter writer = null;
|
||||
|
||||
try
|
||||
{
|
||||
action = prepareIndexingAction(dso, force);
|
||||
|
||||
if (action != null)
|
||||
IndexingTask task = prepareIndexingTask(dso, force);
|
||||
if (task != null)
|
||||
{
|
||||
if (batchProcessingMode)
|
||||
{
|
||||
addToIndexingTaskQueue(task);
|
||||
}
|
||||
else
|
||||
{
|
||||
IndexWriter writer = null;
|
||||
try
|
||||
{
|
||||
writer = openIndex(false);
|
||||
processIndexingAction(writer, action);
|
||||
}
|
||||
|
||||
} catch (Exception e)
|
||||
{
|
||||
log.error(e.getMessage(), e);
|
||||
processIndexingTask(writer, task);
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (action != null && action.getDocument() != null)
|
||||
if (task.getDocument() != null)
|
||||
{
|
||||
closeAllReaders(action.getDocument());
|
||||
closeAllReaders(task.getDocument());
|
||||
}
|
||||
|
||||
if (writer != null)
|
||||
@@ -315,6 +326,13 @@ public class DSIndexer
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
log.error(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* unIndex removes an Item, Collection, or Community only works if the
|
||||
@@ -346,22 +364,31 @@ public class DSIndexer
|
||||
* @throws SQLException
|
||||
* @throws IOException
|
||||
*/
|
||||
public static void unIndexContent(Context context, String handle)
|
||||
throws SQLException, IOException
|
||||
public static void unIndexContent(Context context, String handle) throws SQLException, IOException
|
||||
{
|
||||
if (handle != null)
|
||||
{
|
||||
IndexingTask task = new IndexingTask(IndexingTask.Action.DELETE, new Term("handle", handle), null);
|
||||
if (task != null)
|
||||
{
|
||||
if (batchProcessingMode)
|
||||
{
|
||||
addToIndexingTaskQueue(task);
|
||||
}
|
||||
else
|
||||
{
|
||||
IndexWriter writer = openIndex(false);
|
||||
try
|
||||
{
|
||||
// we have a handle (our unique ID, so remove)
|
||||
processIndexingAction(writer, new IndexingAction(IndexingAction.Action.DELETE, new Term("handle", handle), null));
|
||||
processIndexingTask(writer, task);
|
||||
}
|
||||
finally
|
||||
{
|
||||
writer.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
log.warn("unindex of content with null handle attempted");
|
||||
@@ -372,8 +399,6 @@ public class DSIndexer
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* reIndexContent removes something from the index, then re-indexes it
|
||||
*
|
||||
@@ -407,7 +432,6 @@ public class DSIndexer
|
||||
|
||||
/* Reindex all content preemptively. */
|
||||
DSIndexer.updateIndex(c, true);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -424,6 +448,7 @@ public class DSIndexer
|
||||
|
||||
try
|
||||
{
|
||||
flushIndexingTaskQueue(writer);
|
||||
writer.optimize();
|
||||
}
|
||||
finally
|
||||
@@ -443,7 +468,10 @@ public class DSIndexer
|
||||
*/
|
||||
public static void main(String[] args) throws SQLException, IOException
|
||||
{
|
||||
|
||||
Date startTime = new Date();
|
||||
try
|
||||
{
|
||||
setBatchProcessingMode(true);
|
||||
Context context = new Context();
|
||||
context.setIgnoreAuthorization(true);
|
||||
|
||||
@@ -528,6 +556,15 @@ public class DSIndexer
|
||||
|
||||
log.info("Done with indexing");
|
||||
}
|
||||
finally
|
||||
{
|
||||
setBatchProcessingMode(false);
|
||||
Date endTime = new Date();
|
||||
System.out.println("Started: " + startTime.getTime());
|
||||
System.out.println("Ended: " + endTime.getTime());
|
||||
System.out.println("Elapsed time: " + ((endTime.getTime() - startTime.getTime()) / 1000) + " secs (" + (endTime.getTime() - startTime.getTime()) + " msecs)");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterates over all Items, Collections and Communities. And updates
|
||||
@@ -562,7 +599,7 @@ public class DSIndexer
|
||||
for(items = Item.findAll(context);items.hasNext();)
|
||||
{
|
||||
Item item = (Item) items.next();
|
||||
addToIndexingActionQueue(prepareIndexingAction(item, force));
|
||||
indexContent(context, item);
|
||||
item.decache();
|
||||
}
|
||||
}
|
||||
@@ -574,24 +611,19 @@ public class DSIndexer
|
||||
}
|
||||
}
|
||||
|
||||
Collection[] collections = Collection.findAll(context);
|
||||
for (int i = 0; i < collections.length; i++)
|
||||
for (Collection collection : Collection.findAll(context))
|
||||
{
|
||||
addToIndexingActionQueue(prepareIndexingAction(collections[i], force));
|
||||
context.removeCached(collections[i], collections[i].getID());
|
||||
|
||||
indexContent(context, collection);
|
||||
context.removeCached(collection, collection.getID());
|
||||
}
|
||||
|
||||
Community[] communities = Community.findAll(context);
|
||||
for (int i = 0; i < communities.length; i++)
|
||||
for (Community community : Community.findAll(context))
|
||||
{
|
||||
addToIndexingActionQueue(prepareIndexingAction(communities[i], force));
|
||||
context.removeCached(communities[i], communities[i].getID());
|
||||
indexContent(context, community);
|
||||
context.removeCached(community, community.getID());
|
||||
}
|
||||
|
||||
flushIndexingActionQueue();
|
||||
optimizeIndex(context);
|
||||
|
||||
}
|
||||
catch(Exception e)
|
||||
{
|
||||
@@ -679,11 +711,11 @@ public class DSIndexer
|
||||
}
|
||||
|
||||
|
||||
static IndexingAction prepareIndexingAction(DSpaceObject dso, boolean force) throws SQLException, IOException
|
||||
static IndexingTask prepareIndexingTask(DSpaceObject dso, boolean force) throws SQLException, IOException
|
||||
{
|
||||
String handle = dso.getHandle();
|
||||
Term term = new Term("handle", handle);
|
||||
IndexingAction action = null;
|
||||
IndexingTask action = null;
|
||||
switch (dso.getType())
|
||||
{
|
||||
case Constants.ITEM :
|
||||
@@ -694,23 +726,23 @@ public class DSIndexer
|
||||
if (requiresIndexing(term, ((Item)dso).getLastModified()) || force)
|
||||
{
|
||||
log.info("Writing Item: " + handle + " to Index");
|
||||
action = new IndexingAction(IndexingAction.Action.UPDATE, term, buildDocumentForItem((Item)dso));
|
||||
action = new IndexingTask(IndexingTask.Action.UPDATE, term, buildDocumentForItem((Item)dso));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
action = new IndexingAction(IndexingAction.Action.DELETE, term, null);
|
||||
action = new IndexingTask(IndexingTask.Action.DELETE, term, null);
|
||||
}
|
||||
break;
|
||||
|
||||
case Constants.COLLECTION :
|
||||
log.info("Writing Collection: " + handle + " to Index");
|
||||
action = new IndexingAction(IndexingAction.Action.UPDATE, term, buildDocumentForCollection((Collection)dso));
|
||||
action = new IndexingTask(IndexingTask.Action.UPDATE, term, buildDocumentForCollection((Collection)dso));
|
||||
break;
|
||||
|
||||
case Constants.COMMUNITY :
|
||||
log.info("Writing Community: " + handle + " to Index");
|
||||
action = new IndexingAction(IndexingAction.Action.UPDATE, term, buildDocumentForCommunity((Community)dso));
|
||||
action = new IndexingTask(IndexingTask.Action.UPDATE, term, buildDocumentForCommunity((Community)dso));
|
||||
break;
|
||||
|
||||
default :
|
||||
@@ -719,7 +751,7 @@ public class DSIndexer
|
||||
return action;
|
||||
}
|
||||
|
||||
static void processIndexingAction(IndexWriter writer, IndexingAction action) throws IOException
|
||||
static void processIndexingTask(IndexWriter writer, IndexingTask action) throws IOException
|
||||
{
|
||||
if (action != null)
|
||||
{
|
||||
@@ -734,21 +766,21 @@ public class DSIndexer
|
||||
}
|
||||
}
|
||||
|
||||
private static List<IndexingAction> actionQueue = new ArrayList<IndexingAction>();
|
||||
private static List<IndexingTask> actionQueue = new ArrayList<IndexingTask>();
|
||||
|
||||
static void addToIndexingActionQueue(IndexingAction action)
|
||||
static synchronized void addToIndexingTaskQueue(IndexingTask action)
|
||||
{
|
||||
if (action != null)
|
||||
{
|
||||
actionQueue.add(action);
|
||||
if (actionQueue.size() > 10)
|
||||
if (actionQueue.size() >= batchFlushAfterDocuments)
|
||||
{
|
||||
flushIndexingActionQueue();
|
||||
flushIndexingTaskQueue();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static synchronized void flushIndexingActionQueue()
|
||||
static void flushIndexingTaskQueue()
|
||||
{
|
||||
if (actionQueue.size() > 0)
|
||||
{
|
||||
@@ -757,22 +789,7 @@ public class DSIndexer
|
||||
try
|
||||
{
|
||||
writer = openIndex(false);
|
||||
for (IndexingAction action : actionQueue)
|
||||
{
|
||||
try
|
||||
{
|
||||
processIndexingAction(writer, action);
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (action.getDocument() != null)
|
||||
{
|
||||
closeAllReaders(action.getDocument());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
actionQueue.clear();
|
||||
flushIndexingTaskQueue(writer);
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
@@ -792,10 +809,33 @@ public class DSIndexer
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Processes every task currently held in actionQueue against the supplied
 * writer and then empties the queue. This method does not open or close
 * the writer itself.
 *
 * @param writer the index writer passed on to processIndexingTask for each
 *               queued task
 */
private static synchronized void flushIndexingTaskQueue(IndexWriter writer)
|
||||
{
|
||||
for (IndexingTask action : actionQueue)
|
||||
{
|
||||
try
|
||||
{
|
||||
processIndexingTask(writer, action);
|
||||
}
|
||||
// An I/O failure on one task is logged and the loop moves on to the next task.
catch (IOException e)
|
||||
{
|
||||
log.error(e);
|
||||
}
|
||||
// Runs whether or not the task succeeded: close the readers of the
// task's document, when it has one.
finally
|
||||
{
|
||||
if (action.getDocument() != null)
|
||||
{
|
||||
closeAllReaders(action.getDocument());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The queue is cleared even if some tasks failed above.
actionQueue.clear();
|
||||
}
|
||||
|
||||
////////////////////////////////////
|
||||
// Private
|
||||
////////////////////////////////////
|
||||
|
@@ -1,45 +0,0 @@
|
||||
package org.dspace.search;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
||||
/**
 * Package-private holder that groups an action type (ADD, UPDATE or DELETE)
 * with a Lucene Term and a Lucene Document.
 */
class IndexingAction
|
||||
{
|
||||
/** The kinds of action an instance can represent. */
enum Action { ADD, UPDATE, DELETE };
|
||||
|
||||
private Action action;
|
||||
private Term term;
|
||||
private Document doc;
|
||||
|
||||
/**
 * Stores the three values as given; no validation is performed.
 *
 * @param pAction the action type
 * @param pTerm   the term associated with the action
 * @param pDoc    the document associated with the action
 */
IndexingAction(Action pAction, Term pTerm, Document pDoc)
|
||||
{
|
||||
action = pAction;
|
||||
term = pTerm;
|
||||
doc = pDoc;
|
||||
}
|
||||
|
||||
/** @return true if the stored action is Action.ADD */
boolean isAdd()
|
||||
{
|
||||
return action == Action.ADD;
|
||||
}
|
||||
|
||||
/** @return true if the stored action is Action.DELETE */
boolean isDelete()
|
||||
{
|
||||
return action == Action.DELETE;
|
||||
}
|
||||
|
||||
/** @return true if the stored action is Action.UPDATE */
boolean isUpdate()
|
||||
{
|
||||
return action == Action.UPDATE;
|
||||
}
|
||||
|
||||
/** @return the term passed to the constructor */
Term getTerm()
|
||||
{
|
||||
return term;
|
||||
}
|
||||
|
||||
/** @return the document passed to the constructor */
Document getDocument()
|
||||
{
|
||||
return doc;
|
||||
}
|
||||
|
||||
}
|
84
dspace-api/src/main/java/org/dspace/search/IndexingTask.java
Normal file
84
dspace-api/src/main/java/org/dspace/search/IndexingTask.java
Normal file
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
* IndexingTask.java
|
||||
*
|
||||
* Version: $Revision: 5724 $
|
||||
*
|
||||
* Date: $Date: 2010-11-04 17:34:12 +0000 (Thu, 04 Nov 2010) $
|
||||
*
|
||||
* Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
|
||||
* Institute of Technology. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* - Neither the name of the Hewlett-Packard Company nor the name of the
|
||||
* Massachusetts Institute of Technology nor the names of their
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
||||
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
||||
* DAMAGE.
|
||||
*/
|
||||
package org.dspace.search;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
||||
/**
 * Package-private holder for one unit of indexing work: an action type
 * (ADD, UPDATE or DELETE) together with a Lucene Term and a Lucene Document.
 * The document may be null; DSIndexer builds its DELETE tasks with a null
 * document and checks getDocument() for null before using it.
 */
class IndexingTask
|
||||
{
|
||||
/** The kinds of work a task can represent. */
enum Action { ADD, UPDATE, DELETE };
|
||||
|
||||
private Action action;
|
||||
private Term term;
|
||||
private Document doc;
|
||||
|
||||
/**
 * Stores the three values as given; no validation is performed.
 *
 * @param pAction the action type
 * @param pTerm   the term associated with the task
 * @param pDoc    the document associated with the task, may be null
 */
IndexingTask(Action pAction, Term pTerm, Document pDoc)
|
||||
{
|
||||
action = pAction;
|
||||
term = pTerm;
|
||||
doc = pDoc;
|
||||
}
|
||||
|
||||
/** @return true if the stored action is Action.ADD */
boolean isAdd()
|
||||
{
|
||||
return action == Action.ADD;
|
||||
}
|
||||
|
||||
/** @return true if the stored action is Action.DELETE */
boolean isDelete()
|
||||
{
|
||||
return action == Action.DELETE;
|
||||
}
|
||||
|
||||
/** @return true if the stored action is Action.UPDATE */
boolean isUpdate()
|
||||
{
|
||||
return action == Action.UPDATE;
|
||||
}
|
||||
|
||||
/** @return the term passed to the constructor */
Term getTerm()
|
||||
{
|
||||
return term;
|
||||
}
|
||||
|
||||
/** @return the document passed to the constructor, possibly null */
Document getDocument()
|
||||
{
|
||||
return doc;
|
||||
}
|
||||
|
||||
}
|
@@ -79,6 +79,7 @@
|
||||
- [DS-733] Load testing utilities
|
||||
- [DS-734] Improve database efficiency
|
||||
- [DS-736] ItemImport usage and efficiency improvements
|
||||
- [DS-739] Lucene indexing efficiency improvements
|
||||
|
||||
(Graham Triggs, Mark Wood)
|
||||
- [DS-707] General improvements to performance, bug fixes and style
|
||||
|
4
pom.xml
4
pom.xml
@@ -360,12 +360,12 @@
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-core</artifactId>
|
||||
<version>2.3.0</version>
|
||||
<version>2.9.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-analyzers</artifactId>
|
||||
<version>2.3.0</version>
|
||||
<version>2.9.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.dspace</groupId>
|
||||
|
Reference in New Issue
Block a user