[DS-739] Writes empty documents (with modification time and status) for withdrawn or deleted handles to the index - useful for harvesting

git-svn-id: http://scm.dspace.org/svn/repo/dspace/trunk@5727 9c30dcfa-912a-0410-8fc2-9e0234be79fd
This commit is contained in:
Graham Triggs
2010-11-04 23:14:17 +00:00
parent 2d3a8e6b21
commit 1887864b20

View File

@@ -115,8 +115,9 @@ public class DSIndexer
{
private static final Logger log = Logger.getLogger(DSIndexer.class);
private static final String LAST_INDEXED_FIELD = "DSIndexer.lastIndexed";
private static final String LAST_INDEXED_FIELD = "DSIndexer.lastIndexed";
private static final String DOCUMENT_STATUS_FIELD = "DSIndexer.status";
private static final long WRITE_LOCK_TIMEOUT = 30000 /* 30 sec */;
private static int batchFlushAfterDocuments = ConfigurationManager.getIntProperty("search.batch.documents", 20);
@@ -368,7 +369,7 @@ public class DSIndexer
{
if (handle != null)
{
IndexingTask task = new IndexingTask(IndexingTask.Action.DELETE, new Term("handle", handle), null);
IndexingTask task = new IndexingTask(IndexingTask.Action.DELETE, new Term("handle", handle), buildDocumentForDeletedHandle(handle));
if (task != null)
{
if (batchProcessingMode)
@@ -731,7 +732,7 @@ public class DSIndexer
}
else
{
action = new IndexingTask(IndexingTask.Action.DELETE, term, null);
action = new IndexingTask(IndexingTask.Action.DELETE, term, buildDocumentForWithdrawnItem((Item)dso));
}
break;
@@ -757,7 +758,14 @@ public class DSIndexer
{
if (action.isDelete())
{
writer.deleteDocuments(action.getTerm());
if (action.getDocument() != null)
{
writer.updateDocument(action.getTerm(), action.getDocument());
}
else
{
writer.deleteDocuments(action.getTerm());
}
}
else
{
@@ -1246,6 +1254,7 @@ public class DSIndexer
// want to be able to check when last updated
// (not tokenized, but it is indexed)
doc.add(new Field(LAST_INDEXED_FIELD, Long.toString(System.currentTimeMillis()), Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add(new Field(DOCUMENT_STATUS_FIELD, "archived", Field.Store.YES, Field.Index.UN_TOKENIZED));
// KEPT FOR BACKWARDS COMPATIBILITY
// do location, type, handle first
@@ -1279,6 +1288,34 @@ public class DSIndexer
return doc;
}
/**
 * Build the placeholder document that is written to the index in place of
 * an object whose handle has been deleted.
 *
 * The document carries only bookkeeping fields: the time it was written,
 * a status of "deleted", and the handle it stands for. No content fields
 * are added, so it does not match ordinary content searches.
 *
 * @param handle the handle of the deleted object; when null, no handle
 *               field is added
 * @return a new document containing only the bookkeeping fields
 */
private static Document buildDocumentForDeletedHandle(String handle)
{
    Document doc = new Document();

    // want to be able to check when last updated
    // (not tokenized, but it is indexed)
    doc.add(new Field(LAST_INDEXED_FIELD, Long.toString(System.currentTimeMillis()), Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.add(new Field(DOCUMENT_STATUS_FIELD, "deleted", Field.Store.YES, Field.Index.UN_TOKENIZED));

    // Record which handle this placeholder stands for. The index is updated
    // by the term ("handle", <handle>); without this field a later update or
    // delete for the same handle could never match (and so never replace)
    // this placeholder, and a reader of the index could not tell which
    // object was deleted.
    if (handle != null)
    {
        doc.add(new Field("handle", handle, Field.Store.YES, Field.Index.UN_TOKENIZED));
    }

    // Do not add any content fields, as we don't want searches to find it -
    // it exists only so the status and last indexed time can be checked
    return doc;
}
/**
 * Build the placeholder document that is written to the index in place of
 * an item that has been withdrawn.
 *
 * The document carries only bookkeeping fields: the time it was written,
 * a status of "withdrawn", and the item's handle. No content fields are
 * added, so it does not match ordinary content searches.
 *
 * @param item the withdrawn item; when null, or when it has no handle,
 *             no handle field is added
 * @return a new document containing only the bookkeeping fields
 */
private static Document buildDocumentForWithdrawnItem(Item item)
{
    Document doc = new Document();

    // want to be able to check when last updated
    // (not tokenized, but it is indexed)
    doc.add(new Field(LAST_INDEXED_FIELD, Long.toString(System.currentTimeMillis()), Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.add(new Field(DOCUMENT_STATUS_FIELD, "withdrawn", Field.Store.YES, Field.Index.UN_TOKENIZED));

    // Record which item this placeholder stands for, so that a later update
    // or delete keyed on the handle term can match (and replace) it, and so
    // that a reader of the index can tell which item was withdrawn.
    String handle = (item != null) ? item.getHandle() : null;
    if (handle != null)
    {
        doc.add(new Field("handle", handle, Field.Store.YES, Field.Index.UN_TOKENIZED));
    }

    // Do not add any content fields, as we don't want searches to find it -
    // it exists only so the status and last indexed time can be checked
    return doc;
}
private static void closeAllReaders(Document doc)
{
if (doc != null)