port PR 1867

This commit is contained in:
Terry Brady
2018-08-06 10:17:52 -07:00
parent 3eb16056c6
commit 06ca8809e1
5 changed files with 332 additions and 28 deletions

View File

@@ -256,6 +256,12 @@ public class ItemServiceImpl extends DSpaceObjectServiceImpl<Item> implements It
return itemDAO.findAll(context, true, true, true, since);
}
@Override
public Iterator<Item> findInArchiveOrWithdrawnNonDiscoverableModifiedSince(Context context, Date since)
    throws SQLException {
    // Delegates to the DAO with the flag combination (true, true, false).
    // NOTE(review): the third flag is presumably the "discoverable" filter, switched off here
    // to select non-discoverable items -- confirm against ItemDAO.findAll.
    final boolean discoverable = false;
    Iterator<Item> matches = itemDAO.findAll(context, true, true, discoverable, since);
    return matches;
}
@Override
public void updateLastModified(Context context, Item item) throws SQLException, AuthorizeException {
item.setLastModified(new Date());

View File

@@ -169,6 +169,16 @@ public interface ItemService extends DSpaceObjectService<Item>, DSpaceObjectLega
public Iterator<Item> findInArchiveOrWithdrawnDiscoverableModifiedSince(Context context, Date since)
throws SQLException;
/**
 * Get all Items installed or withdrawn, NON-discoverable, and modified since a Date.
 *
 * @param context context
 * @param since earliest interesting last-modified date, or null for no date test.
 * @return an iterator over the matching items.
 * @throws SQLException if database error
 */
public Iterator<Item> findInArchiveOrWithdrawnNonDiscoverableModifiedSince(Context context, Date since)
throws SQLException;
/**
* Get all the items (including private and withdrawn) in this collection. The order is indeterminate.
*

View File

@@ -223,6 +223,124 @@ public class ItemTest extends AbstractDSpaceObjectTest {
assertFalse("testFindBySubmitter 3", all.hasNext());
}
/**
 * Test of findInArchiveOrWithdrawnDiscoverableModifiedSince method, of class Item.
 * Exercises the date cut-off in both directions, the withdrawn and archived states,
 * and the exclusion of non-discoverable items.
 */
@Test
public void testFindInArchiveOrWithdrawnDiscoverableModifiedSince() throws Exception {
    // Setup: the item is withdrawn (not archived) and discoverable
    it.setWithdrawn(true);
    it.setArchived(false);
    it.setDiscoverable(true);

    // Step 1: a cut-off one day AFTER the last-modified date must yield an iterator without our item
    Iterator<Item> afterCutoff = itemService.findInArchiveOrWithdrawnDiscoverableModifiedSince(context,
        DateUtils.addDays(it.getLastModified(), 1));
    assertThat("Returned list should not be null", afterCutoff, notNullValue());
    boolean foundAfterCutoff = false;
    while (afterCutoff.hasNext()) {
        if (afterCutoff.next().equals(it)) {
            foundAfterCutoff = true;
        }
    }
    assertFalse("List should not contain item when passing a date newer than item last-modified date",
        foundAfterCutoff);

    // Step 2: a cut-off one day BEFORE the last-modified date must yield an iterator containing our item
    Iterator<Item> withdrawnHits = itemService.findInArchiveOrWithdrawnDiscoverableModifiedSince(context,
        DateUtils.addDays(it.getLastModified(), -1));
    assertThat("Returned list should not be null", withdrawnHits, notNullValue());
    boolean foundWithdrawn = false;
    while (withdrawnHits.hasNext()) {
        if (withdrawnHits.next().equals(it)) {
            foundWithdrawn = true;
        }
    }
    assertTrue("List should contain item when passing a date older than item last-modified date",
        foundWithdrawn);

    // Step 3: same query with the item archived instead of withdrawn; the outcome must not change
    it.setWithdrawn(false);
    it.setArchived(true);
    Iterator<Item> archivedHits = itemService.findInArchiveOrWithdrawnDiscoverableModifiedSince(context,
        DateUtils.addDays(it.getLastModified(), -1));
    assertThat("Returned list should not be null", archivedHits, notNullValue());
    boolean foundArchived = false;
    while (archivedHits.hasNext()) {
        if (archivedHits.next().equals(it)) {
            foundArchived = true;
        }
    }
    assertTrue("List should contain item when passing a date older than item last-modified date",
        foundArchived);

    // Step 4: once the item is non-discoverable it must no longer be returned
    it.setDiscoverable(false);
    Iterator<Item> hiddenHits = itemService.findInArchiveOrWithdrawnDiscoverableModifiedSince(context,
        DateUtils.addDays(it.getLastModified(), -1));
    assertThat("Returned list should not be null", hiddenHits, notNullValue());
    boolean foundHidden = false;
    while (hiddenHits.hasNext()) {
        if (hiddenHits.next().equals(it)) {
            foundHidden = true;
        }
    }
    assertFalse("List should not contain non-discoverable items", foundHidden);
}
/**
 * Test of findInArchiveOrWithdrawnNonDiscoverableModifiedSince method, of class Item.
 * Exercises the date cut-off in both directions and the exclusion of discoverable items.
 */
@Test
public void testFindInArchiveOrWithdrawnNonDiscoverableModifiedSince() throws Exception {
    // Setup: the item is withdrawn (not archived) and NOT discoverable
    it.setWithdrawn(true);
    it.setArchived(false);
    it.setDiscoverable(false);

    // Step 1: a cut-off one day AFTER the last-modified date must yield an iterator without our item
    Iterator<Item> afterCutoff = itemService.findInArchiveOrWithdrawnNonDiscoverableModifiedSince(context,
        DateUtils.addDays(it.getLastModified(), 1));
    assertThat("Returned list should not be null", afterCutoff, notNullValue());
    boolean foundAfterCutoff = false;
    while (afterCutoff.hasNext()) {
        if (afterCutoff.next().equals(it)) {
            foundAfterCutoff = true;
        }
    }
    assertFalse("List should not contain item when passing a date newer than item last-modified date",
        foundAfterCutoff);

    // Step 2: a cut-off one day BEFORE the last-modified date must yield an iterator containing our item
    Iterator<Item> hiddenHits = itemService.findInArchiveOrWithdrawnNonDiscoverableModifiedSince(context,
        DateUtils.addDays(it.getLastModified(), -1));
    assertThat("Returned list should not be null", hiddenHits, notNullValue());
    boolean foundHidden = false;
    while (hiddenHits.hasNext()) {
        if (hiddenHits.next().equals(it)) {
            foundHidden = true;
        }
    }
    assertTrue("List should contain item when passing a date older than item last-modified date",
        foundHidden);

    // Step 3: once the item is discoverable, the same query must no longer return it
    it.setDiscoverable(true);
    Iterator<Item> discoverableHits = itemService.findInArchiveOrWithdrawnNonDiscoverableModifiedSince(context,
        DateUtils.addDays(it.getLastModified(), -1));
    assertThat("Returned list should not be null", discoverableHits, notNullValue());
    boolean foundDiscoverable = false;
    while (discoverableHits.hasNext()) {
        if (discoverableHits.next().equals(it)) {
            foundDiscoverable = true;
        }
    }
    assertFalse("List should not contain discoverable items", foundDiscoverable);
}
/**
* Test of getID method, of class Item.
*/

View File

@@ -17,9 +17,13 @@ import java.sql.SQLException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.UUID;
import javax.xml.stream.XMLStreamException;
import com.lyncode.xoai.dataprovider.exceptions.ConfigurationException;
@@ -38,6 +42,7 @@ import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.dspace.authorize.ResourcePolicy;
import org.dspace.authorize.factory.AuthorizeServiceFactory;
import org.dspace.authorize.service.AuthorizeService;
import org.dspace.content.Bitstream;
@@ -173,30 +178,107 @@ public class XOAI {
System.out
.println("Incremental import. Searching for documents modified after: "
+ last.toString());
// Index both in_archive items AND withdrawn items. Withdrawn items will be flagged withdrawn
// (in order to notify external OAI harvesters of their new status)
/*
* Index all changed or new items, as well as items whose visibility is
* liable to change due to an embargo.
*/
try {
Iterator<Item> iterator = itemService.findInArchiveOrWithdrawnDiscoverableModifiedSince(
context, last);
return this.index(iterator);
Iterator<Item> discoverableChangedItems = itemService
.findInArchiveOrWithdrawnDiscoverableModifiedSince(context, last);
Iterator<Item> nonDiscoverableChangedItems = itemService
.findInArchiveOrWithdrawnNonDiscoverableModifiedSince(context, last);
Iterator<Item> possiblyChangedItems = getItemsWithPossibleChangesBefore(last);
return this.index(discoverableChangedItems) + this.index(nonDiscoverableChangedItems)
+ this.index(possiblyChangedItems);
} catch (SQLException ex) {
throw new DSpaceSolrIndexerException(ex.getMessage(), ex);
}
}
/**
 * Get all items already in the index which are liable to change visibility
 * due to an embargo (flagged {@code item.willChangeStatus:true}). Only those
 * whose last-modified date lies before {@code last} are returned, so that
 * items already picked up as "modified since last" are not updated twice in
 * one import run.
 *
 * @param last
 *            maximum date for an item to be considered for an update
 * @return Iterator over list of items which might have changed their
 *         visibility since the last update.
 * @throws DSpaceSolrIndexerException
 *             if the Solr query or the database lookup fails
 */
private Iterator<Item> getItemsWithPossibleChangesBefore(Date last) throws DSpaceSolrIndexerException {
    // Number of documents requested per Solr round trip.
    final int pageSize = 100;
    try {
        List<Item> items = new ArrayList<Item>();
        int offset = 0;
        long total;
        do {
            // Build a fresh query for every page so that nothing done to the query object
            // by the search helper can accumulate across iterations.
            SolrQuery params = new SolrQuery("item.willChangeStatus:true").addField("item.id");
            params.setStart(offset);
            params.setRows(pageSize);
            SolrDocumentList documents = DSpaceSolrSearch.query(solrServerResolver.getServer(), params);
            // getNumFound() is the TOTAL number of matches, not the number of documents in
            // this page; only documents.size() entries can safely be read from the list.
            total = documents.getNumFound();
            if (documents.isEmpty()) {
                break;
            }
            for (int i = 0; i < documents.size(); i++) {
                Item item = itemService.find(context,
                    UUID.fromString((String) documents.get(i).getFieldValue("item.id")));
                // The index may still reference an item that can no longer be loaded.
                if (item != null && item.getLastModified().before(last)) {
                    items.add(item);
                }
            }
            offset += documents.size();
        } while (offset < total);
        return items.iterator();
    } catch (SolrServerException | SQLException | DSpaceSolrException ex) {
        throw new DSpaceSolrIndexerException(ex.getMessage(), ex);
    }
}
/**
 * Run a full import: index every in-archive or withdrawn item, first the
 * discoverable ones and then the non-discoverable ones.
 *
 * @return the sum of the counts returned by the two index passes
 * @throws DSpaceSolrIndexerException
 *             if the items cannot be read from the database
 */
private int indexAll() throws DSpaceSolrIndexerException {
    System.out.println("Full import");
    try {
        // Index both in_archive items AND withdrawn items. Withdrawn items
        // will be flagged withdrawn
        // (in order to notify external OAI harvesters of their new status)
        Iterator<Item> discoverableItems = itemService
            .findInArchiveOrWithdrawnDiscoverableModifiedSince(context, null);
        Iterator<Item> nonDiscoverableItems = itemService
            .findInArchiveOrWithdrawnNonDiscoverableModifiedSince(context, null);
        return this.index(discoverableItems) + this.index(nonDiscoverableItems);
    } catch (SQLException ex) {
        throw new DSpaceSolrIndexerException(ex.getMessage(), ex);
    }
}
/**
 * Check if an item is already indexed, i.e. whether a lookup by
 * {@code item.id} reports exactly one match. Using this, it is possible to
 * check if withdrawn or nondiscoverable items have to be indexed at all.
 *
 * @param item
 *            Item that should be checked for its presence in the index.
 * @return {@code true} if exactly one match is reported; {@code false}
 *         otherwise, including when the lookup itself fails.
 */
private boolean checkIfIndexed(Item item) {
    SolrQuery lookup = new SolrQuery("item.id:" + item.getID().toString()).addField("item.id");
    try {
        long hits = DSpaceSolrSearch.query(solrServerResolver.getServer(), lookup).getNumFound();
        return hits == 1;
    } catch (DSpaceSolrException | SolrServerException ignored) {
        // A failed lookup is reported as "not indexed"; the exception is neither
        // propagated nor logged here.
        return false;
    }
}
/**
 * Check if an item is flagged visible ({@code item.public}) in the index.
 *
 * @param item
 *            Item whose stored {@code item.public} flag should be read.
 * @return {@code true} only if exactly one match is reported for the item
 *         and its stored {@code item.public} value is {@code true};
 *         {@code false} otherwise, including when the field is missing or
 *         the lookup itself fails.
 */
private boolean checkIfVisibleInOAI(Item item) {
    SolrQuery params = new SolrQuery("item.id:" + item.getID().toString()).addField("item.public");
    try {
        SolrDocumentList documents = DSpaceSolrSearch.query(solrServerResolver.getServer(), params);
        if (documents.getNumFound() != 1 || documents.isEmpty()) {
            return false;
        }
        // Null-safe: a document without a stored item.public value counts as not visible
        // instead of failing on the unboxing of null.
        return Boolean.TRUE.equals(documents.get(0).getFieldValue("item.public"));
    } catch (DSpaceSolrException | SolrServerException ignored) {
        // A failed lookup is reported as "not visible"; the exception is neither
        // propagated nor logged here.
        return false;
    }
}
private int index(Iterator<Item> iterator)
throws DSpaceSolrIndexerException {
try {
@@ -230,35 +312,103 @@ public class XOAI {
}
}
/**
 * Determine the date to report as the item's last change: the most recent
 * of the item's last-modified date and the start/end dates of its READ
 * policies for the "Anonymous" group, considering only dates that have
 * already passed.
 *
 * @param item
 *            the item whose dates are inspected
 * @return the latest of the candidate dates lying before the current time;
 *         if none of them does, the item's last-modified date
 * @throws SQLException
 *             if the resource policies cannot be read
 */
private Date getMostRecentModificationDate(Item item) throws SQLException {
    List<ResourcePolicy> policies = authorizeService.getPoliciesActionFilter(context, item, Constants.READ);
    Date now = new Date();
    Date lastModified = item.getLastModified();
    Date lastChange = latestPastDate(null, lastModified, now);
    for (ResourcePolicy policy : policies) {
        // Guard against policies that carry no group before comparing the group name.
        if (policy.getGroup() == null || !"Anonymous".equals(policy.getGroup().getName())) {
            continue;
        }
        lastChange = latestPastDate(lastChange, policy.getStartDate(), now);
        lastChange = latestPastDate(lastChange, policy.getEndDate(), now);
    }
    // Never hand back null: fall back to the plain last-modified date.
    return lastChange != null ? lastChange : lastModified;
}

/**
 * Return the later of {@code current} and {@code candidate}, ignoring a
 * candidate that is null or does not lie before {@code now}.
 */
private static Date latestPastDate(Date current, Date candidate, Date now) {
    if (candidate == null || !candidate.before(now)) {
        return current;
    }
    return (current == null || candidate.after(current)) ? candidate : current;
}
private SolrInputDocument index(Item item)
throws SQLException, MetadataBindException, ParseException, XMLStreamException, WritingXmlException {
throws SQLException, MetadataBindException, ParseException, XMLStreamException, WritingXmlException {
SolrInputDocument doc = new SolrInputDocument();
doc.addField("item.id", item.getID());
boolean pub = this.isPublic(item);
doc.addField("item.public", pub);
String handle = item.getHandle();
doc.addField("item.handle", handle);
doc.addField("item.lastmodified", item.getLastModified());
boolean isEmbargoed = !this.isPublic(item);
boolean isCurrentlyVisible = this.checkIfVisibleInOAI(item);
boolean isIndexed = this.checkIfIndexed(item);
/*
* If the item is not under embargo, it should be visible. If it is,
* make it invisible if this is the first time it is indexed. For
* subsequent index runs, keep the current status, so that if the item
* is embargoed again, it is flagged as deleted instead and does not
* just disappear, or if it is still under embargo, it won't become
* visible and be known to harvesters as deleted before it gets
* disseminated for the first time. The item has to be indexed directly
* after publication even if it is still embargoed, because its
* lastModified date will not change when the embargo end date (or start
* date) is reached. To circumvent this, an item which will change its
* status in the future will be marked as such.
*/
boolean isPublic = isEmbargoed ? (isIndexed ? isCurrentlyVisible : false) : true;
doc.addField("item.public", isPublic);
// if the visibility of the item will change in the future due to an
// embargo, mark it as such.
doc.addField("item.willChangeStatus", willChangeStatus(item));
/*
* Mark an item as deleted not only if it is withdrawn, but also if it
* is made private, because items should not simply disappear from OAI
* with a transient deletion policy. Do not set the flag for still
* invisible embargoed items, because this will override the item.public
* flag.
*/
doc.addField("item.deleted",
(item.isWithdrawn() || !item.isDiscoverable() || (isEmbargoed ? isPublic : false)));
/*
* An item that is embargoed will potentially not be harvested by
* incremental harvesters if the from and until params do not encompass
* both the standard lastModified date and the anonymous-READ resource
* policy start date. The same is true for the end date, where
* harvesters might not get a tombstone record. Therefore, consider all
* relevant policy dates and the standard lastModified date and take the
* most recent of those which have already passed.
*/
doc.addField("item.lastmodified", this.getMostRecentModificationDate(item));
if (item.getSubmitter() != null) {
doc.addField("item.submitter", item.getSubmitter().getEmail());
}
doc.addField("item.deleted", item.isWithdrawn() ? "true" : "false");
for (Collection col : item.getCollections()) {
doc.addField("item.collections",
"col_" + col.getHandle().replace("/", "_"));
for (Collection col: item.getCollections()) {
doc.addField("item.collections", "col_" + col.getHandle().replace("/", "_"));
}
for (Community com : collectionsService.flatParentCommunities(context, item)) {
doc.addField("item.communities",
"com_" + com.getHandle().replace("/", "_"));
doc.addField("item.communities", "com_" + com.getHandle().replace("/", "_"));
}
List<MetadataValue> allData = itemService.getMetadata(item,
Item.ANY, Item.ANY, Item.ANY, Item.ANY);
List<MetadataValue> allData = itemService.getMetadata(item, Item.ANY, Item.ANY, Item.ANY, Item.ANY);
for (MetadataValue dc : allData) {
MetadataField field = dc.getMetadataField();
String key = "metadata."
+ field.getMetadataSchema().getName() + "."
+ field.getElement();
String key = "metadata." + field.getMetadataSchema().getName() + "." + field.getElement();
if (field.getQualifier() != null) {
key += "." + field.getQualifier();
}
@@ -281,17 +431,35 @@ public class XOAI {
doc.addField("item.compile", out.toString());
if (verbose) {
println(String.format("Item %s with handle %s indexed",
item.getID().toString(), handle));
println("Item with handle " + handle + " indexed");
}
return doc;
}
/**
 * Tell whether the item has a READ policy for the "Anonymous" group whose
 * start or end date still lies in the future.
 *
 * @param item
 *            the item whose READ policies are inspected
 * @return {@code true} if at least one such policy date is after the
 *         current time, {@code false} otherwise
 * @throws SQLException
 *             if the resource policies cannot be read
 */
private boolean willChangeStatus(Item item) throws SQLException {
    List<ResourcePolicy> policies = authorizeService.getPoliciesActionFilter(context, item, Constants.READ);
    // One reference instant for all comparisons.
    Date now = new Date();
    for (ResourcePolicy policy : policies) {
        // Guard against policies that carry no group before comparing the group name.
        if (policy.getGroup() == null || !"Anonymous".equals(policy.getGroup().getName())) {
            continue;
        }
        Date start = policy.getStartDate();
        Date end = policy.getEndDate();
        if ((start != null && start.after(now)) || (end != null && end.after(now))) {
            return true;
        }
    }
    return false;
}
private boolean isPublic(Item item) {
boolean pub = false;
try {
//Check if READ access allowed on this Item
// Check if READ access allowed on this Item
pub = authorizeService.authorizeActionBoolean(context, item, Constants.READ);
} catch (SQLException ex) {
log.error(ex.getMessage());

View File

@@ -166,7 +166,9 @@
<field name="item.lastmodified" type="date" indexed="true" stored="true" multiValued="false" />
<field name="item.submitter" type="string" indexed="true" stored="true" multiValued="false" />
<field name="item.deleted" type="boolean" indexed="true" stored="true" multiValued="false" />
<!-- if true, item.public will change in the future due to an embargo being set/lifted -->
<field name="item.willChangeStatus" type="boolean" indexed="true" stored="true" multiValued="false" />
<!-- Item compiled -->
<field name="item.compile" type="string" indexed="false" stored="true" multiValued="false" />