Merge pull request #661 from LongsightGroup/DS-682-osu-select-collection

DS-682 Optimize Select Collection Performance
This commit is contained in:
Peter Dietz
2014-10-28 02:31:19 -04:00
10 changed files with 393 additions and 10 deletions

View File

@@ -0,0 +1,116 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.app.util;
import org.apache.log4j.Logger;
import org.dspace.content.Collection;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.eperson.EPerson;
import org.springframework.util.StopWatch;
import java.sql.SQLException;
import java.util.ArrayList;
/**
* @author peterdietz
* A command line tool to verify/test the accuracy and speed gains of Collection.findAuthorizedOptimized()
* Invocation: dsrun org.dspace.app.util.OptimizeSelectCollection
*/
public class OptimizeSelectCollection {
private static final Logger log = Logger.getLogger(OptimizeSelectCollection.class);
private static Context context;
private static ArrayList<EPerson> brokenPeople;
private static Long timeSavedMS = 0L;
public static void main(String[] argv) throws Exception
{
System.out.println("OptimizeSelectCollection tool.");
System.out.println("We want to verify that the optimized version of select collection logic produces the same " +
"values as the legacy select-collection logic.");
context = new Context();
brokenPeople = new ArrayList<EPerson>();
int peopleChecked = 0;
timeSavedMS = 0L;
if(argv != null && argv.length > 0) {
for(String email : argv) {
EPerson person = EPerson.findByEmail(context, email);
checkSelectCollectionForUser(person);
peopleChecked++;
}
} else {
//default case, run as specific user, or run all...
EPerson[] people = EPerson.findAll(context, EPerson.EMAIL);
for(EPerson person : people) {
checkSelectCollectionForUser(person);
peopleChecked++;
}
}
if(brokenPeople.size() > 0) {
System.out.println("NOT DONE YET!!! Some people don't have all their collections.");
for(EPerson person : brokenPeople) {
System.out.println("-- " + person.getEmail());
}
} else {
System.out.println("All Good: " + peopleChecked + " people have been checked, with same submission powers. TimeSaved(ms): " + timeSavedMS);
}
}
private static void checkSelectCollectionForUser(EPerson person) throws SQLException {
context.setCurrentUser(person);
StopWatch stopWatch = new StopWatch("SelectCollectionStep Optimization (" + person.getEmail() + ")");
System.out.println("User: " + person.getEmail());
stopWatch.start("findAuthorized");
Collection[] collections = Collection.findAuthorized(context, null, Constants.ADD);
stopWatch.stop();
Long defaultMS = stopWatch.getLastTaskTimeMillis();
stopWatch.start("ListingCollections");
System.out.println("Legacy Find Authorized");
reportCollections(collections);
stopWatch.stop();
stopWatch.start("findAuthorizedOptimized");
Collection[] collectionsOptimized = Collection.findAuthorizedOptimized(context, Constants.ADD);
stopWatch.stop();
Long optimizedMS = stopWatch.getLastTaskTimeMillis();
timeSavedMS += defaultMS - optimizedMS;
stopWatch.start("ListingCollectionsWithOptimizedCollections");
System.out.println("Find Authorized Optimized");
reportCollections(collectionsOptimized);
stopWatch.stop();
if (collections.length == collectionsOptimized.length) {
System.out.println("Number of collections matches - Good");
} else {
System.out.println("Number of collections doesn't match -- Bad");
brokenPeople.add(person);
}
System.out.println(stopWatch.prettyPrint());
}
private static void reportCollections(Collection[] collections) {
System.out.println("====================================");
System.out.println("This user is permitted to submit to the following collections.");
for(Collection collection : collections) {
System.out.println(" - " + collection.getHandle() + " -- " + collection.getName());
}
System.out.println("Total: " + collections.length);
}
}

View File

@@ -33,10 +33,7 @@ import java.io.InputStream;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.MissingResourceException;
import java.util.*;
/**
* Class representing a collection.
@@ -1505,6 +1502,82 @@ public class Collection extends DSpaceObject
myCollections = (Collection[]) myResults.toArray(myCollections);
return myCollections;
}
public static Collection[] findAuthorizedOptimized(Context context, int actionID) throws java.sql.SQLException
{
if(! ConfigurationManager.getBooleanProperty("org.dspace.content.Collection.findAuthorizedPerformanceOptimize", true)) {
// Fallback to legacy query if config says so. The rationale could be that a site found a bug.
return findAuthorized(context, null, actionID);
}
List<Collection> myResults = new ArrayList<Collection>();
if(AuthorizeManager.isAdmin(context))
{
return findAll(context);
}
//Check eperson->policy
Collection[] directToCollection = findDirectMapped(context, actionID);
for (int i = 0; i< directToCollection.length; i++)
{
if(!myResults.contains(directToCollection[i]))
{
myResults.add(directToCollection[i]);
}
}
//Check eperson->groups->policy
Collection[] groupToCollection = findGroupMapped(context, actionID);
for (int i = 0; i< groupToCollection.length; i++)
{
if(!myResults.contains(groupToCollection[i]))
{
myResults.add(groupToCollection[i]);
}
}
//Check eperson->groups->groups->policy->collection
//i.e. Malcolm Litchfield is a member of OSU_Press_Embargo,
// which is a member of: COLLECTION_24_ADMIN, COLLECTION_24_SUBMIT
Collection[] group2GroupToCollection = findGroup2GroupMapped(context, actionID);
for (int i = 0; i< group2GroupToCollection.length; i++)
{
if(!myResults.contains(group2GroupToCollection[i]))
{
myResults.add(group2GroupToCollection[i]);
}
}
//TODO Check eperson->groups->groups->policy->community
//TODO Check eperson->groups->policy->community
// i.e. Typical Community Admin -- name.# > COMMUNITY_10_ADMIN > Ohio State University Press
//Check eperson->comm-admin
Collection[] group2commCollections = findGroup2CommunityMapped(context);
for (int i = 0; i< group2commCollections.length; i++)
{
if(!myResults.contains(group2commCollections[i]))
{
myResults.add(group2commCollections[i]);
}
}
// Return the collections, sorted alphabetically
Collections.sort(myResults, new CollectionComparator());
Collection[] myCollections = new Collection[myResults.size()];
myCollections = (Collection[]) myResults.toArray(myCollections);
return myCollections;
}
/**
@@ -1609,4 +1682,116 @@ public class Collection extends DSpaceObject
ourContext.addEvent(new Event(Event.MODIFY, Constants.COLLECTION,
getID(), null, getIdentifiers(ourContext)));
}
//TODO replace hard-coded action_id's with constants...
public static Collection[] findDirectMapped(Context context, int actionID) throws java.sql.SQLException
{
//eperson_id -> resourcepolicy.eperson_id
TableRowIterator tri = DatabaseManager.query(context,
"SELECT * FROM collection, resourcepolicy, eperson " +
"WHERE resourcepolicy.resource_id = collection.collection_id AND " +
"eperson.eperson_id = resourcepolicy.eperson_id AND "+
"resourcepolicy.resource_type_id = 3 AND "+
"( resourcepolicy.action_id = 3 OR resourcepolicy.action_id = 11 ) AND "+
"eperson.eperson_id = ?", context.getCurrentUser().getID());
return produceCollectionsFromQuery(context, tri);
}
public static Collection[] findGroupMapped(Context context, int actionID) throws java.sql.SQLException
{
//eperson_id -> resourcepolicy.eperson_id
TableRowIterator tri = DatabaseManager.query(context,
"SELECT * FROM collection, resourcepolicy, eperson, epersongroup2eperson " +
"WHERE resourcepolicy.resource_id = collection.collection_id AND "+
"eperson.eperson_id = epersongroup2eperson.eperson_id AND "+
"epersongroup2eperson.eperson_group_id = resourcepolicy.epersongroup_id AND "+
"resourcepolicy.resource_type_id = 3 AND "+
"( resourcepolicy.action_id = 3 OR resourcepolicy.action_id = 11 ) AND "+
"eperson.eperson_id = ?", context.getCurrentUser().getID());
return produceCollectionsFromQuery(context, tri);
}
public static Collection[] findGroup2GroupMapped(Context context, int actionID) throws SQLException {
TableRowIterator tri = DatabaseManager.query(context,
"SELECT \n" +
" * \n" +
"FROM \n" +
" public.eperson, \n" +
" public.epersongroup2eperson, \n" +
" public.epersongroup, \n" +
" public.group2group, \n" +
" public.resourcepolicy rp_parent, \n" +
" public.collection\n" +
"WHERE \n" +
" epersongroup2eperson.eperson_id = eperson.eperson_id AND\n" +
" epersongroup.eperson_group_id = epersongroup2eperson.eperson_group_id AND\n" +
" group2group.child_id = epersongroup.eperson_group_id AND\n" +
" rp_parent.epersongroup_id = group2group.parent_id AND\n" +
" collection.collection_id = rp_parent.resource_id AND\n" +
" eperson.eperson_id = ? AND \n" +
" (rp_parent.action_id = 3 OR \n" +
" rp_parent.action_id = 11 \n" +
" ) AND rp_parent.resource_type_id = 3;", context.getCurrentUser().getID());
return produceCollectionsFromQuery(context, tri);
}
public static Collection[] findGroup2CommunityMapped(Context context) throws SQLException {
TableRowIterator tri = DatabaseManager.query(context,
"SELECT \n" +
" * \n" +
"FROM \n" +
" public.eperson, \n" +
" public.epersongroup2eperson, \n" +
" public.epersongroup, \n" +
" public.community, \n" +
" public.resourcepolicy\n" +
"WHERE \n" +
" epersongroup2eperson.eperson_id = eperson.eperson_id AND\n" +
" epersongroup.eperson_group_id = epersongroup2eperson.eperson_group_id AND\n" +
" resourcepolicy.epersongroup_id = epersongroup.eperson_group_id AND\n" +
" resourcepolicy.resource_id = community.community_id AND\n" +
" ( resourcepolicy.action_id = 3 OR \n" +
" resourcepolicy.action_id = 11) AND \n" +
" resourcepolicy.resource_type_id = 4 AND eperson.eperson_id = ?", context.getCurrentUser().getID());
return produceCollectionsFromCommunityQuery(context, tri);
}
public static class CollectionComparator implements Comparator<Collection> {
@Override
public int compare(Collection collection1, Collection collection2) {
return collection1.getName().compareTo(collection2.getName());
}
}
public static Collection[] produceCollectionsFromQuery(Context context, TableRowIterator tri) throws SQLException {
List<Collection> collections = new ArrayList<Collection>();
while(tri.hasNext()) {
TableRow row = tri.next();
Collection collection = Collection.find(context, row.getIntColumn("collection_id"));
collections.add(collection);
}
return collections.toArray(new Collection[0]);
}
public static Collection[] produceCollectionsFromCommunityQuery(Context context, TableRowIterator tri) throws SQLException {
List<Collection> collections = new ArrayList<Collection>();
while(tri.hasNext()) {
TableRow commRow = tri.next();
Community community = Community.find(context, commRow.getIntColumn("community_id"));
Collection[] comCollections = community.getCollections();
for(Collection collection : comCollections) {
collections.add(collection);
}
//ugh, handle that communities has subcommunities...
//TODO community.getAllCollections();
}
return collections.toArray(new Collection[0]);
}
}

View File

@@ -10,10 +10,17 @@ package org.dspace.content;
import java.io.File;
import java.io.FileInputStream;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.security.authorize.ServiceAuthorizationManager;
import org.dspace.authorize.AuthorizeException;
import org.apache.log4j.Logger;
import org.dspace.core.Context;
import org.dspace.eperson.EPerson;
import org.dspace.eperson.Group;
import org.elasticsearch.common.collect.Lists;
import org.junit.*;
import static org.junit.Assert.* ;
import static org.hamcrest.CoreMatchers.*;
@@ -1821,6 +1828,76 @@ public class CollectionTest extends AbstractDSpaceObjectTest
assertTrue("testFindAuthorized 10",found.length >= 1);
}
/**
* Test of findAuthorizedOptimized method, of class Collection.
* We create some collections and some users with varying auth, and ensure we can access them all properly.
*/
@Test
public void testFindAuthorizedOptimized() throws Exception
{
context.turnOffAuthorisationSystem();
Community com = Community.create(null, context);
Collection collectionA = Collection.create(context);
Collection collectionB = Collection.create(context);
Collection collectionC = Collection.create(context);
com.addCollection(collectionA);
com.addCollection(collectionB);
com.addCollection(collectionC);
EPerson epersonA = EPerson.create(context);
EPerson epersonB = EPerson.create(context);
EPerson epersonC = EPerson.create(context);
EPerson epersonD = EPerson.create(context);
//personA can submit to collectionA and collectionC
AuthorizeManager.addPolicy(context, collectionA, Constants.ADD, epersonA);
AuthorizeManager.addPolicy(context, collectionC, Constants.ADD, epersonA);
//personB can submit to collectionB and collectionC
AuthorizeManager.addPolicy(context, collectionB, Constants.ADD, epersonB);
AuthorizeManager.addPolicy(context, collectionC, Constants.ADD, epersonB);
//personC can only submit to collectionC
AuthorizeManager.addPolicy(context, collectionC, Constants.ADD, epersonC);
//personD no submission powers
context.restoreAuthSystemState();
context.setCurrentUser(epersonA);
Collection[] personACollections = Collection.findAuthorizedOptimized(context, Constants.ADD);
assertTrue("testFindAuthorizeOptimized A", personACollections.length == 2);
List<Collection> aList = Arrays.asList(personACollections);
assertTrue("testFindAuthorizeOptimized A.A", aList.contains(collectionA));
assertFalse("testFindAuthorizeOptimized A.A", aList.contains(collectionB));
assertTrue("testFindAuthorizeOptimized A.A", aList.contains(collectionC));
context.setCurrentUser(epersonB);
Collection[] personBCollections = Collection.findAuthorizedOptimized(context, Constants.ADD);
assertTrue("testFindAuthorizeOptimized B", personBCollections.length == 2);
List<Collection> bList = Arrays.asList(personBCollections);
assertFalse("testFindAuthorizeOptimized B.A", bList.contains(collectionA));
assertTrue("testFindAuthorizeOptimized B.B", bList.contains(collectionB));
assertTrue("testFindAuthorizeOptimized B.C", bList.contains(collectionC));
context.setCurrentUser(epersonC);
Collection[] personCCollections = Collection.findAuthorizedOptimized(context, Constants.ADD);
assertTrue("testFindAuthorizeOptimized C", personCCollections.length == 1);
List<Collection> cList = Arrays.asList(personCCollections);
assertFalse("testFindAuthorizeOptimized C.A", cList.contains(collectionA));
assertFalse("testFindAuthorizeOptimized C.B", cList.contains(collectionB));
assertTrue("testFindAuthorizeOptimized C.C", cList.contains(collectionC));
context.setCurrentUser(epersonD);
Collection[] personDCollections = Collection.findAuthorizedOptimized(context, Constants.ADD);
assertTrue("testFindAuthorizeOptimized D", personDCollections.length == 0);
List<Collection> dList = Arrays.asList(personDCollections);
assertFalse("testFindAuthorizeOptimized D.A", dList.contains(collectionA));
assertFalse("testFindAuthorizeOptimized D.B", dList.contains(collectionB));
assertFalse("testFindAuthorizeOptimized D.C", dList.contains(collectionC));
}
/**
* Test of countItems method, of class Collection.
*/

View File

@@ -136,7 +136,7 @@ public class JSPSelectCollectionStep extends JSPStep
else
{
// Show all collections
collections = Collection.findAuthorized(context, null,
collections = Collection.findAuthorizedOptimized(context,
Constants.ADD);
}

View File

@@ -154,7 +154,7 @@ public class JSPStartSubmissionLookupStep extends JSPStep
else
{
// Show all collections
collections = Collection.findAuthorized(context, null,
collections = Collection.findAuthorizedOptimized(context,
Constants.ADD);
}

View File

@@ -67,7 +67,7 @@ public class MoveItemForm extends AbstractDSpaceTransformer {
Division main = body.addInteractiveDivision("move-item", contextPath+"/admin/item", Division.METHOD_POST, "primary administrative item");
main.setHead(T_head1.parameterize(item.getHandle()));
Collection[] collections = Collection.findAuthorized(context, null, Constants.ADD);
Collection[] collections = Collection.findAuthorizedOptimized(context, Constants.ADD);
List list = main.addList("select-collection", List.TYPE_FORM);
Select select = list.addItem().addSelect("collectionID");

View File

@@ -172,7 +172,7 @@ public class Submissions extends AbstractDSpaceTransformer
if (unfinishedItems.length <= 0 && supervisedItems.length <= 0)
{
Collection[] collections = Collection.findAuthorized(context, null, Constants.ADD);
Collection[] collections = Collection.findAuthorizedOptimized(context, Constants.ADD);
if (collections.length > 0)
{

View File

@@ -82,7 +82,7 @@ public class SelectCollectionStep extends AbstractSubmissionStep
}
else
{
collections = Collection.findAuthorized(context, null, Constants.ADD);
collections = Collection.findAuthorizedOptimized(context, Constants.ADD);
}
// Basic form with a drop down list of all the collections

View File

@@ -709,7 +709,7 @@
<message key="xmlui.Submission.submit.ResumeStep.submit_resume">Resume</message>
<!-- org.dspace.app.xmlui.Submission.submit.SelectCollection -->
<message key="batch">Select a collection</message>
<message key="xmlui.Submission.submit.SelectCollection.head">Select a collection</message>
<message key="xmlui.Submission.submit.SelectCollection.collection">Collection</message>
<message key="xmlui.Submission.submit.SelectCollection.collection_help">Select the collection you wish to submit an item to.</message>
<message key="xmlui.Submission.submit.SelectCollection.collection_default">Select a collection...</message>

View File

@@ -814,6 +814,11 @@ org.dspace.app.itemexport.max.size = 200
# The directory where the results of imports will be placed (mapfile, upload file)
org.dspace.app.batchitemimport.work.dir = ${dspace.dir}/imports
# Enable performance optimization for select-collection-step collection query
# The only reason you might opt-out is in case the optimized algorithm missed a policy
# default = true, (enabled)
#org.dspace.content.Collection.findAuthorizedPerformanceOptimize = false
# For backwards compatibility, the subscription emails by default include any modified items
# uncomment the following entry for only new items to be emailed
# eperson.subscription.onlynew = true