124504: Move configuration to be included in the bulkedit module and apply the configured limit earlier, never obtaining a larger list than actually required

This commit is contained in:
Jens Vannerum
2025-02-03 10:09:47 +01:00
parent c73c739deb
commit b63ffd2eb4
6 changed files with 31 additions and 31 deletions

View File

@@ -139,6 +139,8 @@ public class MetadataExportSearch extends DSpaceRunnable<MetadataExportSearchScr
         DiscoverQuery discoverQuery =
             queryBuilder.buildQuery(context, dso, discoveryConfiguration, query, queryBuilderSearchFilters,
                 "Item", 10, Long.getLong("0"), null, SortOption.DESCENDING);
+        // add configured limit
+        discoverQuery.setMaxResults(metadataDSpaceCsvExportService.getCsvExportLimit());
         handler.logDebug("creating iterator");
         Iterator<Item> itemIterator = searchService.iteratorSearch(context, dso, discoverQuery);

View File

@@ -15,7 +15,6 @@ import java.util.List;
 import java.util.Set;
 import java.util.UUID;
-import org.apache.commons.collections.IteratorUtils;
 import org.dspace.app.bulkedit.DSpaceCSV;
 import org.dspace.app.util.service.DSpaceObjectUtils;
 import org.dspace.content.service.ItemService;
@@ -41,6 +40,8 @@ public class MetadataDSpaceCsvExportServiceImpl implements MetadataDSpaceCsvExpo
     @Autowired
     private ConfigurationService configurationService;
+    private int csxExportLimit = -1;
     @Override
     public DSpaceCSV handleExport(Context context, boolean exportAllItems, boolean exportAllMetadata, String identifier,
                                   DSpaceRunnableHandler handler) throws Exception {
@@ -48,7 +49,7 @@ public class MetadataDSpaceCsvExportServiceImpl implements MetadataDSpaceCsvExpo
         if (exportAllItems) {
             handler.logInfo("Exporting whole repository WARNING: May take some time!");
-            toExport = itemService.findAll(context);
+            toExport = itemService.findAll(context, getCsvExportLimit(), 0);
         } else {
             DSpaceObject dso = HandleServiceFactory.getInstance().getHandleService()
                 .resolveToObject(context, identifier);
@@ -68,7 +69,7 @@ public class MetadataDSpaceCsvExportServiceImpl implements MetadataDSpaceCsvExpo
         } else if (dso.getType() == Constants.COLLECTION) {
             handler.logInfo("Exporting collection '" + dso.getName() + "' (" + identifier + ")");
             Collection collection = (Collection) dso;
-            toExport = itemService.findByCollection(context, collection);
+            toExport = itemService.findByCollection(context, collection, getCsvExportLimit(), 0);
         } else if (dso.getType() == Constants.COMMUNITY) {
             handler.logInfo("Exporting community '" + dso.getName() + "' (" + identifier + ")");
             toExport = buildFromCommunity(context, (Community) dso);
@@ -91,7 +92,6 @@ public class MetadataDSpaceCsvExportServiceImpl implements MetadataDSpaceCsvExpo
         // Process each item
         DSpaceCSV csv = new DSpaceCSV(exportAll);
-        toExport = setItemsToExportWithLimit(context, toExport, handler);
         while (toExport.hasNext()) {
             Item item = toExport.next();
             csv.addItem(item);
@@ -109,20 +109,6 @@ public class MetadataDSpaceCsvExportServiceImpl implements MetadataDSpaceCsvExpo
         return export(context, buildFromCommunity(context, community), exportAll, handler);
     }
-    private Iterator<Item> setItemsToExportWithLimit(Context context, Iterator<Item> toExport,
-                                                     DSpaceRunnableHandler handler) throws SQLException {
-        int itemExportLimit = configurationService.getIntProperty(
-            "metadataexport.max.items", 500);
-        List<Item> items = IteratorUtils.toList(toExport);
-        if (items.size() > itemExportLimit) {
-            handler.logWarning("The amount of items to export is higher than the limit of " + itemExportLimit
-                + " items. Only the first " + itemExportLimit + " items will be exported.");
-            items = items.subList(0, itemExportLimit);
-        }
-        return items.iterator();
-    }
     /**
      * Build a Java Collection of item IDs that are in a Community (including
      * its sub-Communities and Collections)
@@ -135,13 +121,16 @@ public class MetadataDSpaceCsvExportServiceImpl implements MetadataDSpaceCsvExpo
     private Iterator<Item> buildFromCommunity(Context context, Community community)
         throws SQLException {
         Set<Item> result = new HashSet<>();
+        int itemsAdded = 0;
         // Add all the collections
         List<Collection> collections = community.getCollections();
         for (Collection collection : collections) {
-            Iterator<Item> items = itemService.findByCollection(context, collection);
-            while (items.hasNext()) {
+            // Never obtain more items than the configured limit
+            Iterator<Item> items = itemService.findByCollection(context, collection, getCsvExportLimit(), 0);
+            while (itemsAdded <= getCsvExportLimit() && items.hasNext()) {
                 result.add(items.next());
+                itemsAdded++;
             }
         }
@@ -149,11 +138,20 @@ public class MetadataDSpaceCsvExportServiceImpl implements MetadataDSpaceCsvExpo
         List<Community> communities = community.getSubcommunities();
         for (Community subCommunity : communities) {
             Iterator<Item> items = buildFromCommunity(context, subCommunity);
-            while (items.hasNext()) {
+            while (itemsAdded <= getCsvExportLimit() && items.hasNext()) {
                 result.add(items.next());
+                itemsAdded++;
             }
         }
         return result.iterator();
     }
+    @Override
+    public int getCsvExportLimit() {
+        if (csxExportLimit == -1) {
+            csxExportLimit = configurationService.getIntProperty("bulkedit.export.max.items", 500);
+        }
+        return csxExportLimit;
+    }
 }

View File

@@ -58,4 +58,6 @@ public interface MetadataDSpaceCsvExportService {
     public DSpaceCSV export(Context context, Community community,
                             boolean exportAll, DSpaceRunnableHandler handler) throws Exception;
+    int getCsvExportLimit();
 }

View File

@@ -906,13 +906,6 @@ org.dspace.app.itemexport.life.span.hours = 48
 # cumulative sizes are more than this entry the export is not kicked off
 org.dspace.app.itemexport.max.size = 200
-### Bulkedit Metadata export settings
-# The maximum amount of items that can be exported using the "metadata-export" / "metadata-export-search" script
-# Recommend to keep this at a feasible number, as exporting large amounts of items can be resource intensive
-# If not set, this will default to 500 items
-# metadataexport.max.items = 500
 ### Batch Item import settings ###
 # The directory where the results of imports will be placed (mapfile, upload file)
 org.dspace.app.batchitemimport.work.dir = ${dspace.dir}/imports

View File

@@ -40,3 +40,8 @@ bulkedit.allow-bulk-deletion = dspace.agreements.end-user
 # By default this is set to 100
 bulkedit.change.commit.count = 100
+### Bulkedit Metadata export settings
+# The maximum amount of items that can be exported using the "metadata-export" / "metadata-export-search" script
+# Recommend to keep this at a feasible number, as exporting large amounts of items can be resource intensive
+# If not set, this will default to 500 items
+# bulkedit.export.max.items = 500

View File

@@ -59,4 +59,4 @@ rest.properties.exposed = ldn.notify.inbox
 rest.properties.exposed = handle.canonical.prefix
 rest.properties.exposed = contentreport.enable
 rest.properties.exposed = duplicate.enable
-rest.properties.exposed = metadataexport.max.items
+rest.properties.exposed = bulkedit.export.max.items