Merge pull request #10689 from atmire/w2p-119612_export-item-limit-dspace-8_x

[Port dspace-8_x] Support a configurable limit for exporting items, as it can take up a huge amount of resources
This commit is contained in:
Tim Donohue
2025-07-07 13:07:29 -05:00
committed by GitHub
6 changed files with 51 additions and 18 deletions

View File

@@ -143,7 +143,7 @@ public class MetadataExportSearch extends DSpaceRunnable<MetadataExportSearchScr
Iterator<Item> itemIterator = searchService.iteratorSearch(context, dso, discoverQuery); Iterator<Item> itemIterator = searchService.iteratorSearch(context, dso, discoverQuery);
handler.logDebug("creating dspacecsv"); handler.logDebug("creating dspacecsv");
DSpaceCSV dSpaceCSV = metadataDSpaceCsvExportService.export(context, itemIterator, true); DSpaceCSV dSpaceCSV = metadataDSpaceCsvExportService.export(context, itemIterator, true, handler);
handler.logDebug("writing to file " + getFileNameOrExportFile()); handler.logDebug("writing to file " + getFileNameOrExportFile());
handler.writeFilestream(context, getFileNameOrExportFile(), dSpaceCSV.getInputStream(), EXPORT_CSV); handler.writeFilestream(context, getFileNameOrExportFile(), dSpaceCSV.getInputStream(), EXPORT_CSV);
context.restoreAuthSystemState(); context.restoreAuthSystemState();

View File

@@ -23,6 +23,7 @@ import org.dspace.core.Constants;
import org.dspace.core.Context; import org.dspace.core.Context;
import org.dspace.handle.factory.HandleServiceFactory; import org.dspace.handle.factory.HandleServiceFactory;
import org.dspace.scripts.handler.DSpaceRunnableHandler; import org.dspace.scripts.handler.DSpaceRunnableHandler;
import org.dspace.services.ConfigurationService;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
/** /**
@@ -36,6 +37,11 @@ public class MetadataDSpaceCsvExportServiceImpl implements MetadataDSpaceCsvExpo
@Autowired @Autowired
private DSpaceObjectUtils dSpaceObjectUtils; private DSpaceObjectUtils dSpaceObjectUtils;
@Autowired
private ConfigurationService configurationService;
private int csxExportLimit = -1;
@Override @Override
public DSpaceCSV handleExport(Context context, boolean exportAllItems, boolean exportAllMetadata, String identifier, public DSpaceCSV handleExport(Context context, boolean exportAllItems, boolean exportAllMetadata, String identifier,
DSpaceRunnableHandler handler) throws Exception { DSpaceRunnableHandler handler) throws Exception {
@@ -43,7 +49,7 @@ public class MetadataDSpaceCsvExportServiceImpl implements MetadataDSpaceCsvExpo
if (exportAllItems) { if (exportAllItems) {
handler.logInfo("Exporting whole repository WARNING: May take some time!"); handler.logInfo("Exporting whole repository WARNING: May take some time!");
toExport = itemService.findAll(context); toExport = itemService.findAll(context, getCsvExportLimit(), 0);
} else { } else {
DSpaceObject dso = HandleServiceFactory.getInstance().getHandleService() DSpaceObject dso = HandleServiceFactory.getInstance().getHandleService()
.resolveToObject(context, identifier); .resolveToObject(context, identifier);
@@ -63,7 +69,7 @@ public class MetadataDSpaceCsvExportServiceImpl implements MetadataDSpaceCsvExpo
} else if (dso.getType() == Constants.COLLECTION) { } else if (dso.getType() == Constants.COLLECTION) {
handler.logInfo("Exporting collection '" + dso.getName() + "' (" + identifier + ")"); handler.logInfo("Exporting collection '" + dso.getName() + "' (" + identifier + ")");
Collection collection = (Collection) dso; Collection collection = (Collection) dso;
toExport = itemService.findByCollection(context, collection); toExport = itemService.findByCollection(context, collection, getCsvExportLimit(), 0);
} else if (dso.getType() == Constants.COMMUNITY) { } else if (dso.getType() == Constants.COMMUNITY) {
handler.logInfo("Exporting community '" + dso.getName() + "' (" + identifier + ")"); handler.logInfo("Exporting community '" + dso.getName() + "' (" + identifier + ")");
toExport = buildFromCommunity(context, (Community) dso); toExport = buildFromCommunity(context, (Community) dso);
@@ -74,18 +80,21 @@ public class MetadataDSpaceCsvExportServiceImpl implements MetadataDSpaceCsvExpo
} }
} }
DSpaceCSV csv = this.export(context, toExport, exportAllMetadata); DSpaceCSV csv = this.export(context, toExport, exportAllMetadata, handler);
return csv; return csv;
} }
@Override @Override
public DSpaceCSV export(Context context, Iterator<Item> toExport, boolean exportAll) throws Exception { public DSpaceCSV export(Context context, Iterator<Item> toExport,
boolean exportAll, DSpaceRunnableHandler handler) throws Exception {
Context.Mode originalMode = context.getCurrentMode(); Context.Mode originalMode = context.getCurrentMode();
context.setMode(Context.Mode.READ_ONLY); context.setMode(Context.Mode.READ_ONLY);
// Process each item // Process each item until we reach the limit
int itemExportLimit = getCsvExportLimit();
DSpaceCSV csv = new DSpaceCSV(exportAll); DSpaceCSV csv = new DSpaceCSV(exportAll);
while (toExport.hasNext()) {
for (int itemsAdded = 0; toExport.hasNext() && itemsAdded < itemExportLimit; itemsAdded++) {
Item item = toExport.next(); Item item = toExport.next();
csv.addItem(item); csv.addItem(item);
context.uncacheEntity(item); context.uncacheEntity(item);
@@ -97,8 +106,9 @@ public class MetadataDSpaceCsvExportServiceImpl implements MetadataDSpaceCsvExpo
} }
@Override @Override
public DSpaceCSV export(Context context, Community community, boolean exportAll) throws Exception { public DSpaceCSV export(Context context, Community community,
return export(context, buildFromCommunity(context, community), exportAll); boolean exportAll, DSpaceRunnableHandler handler) throws Exception {
return export(context, buildFromCommunity(context, community), exportAll, handler);
} }
/** /**
@@ -117,21 +127,30 @@ public class MetadataDSpaceCsvExportServiceImpl implements MetadataDSpaceCsvExpo
// Add all the collections // Add all the collections
List<Collection> collections = community.getCollections(); List<Collection> collections = community.getCollections();
for (Collection collection : collections) { for (Collection collection : collections) {
Iterator<Item> items = itemService.findByCollection(context, collection); // Never obtain more items than the configured limit
while (items.hasNext()) { Iterator<Item> items = itemService.findByCollection(context, collection, getCsvExportLimit(), 0);
while (result.size() < getCsvExportLimit() && items.hasNext()) {
result.add(items.next()); result.add(items.next());
} }
} }
// Add all the sub-communities // Add all the sub-communities
List<Community> communities = community.getSubcommunities(); List<Community> communities = community.getSubcommunities();
for (Community subCommunity : communities) { for (Community subCommunity : communities) {
Iterator<Item> items = buildFromCommunity(context, subCommunity); Iterator<Item> items = buildFromCommunity(context, subCommunity);
while (items.hasNext()) { while (result.size() < getCsvExportLimit() && items.hasNext()) {
result.add(items.next()); result.add(items.next());
} }
} }
return result.iterator(); return result.iterator();
} }
/**
 * Returns the maximum number of items a CSV metadata export may contain.
 * The value is read from the "bulkedit.export.max.items" configuration property
 * (default 500) the first time it is needed and then kept in a field; a field
 * value of -1 marks the limit as not yet loaded.
 *
 * @return the item limit applied to CSV exports
 */
@Override
public int getCsvExportLimit() {
    // Already resolved on an earlier call: reuse the stored value
    if (csxExportLimit != -1) {
        return csxExportLimit;
    }
    csxExportLimit = configurationService.getIntProperty("bulkedit.export.max.items", 500);
    return csxExportLimit;
}
} }

View File

@@ -44,7 +44,8 @@ public interface MetadataDSpaceCsvExportService {
* @return A DSpaceCSV object containing the exported information * @return A DSpaceCSV object containing the exported information
* @throws Exception If something goes wrong * @throws Exception If something goes wrong
*/ */
public DSpaceCSV export(Context context, Iterator<Item> toExport, boolean exportAll) throws Exception; public DSpaceCSV export(Context context, Iterator<Item> toExport,
boolean exportAll, DSpaceRunnableHandler handler) throws Exception;
/** /**
* This method will export all the Items within the given Community to a DSpaceCSV * This method will export all the Items within the given Community to a DSpaceCSV
@@ -54,6 +55,9 @@ public interface MetadataDSpaceCsvExportService {
* @return A DSpaceCSV object containing the exported information * @return A DSpaceCSV object containing the exported information
* @throws Exception If something goes wrong * @throws Exception If something goes wrong
*/ */
public DSpaceCSV export(Context context, Community community, boolean exportAll) throws Exception; public DSpaceCSV export(Context context, Community community,
boolean exportAll, DSpaceRunnableHandler handler) throws Exception;
/**
 * Returns the maximum number of items that a single CSV export will include.
 *
 * @return the item limit applied to CSV exports
 */
int getCsvExportLimit();
} }

View File

@@ -16,6 +16,7 @@ import java.util.List;
import org.dspace.AbstractIntegrationTestWithDatabase; import org.dspace.AbstractIntegrationTestWithDatabase;
import org.dspace.app.bulkedit.DSpaceCSV; import org.dspace.app.bulkedit.DSpaceCSV;
import org.dspace.app.bulkedit.DSpaceCSVLine; import org.dspace.app.bulkedit.DSpaceCSVLine;
import org.dspace.app.scripts.handler.impl.TestDSpaceRunnableHandler;
import org.dspace.builder.CollectionBuilder; import org.dspace.builder.CollectionBuilder;
import org.dspace.builder.CommunityBuilder; import org.dspace.builder.CommunityBuilder;
import org.dspace.builder.ItemBuilder; import org.dspace.builder.ItemBuilder;
@@ -31,6 +32,9 @@ import org.junit.Test;
*/ */
public class MetadataDSpaceCsvExportServiceImplIT public class MetadataDSpaceCsvExportServiceImplIT
extends AbstractIntegrationTestWithDatabase { extends AbstractIntegrationTestWithDatabase {
TestDSpaceRunnableHandler testDSpaceRunnableHandler = new TestDSpaceRunnableHandler();
/** /**
* Test of handleExport method, of class MetadataDSpaceCsvExportServiceImpl. * Test of handleExport method, of class MetadataDSpaceCsvExportServiceImpl.
* @throws java.lang.Exception passed through. * @throws java.lang.Exception passed through.
@@ -66,7 +70,7 @@ public class MetadataDSpaceCsvExportServiceImplIT
boolean exportAll = false; boolean exportAll = false;
MetadataDSpaceCsvExportServiceImpl instance = new MetadataDSpaceCsvExportServiceImpl(); MetadataDSpaceCsvExportServiceImpl instance = new MetadataDSpaceCsvExportServiceImpl();
DSpaceCSV expResult = null; DSpaceCSV expResult = null;
DSpaceCSV result = instance.export(context, toExport, exportAll); DSpaceCSV result = instance.export(context, toExport, exportAll, testDSpaceRunnableHandler);
assertEquals(expResult, result); assertEquals(expResult, result);
// TODO review the generated test code and remove the default call to fail. // TODO review the generated test code and remove the default call to fail.
fail("The test case is a prototype."); fail("The test case is a prototype.");
@@ -105,7 +109,7 @@ public class MetadataDSpaceCsvExportServiceImplIT
.getServiceManager() .getServiceManager()
.getServiceByName(MetadataDSpaceCsvExportServiceImpl.class.getCanonicalName(), .getServiceByName(MetadataDSpaceCsvExportServiceImpl.class.getCanonicalName(),
MetadataDSpaceCsvExportService.class); MetadataDSpaceCsvExportService.class);
DSpaceCSV result = instance.export(context, parentCommunity, false); DSpaceCSV result = instance.export(context, parentCommunity, false, testDSpaceRunnableHandler);
// Examine the result. // Examine the result.
List<DSpaceCSVLine> csvLines = result.getCSVLines(); List<DSpaceCSVLine> csvLines = result.getCSVLines();

View File

@@ -40,3 +40,8 @@ bulkedit.allow-bulk-deletion = dspace.agreements.end-user
# By default this is set to 100 # By default this is set to 100
bulkedit.change.commit.count = 100 bulkedit.change.commit.count = 100
### Bulkedit Metadata export settings
# The maximum number of items that can be exported using the "metadata-export" / "metadata-export-search" scripts.
# It is recommended to keep this at a feasible number, as exporting large numbers of items can be resource intensive.
# If not set, this defaults to 500 items.
# bulkedit.export.max.items = 500

View File

@@ -59,3 +59,4 @@ rest.properties.exposed = ldn.notify.inbox
rest.properties.exposed = handle.canonical.prefix rest.properties.exposed = handle.canonical.prefix
rest.properties.exposed = contentreport.enable rest.properties.exposed = contentreport.enable
rest.properties.exposed = duplicate.enable rest.properties.exposed = duplicate.enable
rest.properties.exposed = bulkedit.export.max.items