diff --git a/dspace-api/src/main/java/org/dspace/app/itemdbstatus/ItemDatabaseStatusCli.java b/dspace-api/src/main/java/org/dspace/app/itemdbstatus/ItemDatabaseStatusCli.java index 2dbcf12444..916431e747 100644 --- a/dspace-api/src/main/java/org/dspace/app/itemdbstatus/ItemDatabaseStatusCli.java +++ b/dspace-api/src/main/java/org/dspace/app/itemdbstatus/ItemDatabaseStatusCli.java @@ -1,3 +1,10 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ package org.dspace.app.itemdbstatus; import static org.dspace.discovery.indexobject.ItemIndexFactoryImpl.STATUS_FIELD; @@ -5,8 +12,8 @@ import static org.dspace.discovery.indexobject.ItemIndexFactoryImpl.STATUS_FIELD import java.io.IOException; import java.sql.SQLException; +import java.util.Calendar; import java.util.Optional; -import java.util.UUID; import org.apache.commons.cli.ParseException; import org.apache.log4j.Logger; @@ -14,36 +21,38 @@ import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; -import org.dspace.content.Item; -import org.dspace.content.factory.ContentServiceFactory; -import org.dspace.content.service.ItemService; import org.dspace.core.Context; -import org.dspace.discovery.DiscoverQuery; -import org.dspace.discovery.DiscoverResult; import org.dspace.discovery.IndexableObject; import org.dspace.discovery.IndexingService; -import org.dspace.discovery.SearchService; import org.dspace.discovery.SearchServiceException; import org.dspace.discovery.SearchUtils; import org.dspace.discovery.SolrSearchCore; import org.dspace.discovery.indexobject.IndexableItem; import org.dspace.discovery.indexobject.factory.IndexObjectFactoryFactory; import org.dspace.scripts.DSpaceRunnable; +import 
org.dspace.services.ConfigurationService; import org.dspace.services.factory.DSpaceServicesFactory; +import org.dspace.util.SolrUtils; import org.dspace.utils.DSpace; /** - * Created by kristof on 19/04/2022 + * {@link DSpaceRunnable} implementation to update solr items with "predb" status to either: + * - Delete them from solr if they're not present in the database + * - Remove their status if they're present in the database */ public class ItemDatabaseStatusCli extends DSpaceRunnable { /* Log4j logger */ private static final Logger log = Logger.getLogger(ItemDatabaseStatusCli.class); - private SearchService searchService; - private ItemService itemService; + public static final String TIME_UNTIL_REINDEX_PROPERTY = "item-database-status.time-until-reindex"; + private IndexingService indexingService; private SolrSearchCore solrSearchCore; private IndexObjectFactoryFactory indexObjectServiceFactory; + private ConfigurationService configurationService; + + private int timeUntilReindex = 0; + private String maxTime; @Override public ItemDatabaseStatusCliScriptConfiguration getScriptConfiguration() { @@ -53,18 +62,20 @@ public class ItemDatabaseStatusCli extends DSpaceRunnable indexableObject = Optional.empty(); try { @@ -102,10 +118,10 @@ public class ItemDatabaseStatusCli extends DSpaceRunnable 0) { + cal.add(Calendar.MILLISECOND, -timeUntilReindex); + } + return SolrUtils.getDateFormatter().format(cal.getTime()); + } + + private int getTimeUntilReindex() { + return configurationService.getIntProperty(TIME_UNTIL_REINDEX_PROPERTY, 0); + } + + private void logInfoAndOut(String message) { log.info(message); System.out.println(message); } + + private void logDebugAndOut(String message) { + log.debug(message); + System.out.println(message); + } } diff --git a/dspace-api/src/main/java/org/dspace/app/itemdbstatus/ItemDatabaseStatusCliScriptConfiguration.java b/dspace-api/src/main/java/org/dspace/app/itemdbstatus/ItemDatabaseStatusCliScriptConfiguration.java index 
a7d472b6e2..d79caf3159 100644 --- a/dspace-api/src/main/java/org/dspace/app/itemdbstatus/ItemDatabaseStatusCliScriptConfiguration.java +++ b/dspace-api/src/main/java/org/dspace/app/itemdbstatus/ItemDatabaseStatusCliScriptConfiguration.java @@ -1,3 +1,10 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ package org.dspace.app.itemdbstatus; import org.apache.commons.cli.Options; @@ -5,7 +12,7 @@ import org.dspace.core.Context; import org.dspace.scripts.configuration.ScriptConfiguration; /** - * Created by kristof on 19/04/2022 + * The {@link ScriptConfiguration} for the {@link ItemDatabaseStatusCli} script. */ public class ItemDatabaseStatusCliScriptConfiguration extends ScriptConfiguration { private Class dspaceRunnableClass; diff --git a/dspace-api/src/main/java/org/dspace/discovery/IndexEventConsumer.java b/dspace-api/src/main/java/org/dspace/discovery/IndexEventConsumer.java index 7b470ee073..0f60373ff9 100644 --- a/dspace-api/src/main/java/org/dspace/discovery/IndexEventConsumer.java +++ b/dspace-api/src/main/java/org/dspace/discovery/IndexEventConsumer.java @@ -15,6 +15,8 @@ import java.util.Set; import org.apache.logging.log4j.Logger; import org.dspace.content.Bundle; import org.dspace.content.DSpaceObject; +import org.dspace.content.factory.ContentServiceFactory; +import org.dspace.content.service.ItemService; import org.dspace.core.Constants; import org.dspace.core.Context; import org.dspace.discovery.indexobject.factory.IndexFactory; @@ -38,7 +40,7 @@ public class IndexEventConsumer implements Consumer { // collect Items, Collections, Communities that need indexing private Set objectsToUpdate = new HashSet<>(); - // collect freshly created Items that need indexing (requires pre-db status) + // collect freshly created Items that need indexing and require pre-db status private Set 
createdItemsToUpdate = new HashSet<>(); // unique search IDs to delete @@ -50,6 +52,8 @@ public class IndexEventConsumer implements Consumer { IndexObjectFactoryFactory indexObjectServiceFactory = IndexObjectFactoryFactory.getInstance(); + ItemService itemService = ContentServiceFactory.getInstance().getItemService(); + @Override public void initialize() throws Exception { @@ -147,11 +151,8 @@ public class IndexEventConsumer implements Consumer { String detail = indexableObjectService.getType() + "-" + event.getSubjectID().toString(); uniqueIdsToDelete.add(detail); } - if (st == Constants.ITEM && et == Event.CREATE && object == null) { - createdItemsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, subject)); - } else { - objectsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, subject)); - } + + objectsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, subject)); } break; @@ -170,7 +171,13 @@ public class IndexEventConsumer implements Consumer { // also update the object in order to index mapped/unmapped Items if (subject != null && subject.getType() == Constants.COLLECTION && object.getType() == Constants.ITEM) { - createdItemsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, object)); + // If the item doesn't exist in the database yet, add it to createdItemsToUpdate + // Otherwise use the standard objectsToUpdate + if (itemService.find(ctx, object.getID()) == null) { + createdItemsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, object)); + } else { + objectsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, object)); + } } } break; diff --git a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java index 2203966982..c1ae1226ce 100644 --- a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java @@ 
-759,6 +759,7 @@ public class SolrServiceImpl implements SearchService, IndexingService { solrQuery.addField(SearchUtils.RESOURCE_TYPE_FIELD); solrQuery.addField(SearchUtils.RESOURCE_ID_FIELD); solrQuery.addField(SearchUtils.RESOURCE_UNIQUE_ID); + solrQuery.addField(STATUS_FIELD); if (discoveryQuery.isSpellCheck()) { solrQuery.setParam(SpellingParams.SPELLCHECK_Q, query); @@ -766,9 +767,6 @@ public class SolrServiceImpl implements SearchService, IndexingService { solrQuery.setParam("spellcheck", Boolean.TRUE); } - // Exclude items with status:predb to avoid solr docs being removed during large imports (Issue #8125) - solrQuery.addFilterQuery("!" + STATUS_FIELD + ":" + STATUS_FIELD_PREDB); - for (int i = 0; i < discoveryQuery.getFilterQueries().size(); i++) { String filterQuery = discoveryQuery.getFilterQueries().get(i); solrQuery.addFilterQuery(filterQuery); @@ -912,11 +910,14 @@ public class SolrServiceImpl implements SearchService, IndexingService { // Enables solr to remove documents related to items not on database anymore (Stale) // if maxAttemps is greater than 0 cleanup the index on each step if (maxAttempts >= 0) { - zombieDocs.add((String) doc.getFirstValue(SearchUtils.RESOURCE_UNIQUE_ID)); - // avoid to process the response except if we are in the last allowed execution. - // When maxAttempts is 0 this will be just the first and last run as the - // executionCount is increased at the start of the loop it will be equals to 1 - skipLoadingResponse = maxAttempts + 1 != executionCount; + Object statusObj = doc.getFirstValue(STATUS_FIELD); + if (!(statusObj instanceof String && statusObj.equals(STATUS_FIELD_PREDB))) { + zombieDocs.add((String) doc.getFirstValue(SearchUtils.RESOURCE_UNIQUE_ID)); + // avoid to process the response except if we are in the last allowed execution. 
+ // When maxAttempts is 0 this will be just the first and last run: as the + // executionCount is increased at the start of the loop, it will be equal to 1 + skipLoadingResponse = maxAttempts + 1 != executionCount; + } } continue; } diff --git a/dspace/config/dspace.cfg b/dspace/config/dspace.cfg index 5a307ebfc6..fd4ad59769 100644 --- a/dspace/config/dspace.cfg +++ b/dspace/config/dspace.cfg @@ -1533,6 +1533,13 @@ mail.helpdesk.name = Help Desk request.item.helpdesk.override = false +#---------------------------------------------------------------# +#----------ITEM DATABASE STATUS SCRIPT CONFIGURATION------------# +#---------------------------------------------------------------# + +# The maximum time (in milliseconds) an item may remain in Solr with "predb" status before it needs to be reindexed +item-database-status.time-until-reindex = 600000 + #------------------------------------------------------------------# #-------------------MODULE CONFIGURATIONS--------------------------# #------------------------------------------------------------------#