mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-14 05:23:14 +00:00
90830: Issue 8125: Import items stale in discovery
This commit is contained in:
@@ -1,3 +1,10 @@
|
|||||||
|
/**
|
||||||
|
* The contents of this file are subject to the license and copyright
|
||||||
|
* detailed in the LICENSE and NOTICE files at the root of the source
|
||||||
|
* tree and available online at
|
||||||
|
*
|
||||||
|
* http://www.dspace.org/license/
|
||||||
|
*/
|
||||||
package org.dspace.app.itemdbstatus;
|
package org.dspace.app.itemdbstatus;
|
||||||
|
|
||||||
import static org.dspace.discovery.indexobject.ItemIndexFactoryImpl.STATUS_FIELD;
|
import static org.dspace.discovery.indexobject.ItemIndexFactoryImpl.STATUS_FIELD;
|
||||||
@@ -5,8 +12,8 @@ import static org.dspace.discovery.indexobject.ItemIndexFactoryImpl.STATUS_FIELD
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
|
import java.util.Calendar;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.UUID;
|
|
||||||
|
|
||||||
import org.apache.commons.cli.ParseException;
|
import org.apache.commons.cli.ParseException;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
@@ -14,36 +21,38 @@ import org.apache.solr.client.solrj.SolrQuery;
|
|||||||
import org.apache.solr.client.solrj.SolrServerException;
|
import org.apache.solr.client.solrj.SolrServerException;
|
||||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||||
import org.apache.solr.common.SolrDocument;
|
import org.apache.solr.common.SolrDocument;
|
||||||
import org.dspace.content.Item;
|
|
||||||
import org.dspace.content.factory.ContentServiceFactory;
|
|
||||||
import org.dspace.content.service.ItemService;
|
|
||||||
import org.dspace.core.Context;
|
import org.dspace.core.Context;
|
||||||
import org.dspace.discovery.DiscoverQuery;
|
|
||||||
import org.dspace.discovery.DiscoverResult;
|
|
||||||
import org.dspace.discovery.IndexableObject;
|
import org.dspace.discovery.IndexableObject;
|
||||||
import org.dspace.discovery.IndexingService;
|
import org.dspace.discovery.IndexingService;
|
||||||
import org.dspace.discovery.SearchService;
|
|
||||||
import org.dspace.discovery.SearchServiceException;
|
import org.dspace.discovery.SearchServiceException;
|
||||||
import org.dspace.discovery.SearchUtils;
|
import org.dspace.discovery.SearchUtils;
|
||||||
import org.dspace.discovery.SolrSearchCore;
|
import org.dspace.discovery.SolrSearchCore;
|
||||||
import org.dspace.discovery.indexobject.IndexableItem;
|
import org.dspace.discovery.indexobject.IndexableItem;
|
||||||
import org.dspace.discovery.indexobject.factory.IndexObjectFactoryFactory;
|
import org.dspace.discovery.indexobject.factory.IndexObjectFactoryFactory;
|
||||||
import org.dspace.scripts.DSpaceRunnable;
|
import org.dspace.scripts.DSpaceRunnable;
|
||||||
|
import org.dspace.services.ConfigurationService;
|
||||||
import org.dspace.services.factory.DSpaceServicesFactory;
|
import org.dspace.services.factory.DSpaceServicesFactory;
|
||||||
|
import org.dspace.util.SolrUtils;
|
||||||
import org.dspace.utils.DSpace;
|
import org.dspace.utils.DSpace;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by kristof on 19/04/2022
|
* {@link DSpaceRunnable} implementation to update solr items with "predb" status to either:
|
||||||
|
* - Delete them from solr if they're not present in the database
|
||||||
|
* - Remove their status if they're present in the database
|
||||||
*/
|
*/
|
||||||
public class ItemDatabaseStatusCli extends DSpaceRunnable<ItemDatabaseStatusCliScriptConfiguration> {
|
public class ItemDatabaseStatusCli extends DSpaceRunnable<ItemDatabaseStatusCliScriptConfiguration> {
|
||||||
/* Log4j logger */
|
/* Log4j logger */
|
||||||
private static final Logger log = Logger.getLogger(ItemDatabaseStatusCli.class);
|
private static final Logger log = Logger.getLogger(ItemDatabaseStatusCli.class);
|
||||||
|
|
||||||
private SearchService searchService;
|
public static final String TIME_UNTIL_REINDEX_PROPERTY = "item-database-status.time-until-reindex";
|
||||||
private ItemService itemService;
|
|
||||||
private IndexingService indexingService;
|
private IndexingService indexingService;
|
||||||
private SolrSearchCore solrSearchCore;
|
private SolrSearchCore solrSearchCore;
|
||||||
private IndexObjectFactoryFactory indexObjectServiceFactory;
|
private IndexObjectFactoryFactory indexObjectServiceFactory;
|
||||||
|
private ConfigurationService configurationService;
|
||||||
|
|
||||||
|
private int timeUntilReindex = 0;
|
||||||
|
private String maxTime;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ItemDatabaseStatusCliScriptConfiguration getScriptConfiguration() {
|
public ItemDatabaseStatusCliScriptConfiguration getScriptConfiguration() {
|
||||||
@@ -53,18 +62,20 @@ public class ItemDatabaseStatusCli extends DSpaceRunnable<ItemDatabaseStatusCliS
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setup() throws ParseException {
|
public void setup() throws ParseException {
|
||||||
searchService = SearchUtils.getSearchService();
|
|
||||||
itemService = ContentServiceFactory.getInstance().getItemService();
|
|
||||||
indexingService = DSpaceServicesFactory.getInstance().getServiceManager()
|
indexingService = DSpaceServicesFactory.getInstance().getServiceManager()
|
||||||
.getServiceByName(IndexingService.class.getName(), IndexingService.class);
|
.getServiceByName(IndexingService.class.getName(), IndexingService.class);
|
||||||
solrSearchCore = DSpaceServicesFactory.getInstance().getServiceManager()
|
solrSearchCore = DSpaceServicesFactory.getInstance().getServiceManager()
|
||||||
.getServiceByName(SolrSearchCore.class.getName(), SolrSearchCore.class);
|
.getServiceByName(SolrSearchCore.class.getName(), SolrSearchCore.class);
|
||||||
indexObjectServiceFactory = IndexObjectFactoryFactory.getInstance();
|
indexObjectServiceFactory = IndexObjectFactoryFactory.getInstance();
|
||||||
|
configurationService = DSpaceServicesFactory.getInstance().getConfigurationService();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void internalRun() throws Exception {
|
public void internalRun() throws Exception {
|
||||||
logAndOut("Starting Item Database Status update...");
|
logInfoAndOut("Starting Item Database Status update...");
|
||||||
|
|
||||||
|
timeUntilReindex = getTimeUntilReindex();
|
||||||
|
maxTime = getMaxTime();
|
||||||
|
|
||||||
Context context = new Context();
|
Context context = new Context();
|
||||||
|
|
||||||
@@ -81,15 +92,20 @@ public class ItemDatabaseStatusCli extends DSpaceRunnable<ItemDatabaseStatusCliS
|
|||||||
SolrQuery solrQuery = new SolrQuery();
|
SolrQuery solrQuery = new SolrQuery();
|
||||||
solrQuery.setQuery(STATUS_FIELD + ":" + STATUS_FIELD_PREDB);
|
solrQuery.setQuery(STATUS_FIELD + ":" + STATUS_FIELD_PREDB);
|
||||||
solrQuery.addFilterQuery(SearchUtils.RESOURCE_TYPE_FIELD + ":" + IndexableItem.TYPE);
|
solrQuery.addFilterQuery(SearchUtils.RESOURCE_TYPE_FIELD + ":" + IndexableItem.TYPE);
|
||||||
|
String dateRangeFilter = SearchUtils.LAST_INDEXED_FIELD + ":[* TO " + maxTime + "]";
|
||||||
|
logDebugAndOut("Date range filter used; " + dateRangeFilter);
|
||||||
|
solrQuery.addFilterQuery(dateRangeFilter);
|
||||||
solrQuery.addField(SearchUtils.RESOURCE_ID_FIELD);
|
solrQuery.addField(SearchUtils.RESOURCE_ID_FIELD);
|
||||||
solrQuery.addField(SearchUtils.RESOURCE_UNIQUE_ID);
|
solrQuery.addField(SearchUtils.RESOURCE_UNIQUE_ID);
|
||||||
QueryResponse response = solrSearchCore.getSolr().query(solrQuery, solrSearchCore.REQUEST_METHOD);
|
QueryResponse response = solrSearchCore.getSolr().query(solrQuery, solrSearchCore.REQUEST_METHOD);
|
||||||
|
|
||||||
if (response != null) {
|
if (response != null) {
|
||||||
|
logInfoAndOut(response.getResults().size() + " items found to process");
|
||||||
|
|
||||||
for (SolrDocument doc : response.getResults()) {
|
for (SolrDocument doc : response.getResults()) {
|
||||||
String uuid = (String) doc.getFirstValue(SearchUtils.RESOURCE_ID_FIELD);
|
String uuid = (String) doc.getFirstValue(SearchUtils.RESOURCE_ID_FIELD);
|
||||||
String uniqueId = (String) doc.getFirstValue(SearchUtils.RESOURCE_UNIQUE_ID);
|
String uniqueId = (String) doc.getFirstValue(SearchUtils.RESOURCE_UNIQUE_ID);
|
||||||
logAndOut("Processing item with UUID: " + uuid);
|
logDebugAndOut("Processing item with UUID: " + uuid);
|
||||||
|
|
||||||
Optional<IndexableObject> indexableObject = Optional.empty();
|
Optional<IndexableObject> indexableObject = Optional.empty();
|
||||||
try {
|
try {
|
||||||
@@ -102,10 +118,10 @@ public class ItemDatabaseStatusCli extends DSpaceRunnable<ItemDatabaseStatusCliS
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
if (indexableObject.isPresent()) {
|
if (indexableObject.isPresent()) {
|
||||||
logAndOut("Item exists in DB, updating solr document");
|
logDebugAndOut("Item exists in DB, updating solr document");
|
||||||
updateItem(context, indexableObject.get());
|
updateItem(context, indexableObject.get());
|
||||||
} else {
|
} else {
|
||||||
logAndOut("Item doesn't exist in DB, removing solr document");
|
logDebugAndOut("Item doesn't exist in DB, removing solr document");
|
||||||
removeItem(context, uniqueId);
|
removeItem(context, uniqueId);
|
||||||
}
|
}
|
||||||
} catch (SQLException | IOException e) {
|
} catch (SQLException | IOException e) {
|
||||||
@@ -125,8 +141,25 @@ public class ItemDatabaseStatusCli extends DSpaceRunnable<ItemDatabaseStatusCliS
|
|||||||
indexingService.unIndexContent(context, uniqueId);
|
indexingService.unIndexContent(context, uniqueId);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void logAndOut(String message) {
|
private String getMaxTime() {
|
||||||
|
Calendar cal = Calendar.getInstance();
|
||||||
|
if (timeUntilReindex > 0) {
|
||||||
|
cal.add(Calendar.MILLISECOND, -timeUntilReindex);
|
||||||
|
}
|
||||||
|
return SolrUtils.getDateFormatter().format(cal.getTime());
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getTimeUntilReindex() {
|
||||||
|
return configurationService.getIntProperty(TIME_UNTIL_REINDEX_PROPERTY, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void logInfoAndOut(String message) {
|
||||||
log.info(message);
|
log.info(message);
|
||||||
System.out.println(message);
|
System.out.println(message);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void logDebugAndOut(String message) {
|
||||||
|
log.debug(message);
|
||||||
|
System.out.println(message);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@@ -1,3 +1,10 @@
|
|||||||
|
/**
|
||||||
|
* The contents of this file are subject to the license and copyright
|
||||||
|
* detailed in the LICENSE and NOTICE files at the root of the source
|
||||||
|
* tree and available online at
|
||||||
|
*
|
||||||
|
* http://www.dspace.org/license/
|
||||||
|
*/
|
||||||
package org.dspace.app.itemdbstatus;
|
package org.dspace.app.itemdbstatus;
|
||||||
|
|
||||||
import org.apache.commons.cli.Options;
|
import org.apache.commons.cli.Options;
|
||||||
@@ -5,7 +12,7 @@ import org.dspace.core.Context;
|
|||||||
import org.dspace.scripts.configuration.ScriptConfiguration;
|
import org.dspace.scripts.configuration.ScriptConfiguration;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by kristof on 19/04/2022
|
* The {@link ScriptConfiguration} for the {@link ItemDatabaseStatusCli} script.
|
||||||
*/
|
*/
|
||||||
public class ItemDatabaseStatusCliScriptConfiguration extends ScriptConfiguration<ItemDatabaseStatusCli> {
|
public class ItemDatabaseStatusCliScriptConfiguration extends ScriptConfiguration<ItemDatabaseStatusCli> {
|
||||||
private Class<ItemDatabaseStatusCli> dspaceRunnableClass;
|
private Class<ItemDatabaseStatusCli> dspaceRunnableClass;
|
||||||
|
@@ -15,6 +15,8 @@ import java.util.Set;
|
|||||||
import org.apache.logging.log4j.Logger;
|
import org.apache.logging.log4j.Logger;
|
||||||
import org.dspace.content.Bundle;
|
import org.dspace.content.Bundle;
|
||||||
import org.dspace.content.DSpaceObject;
|
import org.dspace.content.DSpaceObject;
|
||||||
|
import org.dspace.content.factory.ContentServiceFactory;
|
||||||
|
import org.dspace.content.service.ItemService;
|
||||||
import org.dspace.core.Constants;
|
import org.dspace.core.Constants;
|
||||||
import org.dspace.core.Context;
|
import org.dspace.core.Context;
|
||||||
import org.dspace.discovery.indexobject.factory.IndexFactory;
|
import org.dspace.discovery.indexobject.factory.IndexFactory;
|
||||||
@@ -38,7 +40,7 @@ public class IndexEventConsumer implements Consumer {
|
|||||||
|
|
||||||
// collect Items, Collections, Communities that need indexing
|
// collect Items, Collections, Communities that need indexing
|
||||||
private Set<IndexableObject> objectsToUpdate = new HashSet<>();
|
private Set<IndexableObject> objectsToUpdate = new HashSet<>();
|
||||||
// collect freshly created Items that need indexing (requires pre-db status)
|
// collect freshly created Items that need indexing and require pre-db status
|
||||||
private Set<IndexableObject> createdItemsToUpdate = new HashSet<>();
|
private Set<IndexableObject> createdItemsToUpdate = new HashSet<>();
|
||||||
|
|
||||||
// unique search IDs to delete
|
// unique search IDs to delete
|
||||||
@@ -50,6 +52,8 @@ public class IndexEventConsumer implements Consumer {
|
|||||||
|
|
||||||
IndexObjectFactoryFactory indexObjectServiceFactory = IndexObjectFactoryFactory.getInstance();
|
IndexObjectFactoryFactory indexObjectServiceFactory = IndexObjectFactoryFactory.getInstance();
|
||||||
|
|
||||||
|
ItemService itemService = ContentServiceFactory.getInstance().getItemService();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void initialize() throws Exception {
|
public void initialize() throws Exception {
|
||||||
|
|
||||||
@@ -147,12 +151,9 @@ public class IndexEventConsumer implements Consumer {
|
|||||||
String detail = indexableObjectService.getType() + "-" + event.getSubjectID().toString();
|
String detail = indexableObjectService.getType() + "-" + event.getSubjectID().toString();
|
||||||
uniqueIdsToDelete.add(detail);
|
uniqueIdsToDelete.add(detail);
|
||||||
}
|
}
|
||||||
if (st == Constants.ITEM && et == Event.CREATE && object == null) {
|
|
||||||
createdItemsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, subject));
|
|
||||||
} else {
|
|
||||||
objectsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, subject));
|
objectsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, subject));
|
||||||
}
|
}
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Event.REMOVE:
|
case Event.REMOVE:
|
||||||
@@ -170,7 +171,13 @@ public class IndexEventConsumer implements Consumer {
|
|||||||
// also update the object in order to index mapped/unmapped Items
|
// also update the object in order to index mapped/unmapped Items
|
||||||
if (subject != null &&
|
if (subject != null &&
|
||||||
subject.getType() == Constants.COLLECTION && object.getType() == Constants.ITEM) {
|
subject.getType() == Constants.COLLECTION && object.getType() == Constants.ITEM) {
|
||||||
|
// If the item doesn't exist in the database yet, add it to createdItemsToUpdate
|
||||||
|
// Otherwise use the standard objectsToUpdate
|
||||||
|
if (itemService.find(ctx, object.getID()) == null) {
|
||||||
createdItemsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, object));
|
createdItemsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, object));
|
||||||
|
} else {
|
||||||
|
objectsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, object));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@@ -759,6 +759,7 @@ public class SolrServiceImpl implements SearchService, IndexingService {
|
|||||||
solrQuery.addField(SearchUtils.RESOURCE_TYPE_FIELD);
|
solrQuery.addField(SearchUtils.RESOURCE_TYPE_FIELD);
|
||||||
solrQuery.addField(SearchUtils.RESOURCE_ID_FIELD);
|
solrQuery.addField(SearchUtils.RESOURCE_ID_FIELD);
|
||||||
solrQuery.addField(SearchUtils.RESOURCE_UNIQUE_ID);
|
solrQuery.addField(SearchUtils.RESOURCE_UNIQUE_ID);
|
||||||
|
solrQuery.addField(STATUS_FIELD);
|
||||||
|
|
||||||
if (discoveryQuery.isSpellCheck()) {
|
if (discoveryQuery.isSpellCheck()) {
|
||||||
solrQuery.setParam(SpellingParams.SPELLCHECK_Q, query);
|
solrQuery.setParam(SpellingParams.SPELLCHECK_Q, query);
|
||||||
@@ -766,9 +767,6 @@ public class SolrServiceImpl implements SearchService, IndexingService {
|
|||||||
solrQuery.setParam("spellcheck", Boolean.TRUE);
|
solrQuery.setParam("spellcheck", Boolean.TRUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Exclude items with status:predb to avoid solr docs being removed during large imports (Issue #8125)
|
|
||||||
solrQuery.addFilterQuery("!" + STATUS_FIELD + ":" + STATUS_FIELD_PREDB);
|
|
||||||
|
|
||||||
for (int i = 0; i < discoveryQuery.getFilterQueries().size(); i++) {
|
for (int i = 0; i < discoveryQuery.getFilterQueries().size(); i++) {
|
||||||
String filterQuery = discoveryQuery.getFilterQueries().get(i);
|
String filterQuery = discoveryQuery.getFilterQueries().get(i);
|
||||||
solrQuery.addFilterQuery(filterQuery);
|
solrQuery.addFilterQuery(filterQuery);
|
||||||
@@ -912,12 +910,15 @@ public class SolrServiceImpl implements SearchService, IndexingService {
|
|||||||
// Enables solr to remove documents related to items that are no longer in the database (stale)
|
// Enables solr to remove documents related to items that are no longer in the database (stale)
|
||||||
// if maxAttempts is greater than or equal to 0, clean up the index on each step
|
// if maxAttempts is greater than or equal to 0, clean up the index on each step
|
||||||
if (maxAttempts >= 0) {
|
if (maxAttempts >= 0) {
|
||||||
|
Object statusObj = doc.getFirstValue(STATUS_FIELD);
|
||||||
|
if (!(statusObj instanceof String && statusObj.equals(STATUS_FIELD_PREDB))) {
|
||||||
zombieDocs.add((String) doc.getFirstValue(SearchUtils.RESOURCE_UNIQUE_ID));
|
zombieDocs.add((String) doc.getFirstValue(SearchUtils.RESOURCE_UNIQUE_ID));
|
||||||
// skip processing the response unless we are in the last allowed execution.
|
// skip processing the response unless we are in the last allowed execution.
|
||||||
// When maxAttempts is 0 this is both the first and the last run: since
|
// When maxAttempts is 0 this is both the first and the last run: since
|
||||||
// executionCount is incremented at the start of the loop, it will be equal to 1
|
// executionCount is incremented at the start of the loop, it will be equal to 1
|
||||||
skipLoadingResponse = maxAttempts + 1 != executionCount;
|
skipLoadingResponse = maxAttempts + 1 != executionCount;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (!skipLoadingResponse) {
|
if (!skipLoadingResponse) {
|
||||||
|
@@ -1533,6 +1533,13 @@ mail.helpdesk.name = Help Desk
|
|||||||
request.item.helpdesk.override = false
|
request.item.helpdesk.override = false
|
||||||
|
|
||||||
|
|
||||||
|
#---------------------------------------------------------------#
|
||||||
|
#----------ITEM DATABASE STATUS SCRIPT CONFIGURATION------------#
|
||||||
|
#---------------------------------------------------------------#
|
||||||
|
|
||||||
|
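# The maximum time (in ms) an item may remain in solr with "predb" status; items last indexed longer ago than this are processed by the item database status script (status removed if the item exists in the database, solr document deleted otherwise)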
|
||||||
|
item-database-status.time-until-reindex = 600000
|
||||||
|
|
||||||
#------------------------------------------------------------------#
|
#------------------------------------------------------------------#
|
||||||
#-------------------MODULE CONFIGURATIONS--------------------------#
|
#-------------------MODULE CONFIGURATIONS--------------------------#
|
||||||
#------------------------------------------------------------------#
|
#------------------------------------------------------------------#
|
||||||
|
Reference in New Issue
Block a user