diff --git a/dspace-api/src/main/java/org/dspace/app/itemdbstatus/ItemDatabaseStatusCli.java b/dspace-api/src/main/java/org/dspace/app/itemdbstatus/ItemDatabaseStatusCli.java
new file mode 100644
index 0000000000..2dbcf12444
--- /dev/null
+++ b/dspace-api/src/main/java/org/dspace/app/itemdbstatus/ItemDatabaseStatusCli.java
@@ -0,0 +1,167 @@
+package org.dspace.app.itemdbstatus;
+
+import static org.dspace.discovery.indexobject.ItemIndexFactoryImpl.STATUS_FIELD;
+import static org.dspace.discovery.indexobject.ItemIndexFactoryImpl.STATUS_FIELD_PREDB;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.UUID;
+
+import org.apache.commons.cli.ParseException;
+import org.apache.log4j.Logger;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
+import org.dspace.content.Item;
+import org.dspace.content.factory.ContentServiceFactory;
+import org.dspace.content.service.ItemService;
+import org.dspace.core.Context;
+import org.dspace.discovery.DiscoverQuery;
+import org.dspace.discovery.DiscoverResult;
+import org.dspace.discovery.IndexableObject;
+import org.dspace.discovery.IndexingService;
+import org.dspace.discovery.SearchService;
+import org.dspace.discovery.SearchServiceException;
+import org.dspace.discovery.SearchUtils;
+import org.dspace.discovery.SolrSearchCore;
+import org.dspace.discovery.indexobject.IndexableItem;
+import org.dspace.discovery.indexobject.factory.IndexObjectFactoryFactory;
+import org.dspace.scripts.DSpaceRunnable;
+import org.dspace.services.factory.DSpaceServicesFactory;
+import org.dspace.utils.DSpace;
+
+/**
+ * Script that walks over the item documents in solr that carry the pre-db status and brings them in line
+ * with the database: documents whose item is found are re-indexed, the other documents are removed.
+ *
+ * Created by kristof on 19/04/2022
+ */
+public class ItemDatabaseStatusCli extends DSpaceRunnable {
+    /* Log4j logger */
+    private static final Logger log = Logger.getLogger(ItemDatabaseStatusCli.class);
+
+    /* Page size of the solr lookups; without an explicit row count solr only returns its default of 10 rows */
+    private static final int BATCH_SIZE = 100;
+
+    private SearchService searchService;
+    private ItemService itemService;
+    private IndexingService indexingService;
+    private SolrSearchCore solrSearchCore;
+    private IndexObjectFactoryFactory indexObjectServiceFactory;
+
+    @Override
+    public ItemDatabaseStatusCliScriptConfiguration getScriptConfiguration() {
+        return new DSpace().getServiceManager()
+            .getServiceByName("item-database-status", ItemDatabaseStatusCliScriptConfiguration.class);
+    }
+
+    @Override
+    public void setup() throws ParseException {
+        searchService = SearchUtils.getSearchService();
+        itemService = ContentServiceFactory.getInstance().getItemService();
+        indexingService = DSpaceServicesFactory.getInstance().getServiceManager()
+            .getServiceByName(IndexingService.class.getName(), IndexingService.class);
+        solrSearchCore = DSpaceServicesFactory.getInstance().getServiceManager()
+            .getServiceByName(SolrSearchCore.class.getName(), SolrSearchCore.class);
+        indexObjectServiceFactory = IndexObjectFactoryFactory.getInstance();
+    }
+
+    @Override
+    public void internalRun() throws Exception {
+        logAndOut("Starting Item Database Status update...");
+
+        Context context = new Context();
+
+        try {
+            context.turnOffAuthorisationSystem();
+            performStatusUpdate(context);
+        } finally {
+            context.restoreAuthSystemState();
+            context.complete();
+        }
+    }
+
+    /**
+     * Re-index or remove every item document in solr that still has the pre-db status, then commit once.
+     *
+     * @param context DSpace context used for the database lookups and the index updates
+     */
+    private void performStatusUpdate(Context context) throws SearchServiceException, SolrServerException, IOException {
+        SolrQuery solrQuery = new SolrQuery();
+        solrQuery.setQuery(STATUS_FIELD + ":" + STATUS_FIELD_PREDB);
+        solrQuery.addFilterQuery(SearchUtils.RESOURCE_TYPE_FIELD + ":" + IndexableItem.TYPE);
+        solrQuery.addField(SearchUtils.RESOURCE_ID_FIELD);
+        solrQuery.addField(SearchUtils.RESOURCE_UNIQUE_ID);
+
+        for (SolrDocument doc : findAllDocuments(solrQuery)) {
+            String uuid = (String) doc.getFirstValue(SearchUtils.RESOURCE_ID_FIELD);
+            String uniqueId = (String) doc.getFirstValue(SearchUtils.RESOURCE_UNIQUE_ID);
+            logAndOut("Processing item with UUID: " + uuid);
+
+            Optional<IndexableObject> indexableObject = Optional.empty();
+            try {
+                indexableObject = indexObjectServiceFactory
+                    .getIndexableObjectFactory(uniqueId).findIndexableObject(context, uuid);
+            } catch (SQLException e) {
+                log.warn("An exception occurred when attempting to retrieve item with UUID \"" + uuid +
+                    "\" from the database, removing related solr document", e);
+            }
+
+            try {
+                if (indexableObject.isPresent()) {
+                    logAndOut("Item exists in DB, updating solr document");
+                    updateItem(context, indexableObject.get());
+                } else {
+                    logAndOut("Item doesn't exist in DB, removing solr document");
+                    removeItem(context, uniqueId);
+                }
+            } catch (SQLException | IOException e) {
+                log.error(e.getMessage(), e);
+            }
+        }
+
+        indexingService.commit();
+    }
+
+    /**
+     * Run the given query page by page and gather all matching documents. Everything is read before any
+     * document is processed so that index changes made while processing cannot shift the paging offsets.
+     *
+     * @param solrQuery the query to run; its start and rows parameters are overwritten
+     * @return all documents matching the query
+     */
+    private List<SolrDocument> findAllDocuments(SolrQuery solrQuery) throws SolrServerException, IOException {
+        List<SolrDocument> documents = new ArrayList<>();
+        long numFound = 0;
+        int start = 0;
+        do {
+            solrQuery.setStart(start);
+            solrQuery.setRows(BATCH_SIZE);
+            QueryResponse response = solrSearchCore.getSolr().query(solrQuery, solrSearchCore.REQUEST_METHOD);
+            if (response == null || response.getResults() == null) {
+                break;
+            }
+            numFound = response.getResults().getNumFound();
+            documents.addAll(response.getResults());
+            start += BATCH_SIZE;
+        } while (start < numFound);
+        return documents;
+    }
+
+    private void updateItem(Context context, IndexableObject indexableObject) throws SQLException {
+        indexingService.indexContent(context, indexableObject, true);
+    }
+
+    private void removeItem(Context context, String uniqueId) throws IOException, SQLException {
+        indexingService.unIndexContent(context, uniqueId);
+    }
+
+    private void logAndOut(String message) {
+        log.info(message);
+        System.out.println(message);
+    }
+}
diff --git a/dspace-api/src/main/java/org/dspace/app/itemdbstatus/ItemDatabaseStatusCliScriptConfiguration.java b/dspace-api/src/main/java/org/dspace/app/itemdbstatus/ItemDatabaseStatusCliScriptConfiguration.java
new file mode 100644
index 0000000000..a7d472b6e2
--- /dev/null
+++ b/dspace-api/src/main/java/org/dspace/app/itemdbstatus/ItemDatabaseStatusCliScriptConfiguration.java
@@ -0,0 +1,37 @@
+package org.dspace.app.itemdbstatus;
+
+import org.apache.commons.cli.Options;
+import org.dspace.core.Context;
+import org.dspace.scripts.configuration.ScriptConfiguration;
+
+/**
+ * Script configuration of the {@link ItemDatabaseStatusCli} script; the script takes no options.
+ *
+ * Created by kristof on 19/04/2022
+ */
+public class ItemDatabaseStatusCliScriptConfiguration extends ScriptConfiguration {
+    private Class dspaceRunnableClass;
+
+    @Override
+    public Class getDspaceRunnableClass() {
+        return dspaceRunnableClass;
+    }
+
+    @Override
+    public void setDspaceRunnableClass(Class dspaceRunnableClass) {
+        this.dspaceRunnableClass = dspaceRunnableClass;
+    }
+
+    @Override
+    public boolean isAllowedToExecute(Context context) {
+        return true;
+    }
+
+    @Override
+    public Options getOptions() {
+        if (options == null) {
+            options = new Options();
+        }
+        return options;
+    }
+}
diff --git a/dspace-api/src/main/java/org/dspace/discovery/IndexEventConsumer.java b/dspace-api/src/main/java/org/dspace/discovery/IndexEventConsumer.java
index 5f1f8b0b0e..7b470ee073 100644
--- a/dspace-api/src/main/java/org/dspace/discovery/IndexEventConsumer.java
+++ b/dspace-api/src/main/java/org/dspace/discovery/IndexEventConsumer.java
@@ -7,6 +7,7 @@
  */
 package org.dspace.discovery;
 
+import java.sql.SQLException;
 import java.util.HashSet;
 import java.util.Optional;
 import java.util.Set;
@@ -37,6 +38,8 @@ public class IndexEventConsumer implements Consumer {
 
     // collect Items, Collections, Communities that need indexing
     private Set<IndexableObject> objectsToUpdate = new HashSet<>();
+    // collect freshly created Items that need indexing (requires pre-db status)
+    private Set<IndexableObject> createdItemsToUpdate = new HashSet<>();
     // unique search IDs to delete
     private Set<String> uniqueIdsToDelete = new HashSet<>();
 
@@ -65,6 +68,7 @@ public class IndexEventConsumer implements Consumer {
         if (objectsToUpdate == null) {
             objectsToUpdate = new HashSet<>();
             uniqueIdsToDelete = new HashSet<>();
+            createdItemsToUpdate = new HashSet<>();
         }
 
         int st = event.getSubjectType();
@@ -143,7 +147,11 @@ public class IndexEventConsumer implements Consumer {
                             String detail = indexableObjectService.getType() + "-" + event.getSubjectID().toString();
                             uniqueIdsToDelete.add(detail);
                         }
-                        objectsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, subject));
+                        if (st == Constants.ITEM && et == Event.CREATE && object == null) {
+                            createdItemsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, subject));
+                        } else {
+                            objectsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, subject));
+                        }
                     }
                     break;
 
@@ -162,7 +170,7 @@ public class IndexEventConsumer implements Consumer {
                         // also update the object in order to index mapped/unmapped Items
                         if (subject != null &&
                             subject.getType() == Constants.COLLECTION && object.getType() == Constants.ITEM) {
-                            objectsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, object));
+                            createdItemsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, object));
                         }
                     }
                     break;
@@ -209,23 +217,11 @@ public class IndexEventConsumer implements Consumer {
             }
             // update the changed Items not deleted because they were on create list
             for (IndexableObject iu : objectsToUpdate) {
-                /* we let all types through here and
-                 * allow the search indexer to make
-                 * decisions on indexing and/or removal
-                 */
-                iu.setIndexedObject(ctx.reloadEntity(iu.getIndexedObject()));
-                String uniqueIndexID = iu.getUniqueIndexID();
-                if (uniqueIndexID != null) {
-                    try {
-                        indexer.indexContent(ctx, iu, true, false);
-                        log.debug("Indexed "
-                            + iu.getTypeText()
-                            + ", id=" + iu.getID()
-                            + ", unique_id=" + uniqueIndexID);
-                    } catch (Exception e) {
-                        log.error("Failed while indexing object: ", e);
-                    }
-                }
+                indexObject(ctx, iu, false);
+            }
+            // update the created Items with a pre-db status
+            for (IndexableObject iu : createdItemsToUpdate) {
+                indexObject(ctx, iu, true);
             }
         } finally {
-            if (!objectsToUpdate.isEmpty() || !uniqueIdsToDelete.isEmpty()) {
+            if (!objectsToUpdate.isEmpty() || !createdItemsToUpdate.isEmpty() || !uniqueIdsToDelete.isEmpty()) {
@@ -235,6 +231,27 @@ public class IndexEventConsumer implements Consumer {
                 // "free" the resources
                 objectsToUpdate.clear();
                 uniqueIdsToDelete.clear();
+                createdItemsToUpdate.clear();
+            }
+        }
+    }
+
+    private void indexObject(Context ctx, IndexableObject iu, boolean preDb) throws SQLException {
+        /* we let all types through here and
+         * allow the search indexer to make
+         * decisions on indexing and/or removal
+         */
+        iu.setIndexedObject(ctx.reloadEntity(iu.getIndexedObject()));
+        String uniqueIndexID = iu.getUniqueIndexID();
+        if (uniqueIndexID != null) {
+            try {
+                indexer.indexContent(ctx, iu, true, false, preDb);
+                log.debug("Indexed "
+                    + iu.getTypeText()
+                    + ", id=" + iu.getID()
+                    + ", unique_id=" + uniqueIndexID);
+            } catch (Exception e) {
+                log.error("Failed while indexing object: ", e);
             }
         }
     }
diff --git a/dspace-api/src/main/java/org/dspace/discovery/IndexingService.java b/dspace-api/src/main/java/org/dspace/discovery/IndexingService.java
index 46795d759e..e4b7c0340e 100644
--- a/dspace-api/src/main/java/org/dspace/discovery/IndexingService.java
+++ b/dspace-api/src/main/java/org/dspace/discovery/IndexingService.java
@@ -30,6 +30,9 @@ public interface IndexingService {
     void indexContent(Context context, IndexableObject dso,
                       boolean force, boolean commit) throws SQLException, SearchServiceException;
 
+    void indexContent(Context context, IndexableObject dso,
+                      boolean force, boolean commit, boolean preDb) throws SQLException, SearchServiceException;
+
     void unIndexContent(Context context, IndexableObject dso)
         throws SQLException, IOException;
 
diff --git a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java
index 0791824085..2203966982 100644
--- a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java
+++ b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java
@@ -8,6 +8,8 @@
 package org.dspace.discovery;
 
 import static java.util.stream.Collectors.joining;
+import static org.dspace.discovery.indexobject.ItemIndexFactoryImpl.STATUS_FIELD;
+import static org.dspace.discovery.indexobject.ItemIndexFactoryImpl.STATUS_FIELD_PREDB;
 
 import java.io.IOException;
 import java.io.PrintWriter;
@@ -166,6 +168,16 @@ public class SolrServiceImpl implements SearchService, IndexingService {
         indexableObjectService.writeDocument(context, indexableObject, solrInputDocument);
     }
 
+    protected void update(Context context, IndexFactory indexableObjectService,
+                          IndexableObject indexableObject, boolean preDB) throws IOException, SQLException, SolrServerException {
+        if (preDB) {
+            final SolrInputDocument solrInputDocument = indexableObjectService.buildNewDocument(context, indexableObject);
+            indexableObjectService.writeDocument(context, indexableObject, solrInputDocument);
+        } else {
+            update(context, indexableObjectService, indexableObject);
+        }
+    }
+
     /**
      * unIndex removes an Item, Collection, or Community
      *
@@ -754,6 +766,9 @@ public class SolrServiceImpl implements SearchService, IndexingService {
             solrQuery.setParam("spellcheck", Boolean.TRUE);
         }
 
+        // Exclude items with status:predb to avoid solr docs being removed during large imports (Issue #8125)
+        solrQuery.addFilterQuery("!" + STATUS_FIELD + ":" + STATUS_FIELD_PREDB);
+
         for (int i = 0; i < discoveryQuery.getFilterQueries().size(); i++) {
             String filterQuery = discoveryQuery.getFilterQueries().get(i);
             solrQuery.addFilterQuery(filterQuery);
@@ -1384,6 +1399,28 @@ public class SolrServiceImpl implements SearchService, IndexingService {
         }
     }
 
+    @Override
+    public void indexContent(Context context, IndexableObject indexableObject, boolean force,
+                             boolean commit, boolean preDb) throws SearchServiceException, SQLException {
+        if (preDb) {
+            try {
+                final IndexFactory indexableObjectFactory = indexObjectServiceFactory.
+                        getIndexableObjectFactory(indexableObject);
+                if (force || requiresIndexing(indexableObject.getUniqueIndexID(), indexableObject.getLastModified())) {
+                    update(context, indexableObjectFactory, indexableObject, true);
+                    log.info(LogManager.getHeader(context, "indexed_object", indexableObject.getUniqueIndexID()));
+                }
+            } catch (IOException | SQLException | SolrServerException | SearchServiceException e) {
+                log.error(e.getMessage(), e);
+            }
+        } else {
+            indexContent(context, indexableObject, force);
+        }
+        if (commit) {
+            commit();
+        }
+    }
+
     @Override
     public void commit() throws SearchServiceException {
         try {
diff --git a/dspace-api/src/main/java/org/dspace/discovery/indexobject/IndexFactoryImpl.java b/dspace-api/src/main/java/org/dspace/discovery/indexobject/IndexFactoryImpl.java
index 8660bbebc7..d3bf1f97f0 100644
--- a/dspace-api/src/main/java/org/dspace/discovery/indexobject/IndexFactoryImpl.java
+++ b/dspace-api/src/main/java/org/dspace/discovery/indexobject/IndexFactoryImpl.java
@@ -70,6 +70,11 @@ public abstract class IndexFactoryImpl implements
         return doc;
     }
 
+    @Override
+    public SolrInputDocument buildNewDocument(Context context, T indexableObject) throws SQLException, IOException {
+        return buildDocument(context, indexableObject);
+    }
+
     @Override
     public void writeDocument(Context context, T indexableObject, SolrInputDocument solrInputDocument)
             throws SQLException, IOException, SolrServerException {
diff --git a/dspace-api/src/main/java/org/dspace/discovery/indexobject/ItemIndexFactoryImpl.java b/dspace-api/src/main/java/org/dspace/discovery/indexobject/ItemIndexFactoryImpl.java
index 07948bb0c3..8b766f2e81 100644
--- a/dspace-api/src/main/java/org/dspace/discovery/indexobject/ItemIndexFactoryImpl.java
+++ b/dspace-api/src/main/java/org/dspace/discovery/indexobject/ItemIndexFactoryImpl.java
@@ -78,6 +78,8 @@ public class ItemIndexFactoryImpl extends DSpaceObjectIndexFactoryImpl discoveryConfigurations)
diff --git a/dspace-api/src/main/java/org/dspace/discovery/indexobject/factory/IndexFactory.java b/dspace-api/src/main/java/org/dspace/discovery/indexobject/factory/IndexFactory.java
index 6644da248d..7946311796 100644
--- a/dspace-api/src/main/java/org/dspace/discovery/indexobject/factory/IndexFactory.java
+++ b/dspace-api/src/main/java/org/dspace/discovery/indexobject/factory/IndexFactory.java
@@ -46,6 +46,15 @@ public interface IndexFactory {
      */
     SolrInputDocument buildDocument(Context context, T indexableObject) throws SQLException, IOException;
 
+    /**
+     * Create solr document with all the shared fields initialized.
+     * Can contain special fields required for "new" documents vs regular buildDocument
+     * @param context DSpace context object
+     * @param indexableObject the indexableObject that we want to index
+     * @return initialized solr document
+     */
+    SolrInputDocument buildNewDocument(Context context, T indexableObject) throws SQLException, IOException;
+
     /**
      * Write the provided document to the solr core
      * @param context DSpace context object
diff --git a/dspace/config/spring/api/scripts.xml b/dspace/config/spring/api/scripts.xml
index 52467dabde..26d7fb5aa5 100644
--- a/dspace/config/spring/api/scripts.xml
+++ b/dspace/config/spring/api/scripts.xml
@@ -40,4 +40,9 @@
+
+
+
+
+
diff --git a/dspace/solr/search/conf/schema.xml b/dspace/solr/search/conf/schema.xml
index e16e213135..809e4c7be9 100644
--- a/dspace/solr/search/conf/schema.xml
+++ b/dspace/solr/search/conf/schema.xml
@@ -278,6 +278,9 @@
+
+
+