mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-16 14:33:09 +00:00
90830: Issue 8125: Import items stale in discovery - intermediate commit
This commit is contained in:
@@ -0,0 +1,132 @@
|
||||
package org.dspace.app.itemdbstatus;
|
||||
|
||||
import static org.dspace.discovery.indexobject.ItemIndexFactoryImpl.STATUS_FIELD;
|
||||
import static org.dspace.discovery.indexobject.ItemIndexFactoryImpl.STATUS_FIELD_PREDB;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.sql.SQLException;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
|
||||
import org.apache.commons.cli.ParseException;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.dspace.content.Item;
|
||||
import org.dspace.content.factory.ContentServiceFactory;
|
||||
import org.dspace.content.service.ItemService;
|
||||
import org.dspace.core.Context;
|
||||
import org.dspace.discovery.DiscoverQuery;
|
||||
import org.dspace.discovery.DiscoverResult;
|
||||
import org.dspace.discovery.IndexableObject;
|
||||
import org.dspace.discovery.IndexingService;
|
||||
import org.dspace.discovery.SearchService;
|
||||
import org.dspace.discovery.SearchServiceException;
|
||||
import org.dspace.discovery.SearchUtils;
|
||||
import org.dspace.discovery.SolrSearchCore;
|
||||
import org.dspace.discovery.indexobject.IndexableItem;
|
||||
import org.dspace.discovery.indexobject.factory.IndexObjectFactoryFactory;
|
||||
import org.dspace.scripts.DSpaceRunnable;
|
||||
import org.dspace.services.factory.DSpaceServicesFactory;
|
||||
import org.dspace.utils.DSpace;
|
||||
|
||||
/**
|
||||
* Created by kristof on 19/04/2022
|
||||
*/
|
||||
public class ItemDatabaseStatusCli extends DSpaceRunnable<ItemDatabaseStatusCliScriptConfiguration> {
|
||||
/* Log4j logger */
|
||||
private static final Logger log = Logger.getLogger(ItemDatabaseStatusCli.class);
|
||||
|
||||
private SearchService searchService;
|
||||
private ItemService itemService;
|
||||
private IndexingService indexingService;
|
||||
private SolrSearchCore solrSearchCore;
|
||||
private IndexObjectFactoryFactory indexObjectServiceFactory;
|
||||
|
||||
@Override
|
||||
public ItemDatabaseStatusCliScriptConfiguration getScriptConfiguration() {
|
||||
return new DSpace().getServiceManager()
|
||||
.getServiceByName("item-database-status", ItemDatabaseStatusCliScriptConfiguration.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setup() throws ParseException {
|
||||
searchService = SearchUtils.getSearchService();
|
||||
itemService = ContentServiceFactory.getInstance().getItemService();
|
||||
indexingService = DSpaceServicesFactory.getInstance().getServiceManager()
|
||||
.getServiceByName(IndexingService.class.getName(), IndexingService.class);
|
||||
solrSearchCore = DSpaceServicesFactory.getInstance().getServiceManager()
|
||||
.getServiceByName(SolrSearchCore.class.getName(), SolrSearchCore.class);
|
||||
indexObjectServiceFactory = IndexObjectFactoryFactory.getInstance();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void internalRun() throws Exception {
|
||||
logAndOut("Starting Item Database Status update...");
|
||||
|
||||
Context context = new Context();
|
||||
|
||||
try {
|
||||
context.turnOffAuthorisationSystem();
|
||||
performStatusUpdate(context);
|
||||
} finally {
|
||||
context.restoreAuthSystemState();
|
||||
context.complete();
|
||||
}
|
||||
}
|
||||
|
||||
private void performStatusUpdate(Context context) throws SearchServiceException, SolrServerException, IOException {
|
||||
SolrQuery solrQuery = new SolrQuery();
|
||||
solrQuery.setQuery(STATUS_FIELD + ":" + STATUS_FIELD_PREDB);
|
||||
solrQuery.addFilterQuery(SearchUtils.RESOURCE_TYPE_FIELD + ":" + IndexableItem.TYPE);
|
||||
solrQuery.addField(SearchUtils.RESOURCE_ID_FIELD);
|
||||
solrQuery.addField(SearchUtils.RESOURCE_UNIQUE_ID);
|
||||
QueryResponse response = solrSearchCore.getSolr().query(solrQuery, solrSearchCore.REQUEST_METHOD);
|
||||
|
||||
if (response != null) {
|
||||
for (SolrDocument doc : response.getResults()) {
|
||||
String uuid = (String) doc.getFirstValue(SearchUtils.RESOURCE_ID_FIELD);
|
||||
String uniqueId = (String) doc.getFirstValue(SearchUtils.RESOURCE_UNIQUE_ID);
|
||||
logAndOut("Processing item with UUID: " + uuid);
|
||||
|
||||
Optional<IndexableObject> indexableObject = Optional.empty();
|
||||
try {
|
||||
indexableObject = indexObjectServiceFactory
|
||||
.getIndexableObjectFactory(uniqueId).findIndexableObject(context, uuid);
|
||||
} catch (SQLException e) {
|
||||
log.warn("An exception occurred when attempting to retrieve item with UUID \"" + uuid +
|
||||
"\" from the database, removing related solr document", e);
|
||||
}
|
||||
|
||||
try {
|
||||
if (indexableObject.isPresent()) {
|
||||
logAndOut("Item exists in DB, updating solr document");
|
||||
updateItem(context, indexableObject.get());
|
||||
} else {
|
||||
logAndOut("Item doesn't exist in DB, removing solr document");
|
||||
removeItem(context, uniqueId);
|
||||
}
|
||||
} catch (SQLException | IOException e) {
|
||||
log.error(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
indexingService.commit();
|
||||
}
|
||||
|
||||
private void updateItem(Context context, IndexableObject indexableObject) throws SQLException {
|
||||
indexingService.indexContent(context, indexableObject, true);
|
||||
}
|
||||
|
||||
private void removeItem(Context context, String uniqueId) throws IOException, SQLException {
|
||||
indexingService.unIndexContent(context, uniqueId);
|
||||
}
|
||||
|
||||
private void logAndOut(String message) {
|
||||
log.info(message);
|
||||
System.out.println(message);
|
||||
}
|
||||
}
|
@@ -0,0 +1,35 @@
|
||||
package org.dspace.app.itemdbstatus;
|
||||
|
||||
import org.apache.commons.cli.Options;
|
||||
import org.dspace.core.Context;
|
||||
import org.dspace.scripts.configuration.ScriptConfiguration;
|
||||
|
||||
/**
|
||||
* Created by kristof on 19/04/2022
|
||||
*/
|
||||
public class ItemDatabaseStatusCliScriptConfiguration extends ScriptConfiguration<ItemDatabaseStatusCli> {
|
||||
private Class<ItemDatabaseStatusCli> dspaceRunnableClass;
|
||||
|
||||
@Override
|
||||
public Class<ItemDatabaseStatusCli> getDspaceRunnableClass() {
|
||||
return dspaceRunnableClass;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setDspaceRunnableClass(Class<ItemDatabaseStatusCli> dspaceRunnableClass) {
|
||||
this.dspaceRunnableClass = dspaceRunnableClass;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isAllowedToExecute(Context context) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Options getOptions() {
|
||||
if (options == null) {
|
||||
options = new Options();
|
||||
}
|
||||
return options;
|
||||
}
|
||||
}
|
@@ -7,6 +7,7 @@
|
||||
*/
|
||||
package org.dspace.discovery;
|
||||
|
||||
import java.sql.SQLException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
@@ -37,6 +38,8 @@ public class IndexEventConsumer implements Consumer {
|
||||
|
||||
// collect Items, Collections, Communities that need indexing
|
||||
private Set<IndexableObject> objectsToUpdate = new HashSet<>();
|
||||
// collect freshly created Items that need indexing (requires pre-db status)
|
||||
private Set<IndexableObject> createdItemsToUpdate = new HashSet<>();
|
||||
|
||||
// unique search IDs to delete
|
||||
private Set<String> uniqueIdsToDelete = new HashSet<>();
|
||||
@@ -65,6 +68,7 @@ public class IndexEventConsumer implements Consumer {
|
||||
if (objectsToUpdate == null) {
|
||||
objectsToUpdate = new HashSet<>();
|
||||
uniqueIdsToDelete = new HashSet<>();
|
||||
createdItemsToUpdate = new HashSet<>();
|
||||
}
|
||||
|
||||
int st = event.getSubjectType();
|
||||
@@ -143,8 +147,12 @@ public class IndexEventConsumer implements Consumer {
|
||||
String detail = indexableObjectService.getType() + "-" + event.getSubjectID().toString();
|
||||
uniqueIdsToDelete.add(detail);
|
||||
}
|
||||
if (st == Constants.ITEM && et == Event.CREATE && object == null) {
|
||||
createdItemsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, subject));
|
||||
} else {
|
||||
objectsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, subject));
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case Event.REMOVE:
|
||||
@@ -162,7 +170,7 @@ public class IndexEventConsumer implements Consumer {
|
||||
// also update the object in order to index mapped/unmapped Items
|
||||
if (subject != null &&
|
||||
subject.getType() == Constants.COLLECTION && object.getType() == Constants.ITEM) {
|
||||
objectsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, object));
|
||||
createdItemsToUpdate.addAll(indexObjectServiceFactory.getIndexableObjects(ctx, object));
|
||||
}
|
||||
}
|
||||
break;
|
||||
@@ -209,23 +217,11 @@ public class IndexEventConsumer implements Consumer {
|
||||
}
|
||||
// update the changed Items not deleted because they were on create list
|
||||
for (IndexableObject iu : objectsToUpdate) {
|
||||
/* we let all types through here and
|
||||
* allow the search indexer to make
|
||||
* decisions on indexing and/or removal
|
||||
*/
|
||||
iu.setIndexedObject(ctx.reloadEntity(iu.getIndexedObject()));
|
||||
String uniqueIndexID = iu.getUniqueIndexID();
|
||||
if (uniqueIndexID != null) {
|
||||
try {
|
||||
indexer.indexContent(ctx, iu, true, false);
|
||||
log.debug("Indexed "
|
||||
+ iu.getTypeText()
|
||||
+ ", id=" + iu.getID()
|
||||
+ ", unique_id=" + uniqueIndexID);
|
||||
} catch (Exception e) {
|
||||
log.error("Failed while indexing object: ", e);
|
||||
}
|
||||
indexObject(ctx, iu, false);
|
||||
}
|
||||
// update the created Items with a pre-db status
|
||||
for (IndexableObject iu : createdItemsToUpdate) {
|
||||
indexObject(ctx, iu, true);
|
||||
}
|
||||
} finally {
|
||||
if (!objectsToUpdate.isEmpty() || !uniqueIdsToDelete.isEmpty()) {
|
||||
@@ -235,6 +231,27 @@ public class IndexEventConsumer implements Consumer {
|
||||
// "free" the resources
|
||||
objectsToUpdate.clear();
|
||||
uniqueIdsToDelete.clear();
|
||||
createdItemsToUpdate.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void indexObject(Context ctx, IndexableObject iu, boolean preDb) throws SQLException {
|
||||
/* we let all types through here and
|
||||
* allow the search indexer to make
|
||||
* decisions on indexing and/or removal
|
||||
*/
|
||||
iu.setIndexedObject(ctx.reloadEntity(iu.getIndexedObject()));
|
||||
String uniqueIndexID = iu.getUniqueIndexID();
|
||||
if (uniqueIndexID != null) {
|
||||
try {
|
||||
indexer.indexContent(ctx, iu, true, false, preDb);
|
||||
log.debug("Indexed "
|
||||
+ iu.getTypeText()
|
||||
+ ", id=" + iu.getID()
|
||||
+ ", unique_id=" + uniqueIndexID);
|
||||
} catch (Exception e) {
|
||||
log.error("Failed while indexing object: ", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -30,6 +30,9 @@ public interface IndexingService {
|
||||
void indexContent(Context context, IndexableObject dso,
|
||||
boolean force, boolean commit) throws SQLException, SearchServiceException;
|
||||
|
||||
/**
 * Index the given object, with control over committing and over the pre-db flag.
 *
 * @param context DSpace context object
 * @param dso     the object to index
 * @param force   passed to the implementation; in SolrServiceImpl a true value indexes the object
 *                without checking whether indexing is required
 * @param commit  in SolrServiceImpl, whether a commit is issued after indexing
 * @param preDb   in SolrServiceImpl, whether the document is built as a "new" document
 *                (for Items this adds the pre-db status field)
 * @throws SQLException           declared for implementations; presumably raised on database errors
 * @throws SearchServiceException declared for implementations; presumably raised on search index errors
 */
void indexContent(Context context, IndexableObject dso,
|
||||
boolean force, boolean commit, boolean preDb) throws SQLException, SearchServiceException;
|
||||
|
||||
void unIndexContent(Context context, IndexableObject dso)
|
||||
throws SQLException, IOException;
|
||||
|
||||
|
@@ -8,6 +8,8 @@
|
||||
package org.dspace.discovery;
|
||||
|
||||
import static java.util.stream.Collectors.joining;
|
||||
import static org.dspace.discovery.indexobject.ItemIndexFactoryImpl.STATUS_FIELD;
|
||||
import static org.dspace.discovery.indexobject.ItemIndexFactoryImpl.STATUS_FIELD_PREDB;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
@@ -166,6 +168,16 @@ public class SolrServiceImpl implements SearchService, IndexingService {
|
||||
indexableObjectService.writeDocument(context, indexableObject, solrInputDocument);
|
||||
}
|
||||
|
||||
protected void update(Context context, IndexFactory indexableObjectService,
|
||||
IndexableObject indexableObject, boolean preDB) throws IOException, SQLException, SolrServerException {
|
||||
if (preDB) {
|
||||
final SolrInputDocument solrInputDocument = indexableObjectService.buildNewDocument(context, indexableObject);
|
||||
indexableObjectService.writeDocument(context, indexableObject, solrInputDocument);
|
||||
} else {
|
||||
update(context, indexableObjectService, indexableObject);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* unIndex removes an Item, Collection, or Community
|
||||
*
|
||||
@@ -754,6 +766,9 @@ public class SolrServiceImpl implements SearchService, IndexingService {
|
||||
solrQuery.setParam("spellcheck", Boolean.TRUE);
|
||||
}
|
||||
|
||||
// Exclude items with status:predb to avoid solr docs being removed during large imports (Issue #8125)
|
||||
solrQuery.addFilterQuery("!" + STATUS_FIELD + ":" + STATUS_FIELD_PREDB);
|
||||
|
||||
for (int i = 0; i < discoveryQuery.getFilterQueries().size(); i++) {
|
||||
String filterQuery = discoveryQuery.getFilterQueries().get(i);
|
||||
solrQuery.addFilterQuery(filterQuery);
|
||||
@@ -1384,6 +1399,28 @@ public class SolrServiceImpl implements SearchService, IndexingService {
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void indexContent(Context context, IndexableObject indexableObject, boolean force,
|
||||
boolean commit, boolean preDb) throws SearchServiceException, SQLException {
|
||||
if (preDb) {
|
||||
try {
|
||||
final IndexFactory indexableObjectFactory = indexObjectServiceFactory.
|
||||
getIndexableObjectFactory(indexableObject);
|
||||
if (force || requiresIndexing(indexableObject.getUniqueIndexID(), indexableObject.getLastModified())) {
|
||||
update(context, indexableObjectFactory, indexableObject, true);
|
||||
log.info(LogManager.getHeader(context, "indexed_object", indexableObject.getUniqueIndexID()));
|
||||
}
|
||||
} catch (IOException | SQLException | SolrServerException | SearchServiceException e) {
|
||||
log.error(e.getMessage(), e);
|
||||
}
|
||||
} else {
|
||||
indexContent(context, indexableObject, force);
|
||||
}
|
||||
if (commit) {
|
||||
commit();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void commit() throws SearchServiceException {
|
||||
try {
|
||||
|
@@ -70,6 +70,11 @@ public abstract class IndexFactoryImpl<T extends IndexableObject, S> implements
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SolrInputDocument buildNewDocument(Context context, T indexableObject) throws SQLException, IOException {
|
||||
return buildDocument(context, indexableObject);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeDocument(Context context, T indexableObject, SolrInputDocument solrInputDocument)
|
||||
throws SQLException, IOException, SolrServerException {
|
||||
|
@@ -78,6 +78,8 @@ public class ItemIndexFactoryImpl extends DSpaceObjectIndexFactoryImpl<Indexable
|
||||
private static final Logger log = org.apache.logging.log4j.LogManager.getLogger(ItemIndexFactoryImpl.class);
|
||||
public static final String VARIANTS_STORE_SEPARATOR = "###";
|
||||
public static final String STORE_SEPARATOR = "\n|||\n";
|
||||
public static final String STATUS_FIELD = "status";
|
||||
public static final String STATUS_FIELD_PREDB = "predb";
|
||||
|
||||
|
||||
@Autowired
|
||||
@@ -169,6 +171,13 @@ public class ItemIndexFactoryImpl extends DSpaceObjectIndexFactoryImpl<Indexable
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SolrInputDocument buildNewDocument(Context context, IndexableItem indexableItem) throws SQLException, IOException {
|
||||
SolrInputDocument doc = buildDocument(context, indexableItem);
|
||||
doc.addField(STATUS_FIELD, STATUS_FIELD_PREDB);
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addDiscoveryFields(SolrInputDocument doc, Context context, Item item,
|
||||
List<DiscoveryConfiguration> discoveryConfigurations)
|
||||
|
@@ -46,6 +46,14 @@ public interface IndexFactory<T extends IndexableObject, S> {
|
||||
*/
|
||||
SolrInputDocument buildDocument(Context context, T indexableObject) throws SQLException, IOException;
|
||||
|
||||
/**
 * Create solr document with all the shared fields initialized.
 * Can contain special fields required for "new" documents vs regular buildDocument
 * @param context           DSpace context object
 * @param indexableObject   the indexableObject that we want to index
 * @return                  initialized solr document
 * @throws SQLException     declared for implementations; presumably raised on database errors
 * @throws IOException      declared for implementations; presumably raised on I/O errors
 */
|
||||
SolrInputDocument buildNewDocument(Context context, T indexableObject) throws SQLException, IOException;
|
||||
|
||||
/**
|
||||
* Write the provided document to the solr core
|
||||
* @param context DSpace context object
|
||||
|
@@ -40,4 +40,9 @@
|
||||
<property name="description" value="Delete all the values of the specified metadata field"/>
|
||||
<property name="dspaceRunnableClass" value="org.dspace.app.bulkedit.MetadataDeletionCli"/>
|
||||
</bean>
|
||||
|
||||
<bean id="item-database-status" class="org.dspace.app.itemdbstatus.ItemDatabaseStatusCliScriptConfiguration">
|
||||
<property name="description" value="Update the database status of Items in solr"/>
|
||||
<property name="dspaceRunnableClass" value="org.dspace.app.itemdbstatus.ItemDatabaseStatusCli"/>
|
||||
</bean>
|
||||
</beans>
|
||||
|
@@ -278,6 +278,9 @@
|
||||
<field name="SolrIndexer.lastIndexed" type="date" indexed="true" stored="true" default="NOW" multiValued="false" omitNorms="true" />
|
||||
<field name="lastModified" type="date" indexed="true" stored="true" default="NOW" multiValued="false" omitNorms="true" />
|
||||
|
||||
<!-- Database status of the indexed item; holds "predb" for freshly created items indexed with the pre-db flag -->
|
||||
<field name="status" type="string" indexed="true" stored="true" omitNorms="true" docValues="true" />
|
||||
|
||||
<!-- Dynamic field used to store the relation metadata as a keywordFilter -->
|
||||
<dynamicField name="relation.*" type="keywordFilter" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
|
||||
|
||||
|
Reference in New Issue
Block a user