DS-3981 Improve IndexClient usage & options

This commit is contained in:
April Herron
2021-11-16 15:25:26 -05:00
parent ff8e002bbc
commit b8a81ac824
5 changed files with 69 additions and 62 deletions

View File

@@ -61,13 +61,14 @@ public class IndexClient extends DSpaceRunnable<IndexDiscoveryScriptConfiguratio
indexer.unIndexContent(context, commandLine.getOptionValue("r")); indexer.unIndexContent(context, commandLine.getOptionValue("r"));
} else if (indexClientOptions == IndexClientOptions.CLEAN) { } else if (indexClientOptions == IndexClientOptions.CLEAN) {
handler.logInfo("Cleaning Index"); handler.logInfo("Cleaning Index");
indexer.cleanIndex(false); indexer.cleanIndex();
} else if (indexClientOptions == IndexClientOptions.FORCECLEAN) { } else if (indexClientOptions == IndexClientOptions.DELETE) {
handler.logInfo("Cleaning Index"); handler.logInfo("Deleting Index");
indexer.cleanIndex(true); indexer.deleteIndex();
} else if (indexClientOptions == IndexClientOptions.BUILD || } else if (indexClientOptions == IndexClientOptions.BUILD ||
indexClientOptions == IndexClientOptions.BUILDANDSPELLCHECK) { indexClientOptions == IndexClientOptions.BUILDANDSPELLCHECK) {
handler.logInfo("(Re)building index from scratch."); handler.logInfo("(Re)building index from scratch.");
indexer.deleteIndex();
indexer.createIndex(context); indexer.createIndex(context);
if (indexClientOptions == IndexClientOptions.BUILDANDSPELLCHECK) { if (indexClientOptions == IndexClientOptions.BUILDANDSPELLCHECK) {
checkRebuildSpellCheck(commandLine, indexer); checkRebuildSpellCheck(commandLine, indexer);
@@ -125,16 +126,14 @@ public class IndexClient extends DSpaceRunnable<IndexDiscoveryScriptConfiguratio
handler.logInfo("Indexed " + count + " object" + (count > 1 ? "s" : "") + " in " + seconds + " seconds"); handler.logInfo("Indexed " + count + " object" + (count > 1 ? "s" : "") + " in " + seconds + " seconds");
} else if (indexClientOptions == IndexClientOptions.UPDATE || } else if (indexClientOptions == IndexClientOptions.UPDATE ||
indexClientOptions == IndexClientOptions.UPDATEANDSPELLCHECK) { indexClientOptions == IndexClientOptions.UPDATEANDSPELLCHECK) {
handler.logInfo("Updating and Cleaning Index"); handler.logInfo("Updating Index");
indexer.cleanIndex(false);
indexer.updateIndex(context, false); indexer.updateIndex(context, false);
if (indexClientOptions == IndexClientOptions.UPDATEANDSPELLCHECK) { if (indexClientOptions == IndexClientOptions.UPDATEANDSPELLCHECK) {
checkRebuildSpellCheck(commandLine, indexer); checkRebuildSpellCheck(commandLine, indexer);
} }
} else if (indexClientOptions == IndexClientOptions.FORCEUPDATE || } else if (indexClientOptions == IndexClientOptions.FORCEUPDATE ||
indexClientOptions == IndexClientOptions.FORCEUPDATEANDSPELLCHECK) { indexClientOptions == IndexClientOptions.FORCEUPDATEANDSPELLCHECK) {
handler.logInfo("Updating and Cleaning Index"); handler.logInfo("Updating Index");
indexer.cleanIndex(true);
indexer.updateIndex(context, true); indexer.updateIndex(context, true);
if (indexClientOptions == IndexClientOptions.FORCEUPDATEANDSPELLCHECK) { if (indexClientOptions == IndexClientOptions.FORCEUPDATEANDSPELLCHECK) {
checkRebuildSpellCheck(commandLine, indexer); checkRebuildSpellCheck(commandLine, indexer);

View File

@@ -17,7 +17,7 @@ import org.apache.commons.cli.Options;
public enum IndexClientOptions { public enum IndexClientOptions {
REMOVE, REMOVE,
CLEAN, CLEAN,
FORCECLEAN, DELETE,
BUILD, BUILD,
BUILDANDSPELLCHECK, BUILDANDSPELLCHECK,
OPTIMIZE, OPTIMIZE,
@@ -41,11 +41,9 @@ public enum IndexClientOptions {
} else if (commandLine.hasOption("r")) { } else if (commandLine.hasOption("r")) {
return IndexClientOptions.REMOVE; return IndexClientOptions.REMOVE;
} else if (commandLine.hasOption("c")) { } else if (commandLine.hasOption("c")) {
if (commandLine.hasOption("f")) { return IndexClientOptions.CLEAN;
return IndexClientOptions.FORCECLEAN; } else if (commandLine.hasOption("d")) {
} else { return IndexClientOptions.DELETE;
return IndexClientOptions.CLEAN;
}
} else if (commandLine.hasOption("b")) { } else if (commandLine.hasOption("b")) {
if (commandLine.hasOption("s")) { if (commandLine.hasOption("s")) {
return IndexClientOptions.BUILDANDSPELLCHECK; return IndexClientOptions.BUILDANDSPELLCHECK;
@@ -83,6 +81,9 @@ public enum IndexClientOptions {
options.addOption("c", "clean", false, options.addOption("c", "clean", false,
"clean existing index removing any documents that no longer exist in the db"); "clean existing index removing any documents that no longer exist in the db");
options.getOption("c").setType(boolean.class); options.getOption("c").setType(boolean.class);
options.addOption("d", "delete", false,
"delete all records from existing index");
options.getOption("d").setType(boolean.class);
options.addOption("b", "build", false, "(re)build index, wiping out current one if it exists"); options.addOption("b", "build", false, "(re)build index, wiping out current one if it exists");
options.getOption("b").setType(boolean.class); options.getOption("b").setType(boolean.class);
options.addOption("s", "spellchecker", false, "Rebuild the spellchecker, can be combined with -b and -f."); options.addOption("s", "spellchecker", false, "Rebuild the spellchecker, can be combined with -b and -f.");

View File

@@ -53,8 +53,9 @@ public interface IndexingService {
void updateIndex(Context context, boolean force, String type); void updateIndex(Context context, boolean force, String type);
void cleanIndex(boolean force) throws IOException, void cleanIndex() throws IOException, SQLException, SearchServiceException;
SQLException, SearchServiceException;
void deleteIndex();
void commit() throws SearchServiceException; void commit() throws SearchServiceException;

View File

@@ -333,17 +333,31 @@ public class SolrServiceImpl implements SearchService, IndexingService {
} }
} }
/**
 * Removes all documents from the discovery index by calling
 * {@code deleteAll()} on every {@link IndexFactory} returned by the
 * index object service factory.
 * <p>
 * The {@link IndexingService#deleteIndex()} contract declares no checked
 * exceptions, so an {@link IOException} or {@link SolrServerException}
 * raised while deleting is logged and not rethrown. Factories that come
 * after the failing one are not processed.
 */
@Override
public void deleteIndex() {
    try {
        final List<IndexFactory> indexableObjectServices = indexObjectServiceFactory.
            getIndexFactories();
        for (IndexFactory indexableObjectService : indexableObjectServices) {
            indexableObjectService.deleteAll();
        }
    } catch (IOException | SolrServerException e) {
        // Report this as a delete failure so it is not confused with cleanIndex() errors.
        log.error("Error deleting discovery index: " + e.getMessage(), e);
    }
}
/** /**
* Iterates over all documents in the Lucene index and verifies they are in * Iterates over all documents in the Lucene index and verifies they are in
* database, if not, they are removed. * database, if not, they are removed.
* *
* @param force whether or not to force a clean index
* @throws IOException IO exception * @throws IOException IO exception
* @throws SQLException sql exception * @throws SQLException sql exception
* @throws SearchServiceException occurs when something went wrong with querying the solr server * @throws SearchServiceException occurs when something went wrong with querying the solr server
*/ */
@Override @Override
public void cleanIndex(boolean force) throws IOException, SQLException, SearchServiceException { public void cleanIndex() throws IOException, SQLException, SearchServiceException {
Context context = new Context(); Context context = new Context();
context.turnOffAuthorisationSystem(); context.turnOffAuthorisationSystem();
@@ -351,56 +365,48 @@ public class SolrServiceImpl implements SearchService, IndexingService {
if (solrSearchCore.getSolr() == null) { if (solrSearchCore.getSolr() == null) {
return; return;
} }
if (force) { // First, we'll just get a count of the total results
final List<IndexFactory> indexableObjectServices = indexObjectServiceFactory. SolrQuery countQuery = new SolrQuery("*:*");
getIndexFactories(); countQuery.setRows(0); // don't actually request any data
for (IndexFactory indexableObjectService : indexableObjectServices) { // Get the total amount of results
indexableObjectService.deleteAll(); QueryResponse totalResponse = solrSearchCore.getSolr().query(countQuery,
} solrSearchCore.REQUEST_METHOD);
} else { long total = totalResponse.getResults().getNumFound();
// First, we'll just get a count of the total results
SolrQuery countQuery = new SolrQuery("*:*");
countQuery.setRows(0); // don't actually request any data
// Get the total amount of results
QueryResponse totalResponse = solrSearchCore.getSolr().query(countQuery,
solrSearchCore.REQUEST_METHOD);
long total = totalResponse.getResults().getNumFound();
int start = 0; int start = 0;
int batch = 100; int batch = 100;
// Now get actual Solr Documents in batches // Now get actual Solr Documents in batches
SolrQuery query = new SolrQuery(); SolrQuery query = new SolrQuery();
query.setFields(SearchUtils.RESOURCE_UNIQUE_ID, SearchUtils.RESOURCE_ID_FIELD, query.setFields(SearchUtils.RESOURCE_UNIQUE_ID, SearchUtils.RESOURCE_ID_FIELD,
SearchUtils.RESOURCE_TYPE_FIELD); SearchUtils.RESOURCE_TYPE_FIELD);
query.addSort(SearchUtils.RESOURCE_UNIQUE_ID, SolrQuery.ORDER.asc); query.addSort(SearchUtils.RESOURCE_UNIQUE_ID, SolrQuery.ORDER.asc);
query.setQuery("*:*"); query.setQuery("*:*");
query.setRows(batch); query.setRows(batch);
// Keep looping until we hit the total number of Solr docs // Keep looping until we hit the total number of Solr docs
while (start < total) { while (start < total) {
query.setStart(start); query.setStart(start);
QueryResponse rsp = solrSearchCore.getSolr().query(query, solrSearchCore.REQUEST_METHOD); QueryResponse rsp = solrSearchCore.getSolr().query(query, solrSearchCore.REQUEST_METHOD);
SolrDocumentList docs = rsp.getResults(); SolrDocumentList docs = rsp.getResults();
for (SolrDocument doc : docs) { for (SolrDocument doc : docs) {
String uniqueID = (String) doc.getFieldValue(SearchUtils.RESOURCE_UNIQUE_ID); String uniqueID = (String) doc.getFieldValue(SearchUtils.RESOURCE_UNIQUE_ID);
IndexableObject o = findIndexableObject(context, doc); IndexableObject o = findIndexableObject(context, doc);
if (o == null) { if (o == null) {
log.info("Deleting: " + uniqueID); log.info("Deleting: " + uniqueID);
/* /*
* Use IndexWriter to delete, its easier to manage * Use IndexWriter to delete, its easier to manage
* write.lock * write.lock
*/ */
unIndexContent(context, uniqueID); unIndexContent(context, uniqueID);
} else { } else {
log.debug("Keeping: " + o.getUniqueIndexID()); log.debug("Keeping: " + o.getUniqueIndexID());
}
} }
start += batch;
} }
start += batch;
} }
} catch (IOException | SQLException | SolrServerException e) { } catch (IOException | SQLException | SolrServerException e) {
log.error("Error cleaning discovery index: " + e.getMessage(), e); log.error("Error cleaning discovery index: " + e.getMessage(), e);

View File

@@ -1327,7 +1327,7 @@ public class DatabaseUtils {
// Reindex Discovery completely // Reindex Discovery completely
// Force clean all content // Force clean all content
this.indexer.cleanIndex(true); this.indexer.deleteIndex();
// Recreate the entire index (overwriting existing one) // Recreate the entire index (overwriting existing one)
this.indexer.createIndex(context); this.indexer.createIndex(context);
// Rebuild spell checker (which is based on index) // Rebuild spell checker (which is based on index)