[DS-3695] Add statistics shard configset, fix various errors, improve logging.

This commit is contained in:
Mark H. Wood
2019-08-26 13:54:44 -04:00
parent 8878b3a313
commit f4d7c35108
3 changed files with 41 additions and 15 deletions

View File

@@ -47,10 +47,13 @@ import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.http.HttpResponse; import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient.RemoteSolrException;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest; import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest; import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.request.CoreAdminRequest; import org.apache.solr.client.solrj.request.CoreAdminRequest;
@@ -108,8 +111,7 @@ import org.springframework.beans.factory.annotation.Autowired;
*/ */
public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBean { public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBean {
private static final org.apache.logging.log4j.Logger log = private static final Logger log = LogManager.getLogger();
org.apache.logging.log4j.LogManager.getLogger(SolrLoggerServiceImpl.class);
private static final String MULTIPLE_VALUES_SPLITTER = "|"; private static final String MULTIPLE_VALUES_SPLITTER = "|";
protected SolrClient solr; protected SolrClient solr;
@@ -167,7 +169,7 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
statisticsCoreURL = configurationService.getProperty("solr-statistics.server"); statisticsCoreURL = configurationService.getProperty("solr-statistics.server");
if (null != statisticsCoreURL) { if (null != statisticsCoreURL) {
Path statisticsPath = Paths.get(new URI(statisticsCoreURL)); Path statisticsPath = Paths.get(new URI(statisticsCoreURL).getPath());
statisticsCoreBase = statisticsPath statisticsCoreBase = statisticsPath
.getName(statisticsPath.getNameCount() - 1) .getName(statisticsPath.getNameCount() - 1)
.toString(); .toString();
@@ -1221,12 +1223,12 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
for (File tempCsv : filesToUpload) { for (File tempCsv : filesToUpload) {
//Upload the data in the csv files to our new solr core //Upload the data in the csv files to our new solr core
ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update/csv"); ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update");
contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8"); contentStreamUpdateRequest.setParam("stream.contentType", "text/csv;charset=utf-8");
contentStreamUpdateRequest.setParam("escape", "\\"); contentStreamUpdateRequest.setParam("escape", "\\");
contentStreamUpdateRequest.setParam("skip", "_version_"); contentStreamUpdateRequest.setParam("skip", "_version_");
contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true); contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8"); contentStreamUpdateRequest.addFile(tempCsv, "text/csv;charset=utf-8");
//Add parsing directives for the multivalued fields so that they are stored as separate values //Add parsing directives for the multivalued fields so that they are stored as separate values
// instead of one value // instead of one value
@@ -1246,7 +1248,7 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
solr.deleteByQuery(filterQuery.toString()); solr.deleteByQuery(filterQuery.toString());
solr.commit(true, true); solr.commit(true, true);
log.info("Moved " + totalRecords + " records into core: " + coreName); log.info("Moved {} records into core: {}", totalRecords, coreName);
} }
FileUtils.deleteDirectory(tempDirectory); FileUtils.deleteDirectory(tempDirectory);
@@ -1254,18 +1256,18 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
protected HttpSolrClient createCore(HttpSolrClient solr, String coreName) protected HttpSolrClient createCore(HttpSolrClient solr, String coreName)
throws IOException, SolrServerException { throws IOException, SolrServerException {
String baseSolrUrl = solr.getBaseURL().replace(statisticsCoreBase, ""); String baseSolrUrl = solr.getBaseURL().replace(statisticsCoreBase, ""); // Has trailing slash
//DS-3458: Test to see if a solr core already exists. If it exists, //DS-3458: Test to see if a solr core already exists. If it exists,
// return a connection to that core. Otherwise create a new core and // return a connection to that core. Otherwise create a new core and
// return a connection to it. // return a connection to it.
HttpSolrClient returnServer = new HttpSolrClient.Builder(baseSolrUrl + "/" + coreName).build(); HttpSolrClient returnServer = new HttpSolrClient.Builder(baseSolrUrl + coreName).build();
try { try {
SolrPingResponse ping = returnServer.ping(); SolrPingResponse ping = returnServer.ping();
log.debug("Ping of Solr Core {} returned with Status {}", log.debug("Ping of Solr Core {} returned with Status {}",
coreName, ping.getStatus()); coreName, ping.getStatus());
return returnServer; return returnServer;
} catch (IOException | SolrServerException e) { } catch (IOException | RemoteSolrException | SolrServerException e) {
log.debug("Ping of Solr Core {} failed with {}. New Core Will be Created", log.debug("Ping of Solr Core {} failed with {}. New Core Will be Created",
coreName, e.getClass().getName()); coreName, e.getClass().getName());
} }
@@ -1274,10 +1276,14 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
//http://lucene.apache.org/solr/4_4_0/solr-solrj/org/apache/solr/client/solrj/request/CoreAdminRequest.html //http://lucene.apache.org/solr/4_4_0/solr-solrj/org/apache/solr/client/solrj/request/CoreAdminRequest.html
CoreAdminRequest.Create create = new CoreAdminRequest.Create(); CoreAdminRequest.Create create = new CoreAdminRequest.Create();
create.setCoreName(coreName); create.setCoreName(coreName);
String configSetName = configurationService
.getProperty("solr-statistics.configset", "statistics");
create.setConfigSet(configSetName);
create.setInstanceDir(coreName);
HttpSolrClient solrServer = new HttpSolrClient.Builder(baseSolrUrl).build(); HttpSolrClient solrServer = new HttpSolrClient.Builder(baseSolrUrl).build();
create.process(solrServer); create.process(solrServer);
log.info("Created core with name: " + coreName); log.info("Created core with name: {} from configset {}", coreName, configSetName);
return returnServer; return returnServer;
} }
@@ -1412,10 +1418,10 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
//Add all the separate csv files //Add all the separate csv files
for (File tempCsv : tempCsvFiles) { for (File tempCsv : tempCsvFiles) {
ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update/csv"); ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update");
contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8"); contentStreamUpdateRequest.setParam("stream.contentType", "text/csv;charset=utf-8");
contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true); contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8"); contentStreamUpdateRequest.addFile(tempCsv, "text/csv;charset=utf-8");
solr.request(contentStreamUpdateRequest); solr.request(contentStreamUpdateRequest);
} }

View File

@@ -0,0 +1,16 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
/**
* Facilities for detecting and storing both model and usage events, and for
* querying the store.
*
* <p>See {@link SolrLoggerUsageEventListener} and {@link StatisticsLoggingConsumer}
* for event capture, and {@link SolrLoggerServiceImpl} for storing and querying.
*/
package org.dspace.statistics;

View File

@@ -15,6 +15,10 @@ solr-statistics.server = ${solr.server}/statistics
# A comma-separated list that contains the bundles for which the bitstreams will be displayed # A comma-separated list that contains the bundles for which the bitstreams will be displayed
solr-statistics.query.filter.bundles=ORIGINAL solr-statistics.query.filter.bundles=ORIGINAL
# Name of the "configset" (bundle of template core files) which will be used to
# create new Solr cores when sharding the statistics data. If this property is
# not set, the configset named "statistics" is used.
solr-statistics.configset = statistics
# control solr statistics querying to filter out spider IPs # control solr statistics querying to filter out spider IPs
# false by default # false by default
#solr-statistics.query.filter.spiderIp = false #solr-statistics.query.filter.spiderIp = false
@@ -30,4 +34,4 @@ solr-statistics.spiderips.urls = http://iplists.com/google.txt, \
http://iplists.com/infoseek.txt, \ http://iplists.com/infoseek.txt, \
http://iplists.com/altavista.txt, \ http://iplists.com/altavista.txt, \
http://iplists.com/excite.txt, \ http://iplists.com/excite.txt, \
http://iplists.com/misc.txt http://iplists.com/misc.txt