Merge pull request #2495 from mwoodiupui/DS-3695-simple

[DS-3695] Upgrade to Solr 7: support sharded statistics simply
Mark H. Wood
2019-09-12 10:54:45 -04:00
committed by GitHub
12 changed files with 313 additions and 330 deletions

View File

@@ -759,6 +759,13 @@
<version>20180130</version>
</dependency>
<!-- Used for Solr core export/import -->
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>4.5</version>
</dependency>
<dependency>
<groupId>org.apache.velocity</groupId>
<artifactId>velocity-engine-core</artifactId>
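
For orientation, a minimal sketch (not part of the commit) of the opencsv 4.x read/write loop that the new export/import and repair code relies on; the file names are placeholders:

    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    import com.opencsv.CSVReader;
    import com.opencsv.CSVWriter;

    // Copy one CSV to another, record by record.
    public class CsvRoundTrip {
        public static void main(String[] args) throws Exception {
            try (CSVReader reader = new CSVReader(
                     Files.newBufferedReader(Paths.get("in.csv"), StandardCharsets.UTF_8));
                 CSVWriter writer = new CSVWriter(
                     Files.newBufferedWriter(Paths.get("out.csv"), StandardCharsets.UTF_8))) {
                String[] record;
                while (null != (record = reader.readNext())) {
                    writer.writeNext(record); // Quotes and escapes each field.
                }
            }
        }
    }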

View File

@@ -8,6 +8,7 @@
package org.dspace.authority;
import java.sql.SQLException;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
@@ -23,6 +24,7 @@ import org.dspace.content.Item;
import org.dspace.content.MetadataValue;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.core.Context;
import org.dspace.util.SolrUtils;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.ISODateTimeFormat;
@@ -34,8 +36,6 @@ import org.joda.time.format.ISODateTimeFormat;
* @author Mark Diggory (markd at atmire dot com)
*/
public class AuthorityValue {
/**
* The id of the record in solr
*/
@@ -150,12 +150,13 @@ public class AuthorityValue {
public SolrInputDocument getSolrInputDocument() {
SolrInputDocument doc = new SolrInputDocument();
DateFormat solrDateFormatter = SolrUtils.getDateFormatter();
doc.addField("id", getId());
doc.addField("field", getField());
doc.addField("value", getValue());
doc.addField("deleted", isDeleted());
doc.addField("creation_date", getCreationDate());
doc.addField("last_modified_date", getLastModified());
doc.addField("creation_date", solrDateFormatter.format(getCreationDate()));
doc.addField("last_modified_date", solrDateFormatter.format(getLastModified()));
doc.addField("authority_type", getAuthorityType());
return doc;
}
@@ -196,12 +197,12 @@ public class AuthorityValue {
* @return map
*/
public Map<String, String> choiceSelectMap() {
return new HashMap<String, String>();
return new HashMap<>();
}
public List<DateTimeFormatter> getDateFormatters() {
List<DateTimeFormatter> list = new ArrayList<DateTimeFormatter>();
List<DateTimeFormatter> list = new ArrayList<>();
list.add(ISODateTimeFormat.dateTime());
list.add(ISODateTimeFormat.dateTimeNoMillis());
return list;

View File

@@ -14,6 +14,7 @@ import java.io.PrintWriter;
import java.io.Serializable;
import java.io.StringWriter;
import java.sql.SQLException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
@@ -105,6 +106,7 @@ import org.dspace.handle.service.HandleService;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.dspace.storage.rdbms.DatabaseUtils;
import org.dspace.util.MultiFormatDateParser;
import org.dspace.util.SolrUtils;
import org.dspace.workflow.WorkflowItem;
import org.dspace.xmlworkflow.WorkflowConfigurationException;
import org.dspace.xmlworkflow.factory.XmlWorkflowFactory;
@@ -1038,6 +1040,8 @@ public class SolrServiceImpl implements SearchService, IndexingService {
*/
protected void buildDocument(Context context, Item item)
throws SQLException, IOException {
final DateFormat solrDateFormatter = SolrUtils.getDateFormatter();
String handle = item.getHandle();
if (handle == null) {
@@ -1055,7 +1059,7 @@ public class SolrServiceImpl implements SearchService, IndexingService {
doc.addField("archived", item.isArchived());
doc.addField("withdrawn", item.isWithdrawn());
doc.addField("discoverable", item.isDiscoverable());
doc.addField("lastModified", item.getLastModified());
doc.addField("lastModified", solrDateFormatter.format(item.getLastModified()));
EPerson submitter = item.getSubmitter();
if (submitter != null) {
@@ -1476,7 +1480,9 @@ public class SolrServiceImpl implements SearchService, IndexingService {
if (type.equals(DiscoveryConfigurationParameters.TYPE_DATE)) {
Date date = MultiFormatDateParser.parse(value);
if (date != null) {
doc.addField(field + "_dt", date);
final DateFormat solrDateFormatter = SolrUtils.getDateFormatter();
String stringDate = solrDateFormatter.format(date);
doc.addField(field + "_dt", stringDate);
} else {
log.warn("Error while indexing sort date field, item: " + item
.getHandle() + " metadata field: " + field + " date value: " + date);
@@ -1741,7 +1747,8 @@ public class SolrServiceImpl implements SearchService, IndexingService {
List<String> locations) {
// want to be able to check when last updated
// (not tokenized, but it is indexed)
doc.addField(LAST_INDEXED_FIELD, new Date());
doc.addField(LAST_INDEXED_FIELD,
SolrUtils.getDateFormatter().format(new Date()));
// New fields to weaken the dependence on handles, and allow for faster
// list display

View File

@@ -8,7 +8,6 @@
package org.dspace.statistics;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
@@ -17,7 +16,10 @@ import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.InetAddress;
import java.net.URI;
import java.net.URLEncoder;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.sql.SQLException;
import java.text.DateFormat;
import java.text.ParseException;
@@ -45,14 +47,18 @@ import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient.RemoteSolrException;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.request.CoreAdminRequest;
import org.apache.solr.client.solrj.request.LukeRequest;
import org.apache.solr.client.solrj.response.CoreAdminResponse;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.LukeResponse;
import org.apache.solr.client.solrj.response.QueryResponse;
@@ -64,10 +70,12 @@ import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.luke.FieldFlag;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.util.NamedList;
import org.dspace.content.Bitstream;
import org.dspace.content.Bundle;
import org.dspace.content.Collection;
@@ -103,8 +111,7 @@ import org.springframework.beans.factory.annotation.Autowired;
*/
public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBean {
private static final org.apache.logging.log4j.Logger log =
org.apache.logging.log4j.LogManager.getLogger(SolrLoggerServiceImpl.class);
private static final Logger log = LogManager.getLogger();
private static final String MULTIPLE_VALUES_SPLITTER = "|";
protected SolrClient solr;
@@ -129,6 +136,12 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
@Autowired(required = true)
private ClientInfoService clientInfoService;
/** URL to the current-year statistics core. Prior-year shards will have a year suffixed. */
private String statisticsCoreURL;
/** Name of the current-year statistics core. Prior-year shards will have a year suffixed. */
private String statisticsCoreBase;
public static enum StatisticsType {
VIEW("view"),
SEARCH("search"),
@@ -153,14 +166,26 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
@Override
public void afterPropertiesSet() throws Exception {
log.info("solr-statistics.server:" + configurationService.getProperty("solr-statistics.server"));
log.info("usage-statistics.dbfile:" + configurationService.getProperty("usage-statistics.dbfile"));
statisticsCoreURL = configurationService.getProperty("solr-statistics.server");
if (null != statisticsCoreURL) {
Path statisticsPath = Paths.get(new URI(statisticsCoreURL).getPath());
statisticsCoreBase = statisticsPath
.getName(statisticsPath.getNameCount() - 1)
.toString();
} else {
statisticsCoreBase = null;
}
log.info("solr-statistics.server: {}", statisticsCoreURL);
log.info("usage-statistics.dbfile: {}",
configurationService.getProperty("usage-statistics.dbfile"));
HttpSolrClient server = null;
if (configurationService.getProperty("solr-statistics.server") != null) {
if (statisticsCoreURL != null) {
try {
server = new HttpSolrClient.Builder(configurationService.getProperty("solr-statistics.server")).build();
server = new HttpSolrClient.Builder(statisticsCoreURL).build();
} catch (Exception e) {
log.error("Error accessing Solr server configured in 'solr-statistics.server'", e);
}
@@ -1159,7 +1184,7 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
filterQuery.append(")");
Map<String, String> yearQueryParams = new HashMap<String, String>();
Map<String, String> yearQueryParams = new HashMap<>();
yearQueryParams.put(CommonParams.Q, "*:*");
yearQueryParams.put(CommonParams.ROWS, String.valueOf(10000));
yearQueryParams.put(CommonParams.FQ, filterQuery.toString());
@@ -1170,7 +1195,7 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
yearQueryParams.put("csv.mv.separator", MULTIPLE_VALUES_SPLITTER);
//Start by creating a new core
String coreName = "statistics-" + dcStart.getYearUTC();
String coreName = statisticsCoreBase + "-" + dcStart.getYearUTC();
HttpSolrClient statisticsYearServer = createCore((HttpSolrClient) solr, coreName);
System.out.println("Moving: " + totalRecords + " into core " + coreName);
@@ -1198,12 +1223,12 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
for (File tempCsv : filesToUpload) {
//Upload the data in the csv files to our new solr core
ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update/csv");
contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8");
ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update");
contentStreamUpdateRequest.setParam("stream.contentType", "text/csv;charset=utf-8");
contentStreamUpdateRequest.setParam("escape", "\\");
contentStreamUpdateRequest.setParam("skip", "_version_");
contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8");
contentStreamUpdateRequest.addFile(tempCsv, "text/csv;charset=utf-8");
//Add parsing directives for the multivalued fields so that they are stored as separate values
// instead of one value
@@ -1223,39 +1248,42 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
solr.deleteByQuery(filterQuery.toString());
solr.commit(true, true);
log.info("Moved " + totalRecords + " records into core: " + coreName);
log.info("Moved {} records into core: {}", totalRecords, coreName);
}
FileUtils.deleteDirectory(tempDirectory);
}
protected HttpSolrClient createCore(HttpSolrClient solr, String coreName) throws IOException, SolrServerException {
String solrDir = configurationService.getProperty("dspace.dir") + File.separator + "solr" + File.separator;
String baseSolrUrl = solr.getBaseURL().replace("statistics", "");
protected HttpSolrClient createCore(HttpSolrClient solr, String coreName)
throws IOException, SolrServerException {
String baseSolrUrl = solr.getBaseURL().replace(statisticsCoreBase, ""); // Has trailing slash
//DS-3458: Test to see if a solr core already exists. If it exists, return that server. Otherwise create a
// new one.
HttpSolrClient returnServer = new HttpSolrClient.Builder(baseSolrUrl + "/" + coreName).build();
//DS-3458: Test to see if a solr core already exists. If it exists,
// return a connection to that core. Otherwise create a new core and
// return a connection to it.
HttpSolrClient returnServer = new HttpSolrClient.Builder(baseSolrUrl + coreName).build();
try {
SolrPingResponse ping = returnServer.ping();
log.debug(String.format("Ping of Solr Core [%s] Returned with Status [%d]", coreName, ping.getStatus()));
log.debug("Ping of Solr Core {} returned with Status {}",
coreName, ping.getStatus());
return returnServer;
} catch (Exception e) {
log.debug(String.format("Ping of Solr Core [%s] Failed with [%s]. New Core Will be Created", coreName,
e.getClass().getName()));
} catch (IOException | RemoteSolrException | SolrServerException e) {
log.debug("Ping of Solr Core {} failed with {}. New Core Will be Created",
coreName, e.getClass().getName());
}
//Unfortunately, this class is documented as "experimental and subject to change" on the Lucene website.
//http://lucene.apache.org/solr/4_4_0/solr-solrj/org/apache/solr/client/solrj/request/CoreAdminRequest.html
CoreAdminRequest.Create create = new CoreAdminRequest.Create();
create.setCoreName(coreName);
String configSetName = configurationService
.getProperty("solr-statistics.configset", "statistics");
create.setConfigSet(configSetName);
create.setInstanceDir(coreName);
//The config files for a statistics shard reside within the statistics repository
create.setInstanceDir("statistics");
create.setDataDir(solrDir + coreName + File.separator + "data");
HttpSolrClient solrServer = new HttpSolrClient.Builder(baseSolrUrl).build();
create.process(solrServer);
log.info("Created core with name: " + coreName);
log.info("Created core with name: {} from configset {}", coreName, configSetName);
return returnServer;
}
@@ -1390,10 +1418,10 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
//Add all the separate csv files
for (File tempCsv : tempCsvFiles) {
ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update/csv");
contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8");
ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update");
contentStreamUpdateRequest.setParam("stream.contentType", "text/csv;charset=utf-8");
contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8");
contentStreamUpdateRequest.addFile(tempCsv, "text/csv;charset=utf-8");
solr.request(contentStreamUpdateRequest);
}
@@ -1520,43 +1548,49 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
}
/*
* The statistics shards should not be initialized until all Tomcat webapps are fully initialized.
* DS-3457 uncovered an issue in DSpace 6.x in which this code triggered Tomcat to hang when statistics shards are
* present.
* This code is synchronized in the event that 2 threads trigger the initialization at the same time.
* The statistics shards should not be initialized until all Tomcat webapps
* are fully initialized. DS-3457 uncovered an issue in DSpace 6.x in which
* this code triggered Tomcat to hang when statistics shards are present.
* This code is synchronized in the event that 2 threads trigger the
* initialization at the same time.
*/
protected synchronized void initSolrYearCores() {
if (statisticYearCoresInit || !(solr instanceof HttpSolrClient)) {
return;
}
try {
//Base url should look like: http://localhost:{port.number}/solr
String baseSolrUrl = ((HttpSolrClient) solr).getBaseURL().replace(statisticsCoreBase, "");
try (HttpSolrClient enumClient = new HttpSolrClient.Builder(baseSolrUrl).build()) {
//Attempt to retrieve all the statistic year cores
File solrDir = new File(
configurationService.getProperty("dspace.dir") + File.separator + "solr" + File.separator);
File[] solrCoreFiles = solrDir.listFiles(new FileFilter() {
@Override
public boolean accept(File file) {
//Core name example: statistics-2008
return file.getName().matches("statistics-\\d\\d\\d\\d");
CoreAdminRequest coresRequest = new CoreAdminRequest();
coresRequest.setAction(CoreAdminAction.STATUS);
CoreAdminResponse coresResponse = coresRequest.process(enumClient);
NamedList<Object> response = coresResponse.getResponse();
NamedList<Object> coreStatuses = (NamedList<Object>) response.get("status");
List<String> statCoreNames = new ArrayList<>(coreStatuses.size());
for (Map.Entry<String, Object> coreStatus : coreStatuses) {
String coreName = coreStatus.getKey();
if (coreName.startsWith(statisticsCoreBase)) {
statCoreNames.add(coreName);
}
});
//Base url should look like: http://localhost:{port.number}/solr
String baseSolrUrl = ((HttpSolrClient) solr).getBaseURL().replace("statistics", "");
for (File solrCoreFile : solrCoreFiles) {
log.info("Loading core with name: " + solrCoreFile.getName());
}
createCore((HttpSolrClient) solr, solrCoreFile.getName());
for (String statCoreName : statCoreNames) {
log.info("Loading core with name: " + statCoreName);
createCore((HttpSolrClient) solr, statCoreName);
//Add it to our cores list so we can query it!
statisticYearCores
.add(baseSolrUrl.replace("http://", "").replace("https://", "") + solrCoreFile.getName());
.add(baseSolrUrl.replace("http://", "").replace("https://", "") + statCoreName);
}
//Also add the core containing the current year!
statisticYearCores.add(((HttpSolrClient) solr)
.getBaseURL()
.replace("http://", "")
.replace("https://", ""));
} catch (Exception e) {
} catch (IOException | SolrServerException e) {
log.error(e.getMessage(), e);
}
statisticYearCoresInit = true;
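
The discovery logic above replaces the old filesystem scan for statistics-NNNN directories with a CoreAdminRequest STATUS call, which also works when Solr runs on a different host. A standalone sketch of that call (the base URL and core-name prefix here are assumptions, not part of the commit):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;

    import org.apache.solr.client.solrj.impl.HttpSolrClient;
    import org.apache.solr.client.solrj.request.CoreAdminRequest;
    import org.apache.solr.client.solrj.response.CoreAdminResponse;
    import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;
    import org.apache.solr.common.util.NamedList;

    public class ListStatisticsCores {
        public static void main(String[] args) throws Exception {
            try (HttpSolrClient client
                    = new HttpSolrClient.Builder("http://localhost:8983/solr/").build()) {
                CoreAdminRequest request = new CoreAdminRequest();
                request.setAction(CoreAdminAction.STATUS);
                CoreAdminResponse response = request.process(client);
                NamedList<Object> statuses
                    = (NamedList<Object>) response.getResponse().get("status");
                List<String> statCores = new ArrayList<>(statuses.size());
                for (Map.Entry<String, Object> status : statuses) {
                    // e.g. statistics, statistics-2017, statistics-2018
                    if (status.getKey().startsWith("statistics")) {
                        statCores.add(status.getKey());
                    }
                }
                System.out.println(statCores); // One entry per statistics shard.
            }
        }
    }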

View File

@@ -0,0 +1,16 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
/**
* Facilities for detecting and storing both model and usage events, and for
* querying the store.
*
* See {@link SolrLoggerUsageEventListener} and {@link StatisticsLoggingConsumer}
* for event capture, and {@link SolrLoggerServiceImpl} for storing and querying.
*/
package org.dspace.statistics;

View File

@@ -0,0 +1,125 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.util;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.UUID;
import com.opencsv.CSVReader;
import com.opencsv.CSVWriter;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
/**
* Repair various problems in a statistics core export CSV.
* @author Mark H. Wood <mwood@iupui.edu>
*/
public class RepairDump {
private RepairDump() {}
/**
* Repair known classes of problems with exported statistics.
* Reads standard input, writes repaired CSV to standard output.
*
* @param args command line arguments.
*/
public static void main(String[] args) {
long recordCount = 0; // Input record counter.
long repairCount = 0; // Repaired records counter.
boolean verbose; // Give more information about what's happening.
// Analyze the command line.
Options options = new Options();
options.addOption("h", "help", false, "Give help on options.");
options.addOption("v", "verbose", false, "Write extra information to standard error.");
CommandLine command = null;
try {
command = new DefaultParser().parse(options, args);
} catch (ParseException ex) {
System.err.println(ex.getMessage());
System.exit(1);
}
if (command.hasOption("h")) {
giveHelp(options);
System.exit(0);
}
verbose = command.hasOption("v");
// Copy standard in to standard out, fixing problems.
try (
Reader input = new InputStreamReader(System.in, StandardCharsets.UTF_8);
CSVReader csvReader = new CSVReader(input);
Writer output = new OutputStreamWriter(System.out, StandardCharsets.UTF_8);
CSVWriter csvWriter = new CSVWriter(output);
) {
// Read the column headers.
String[] fields = csvReader.readNext();
if (null == fields) { // Guard against empty input.
System.err.println("Error: input is empty.");
System.exit(1);
}
// Which column is "uid"?
int uidIndex = -1;
for (int i = 0; i < fields.length; i++) {
if (fields[i].equals("uid")) {
uidIndex = i;
break;
}
}
if (uidIndex < 0) {
System.err.println("Error: input contains no 'uid' column.");
System.exit(1);
}
// Copy the headers to output.
csvWriter.writeNext(fields);
// Copy records to output, repairing any problems that we find.
while (null != (fields = csvReader.readNext())) {
recordCount++;
// Set 'uid' to a new random UUID if empty.
if (fields.length < uidIndex + 1) { // Too short to have 'uid'
fields = Arrays.copyOf(fields, uidIndex + 1); // Pad with nulls so fields[uidIndex] exists.
}
if (StringUtils.isBlank(fields[uidIndex])) { // 'uid' field is empty.
if (verbose) {
System.err.format("Missing 'uid' at record %d%n", recordCount);
}
fields[uidIndex] = UUID.randomUUID().toString();
repairCount++;
}
// Write repaired record.
csvWriter.writeNext(fields);
}
} catch (IOException ex) {
System.err.format("Could not read the export at record %d: ", recordCount);
System.err.println(ex.getMessage());
} finally {
System.err.format("Repaired %d out of %d records.%n",
repairCount, recordCount);
}
}
private static void giveHelp(Options options) {
String className = MethodHandles.lookup().lookupClass().getCanonicalName();
new HelpFormatter().printHelp(className + " [options]", options);
}
}
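
Since RepairDump is a stdin-to-stdout filter, it slots between a core export and a re-import. A hypothetical invocation, assuming the standard DSpace launcher; the file names are illustrative:

    [dspace]/bin/dspace dsrun org.dspace.statistics.util.RepairDump -v < statistics-export.csv > statistics-repaired.csv

Records lacking a 'uid' value receive a fresh random UUID; everything else passes through unchanged, and the repair summary goes to standard error.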

View File

@@ -0,0 +1,40 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.util;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.time.ZoneOffset;
import java.util.TimeZone;
/**
* Common constants and static methods for working with Solr.
*
* @author Mark H. Wood <mwood@iupui.edu>
*/
public class SolrUtils {
/** Solr uses UTC always. */
public static final TimeZone SOLR_TIME_ZONE = TimeZone.getTimeZone(ZoneOffset.UTC);
/** Restricted ISO 8601 format used by Solr. */
public static final String SOLR_DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
/** Do not instantiate. */
private SolrUtils() { }
/**
* Create a formatter configured for Solr-style date strings and the UTC time zone.
*
* @see #SOLR_DATE_FORMAT
* @return date formatter compatible with Solr.
*/
public static DateFormat getDateFormatter() {
DateFormat formatter = new SimpleDateFormat(SOLR_DATE_FORMAT);
formatter.setTimeZone(SOLR_TIME_ZONE);
return formatter;
}
}
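
A short sketch of how the indexing changes above use this helper (the printed value is only illustrative):

    import java.text.DateFormat;
    import java.util.Date;

    import org.dspace.util.SolrUtils;

    public class SolrDateDemo {
        public static void main(String[] args) {
            DateFormat formatter = SolrUtils.getDateFormatter();
            // Prints the restricted ISO 8601 form Solr expects,
            // e.g. 2019-09-12T14:54:45.000Z (always UTC).
            System.out.println(formatter.format(new Date()));
        }
    }

Returning a fresh SimpleDateFormat on each call sidesteps that class's lack of thread safety: callers format and discard rather than share an instance.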

View File

@@ -56,6 +56,7 @@ import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.dspace.util.SolrUtils;
import org.dspace.xoai.exceptions.CompilingException;
import org.dspace.xoai.services.api.CollectionsService;
import org.dspace.xoai.services.api.cache.XOAICacheService;
@@ -313,7 +314,9 @@ public class XOAI {
}
System.out.println("Total: " + i + " items");
if (i > 0) {
server.add(list);
if (!list.isEmpty()) {
server.add(list);
}
server.commit(true, true);
list.clear();
}
@@ -413,7 +416,8 @@ public class XOAI {
* relevant policy dates and the standard lastModified date and take the
* most recent of those which have already passed.
*/
doc.addField("item.lastmodified", this.getMostRecentModificationDate(item));
doc.addField("item.lastmodified", SolrUtils.getDateFormatter()
.format(this.getMostRecentModificationDate(item)));
if (item.getSubmitter() != null) {
doc.addField("item.submitter", item.getSubmitter().getEmail());

View File

@@ -15,6 +15,10 @@ solr-statistics.server = ${solr.server}/statistics
# A comma-separated list that contains the bundles for which the bitstreams will be displayed
solr-statistics.query.filter.bundles=ORIGINAL
# Name of the "configset" (bundle of template core files) which will be used to
# create new Solr cores when sharding the statistics data.
solr-statistics.configset = statistics
# control solr statistics querying to filter out spider IPs
# false by default
#solr-statistics.query.filter.spiderIp = false
@@ -30,4 +34,4 @@ solr-statistics.spiderips.urls = http://iplists.com/google.txt, \
http://iplists.com/infoseek.txt, \
http://iplists.com/altavista.txt, \
http://iplists.com/excite.txt, \
http://iplists.com/misc.txt
http://iplists.com/misc.txt
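
A site that maintains its own template core can point the new property at it; a hypothetical example:

    solr-statistics.configset = my-statistics-template

New yearly shards created by the sharding task would then be built from that configset instead of the stock statistics one.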

View File

@@ -143,14 +143,17 @@
<dynamicField name="*_f" type="float" indexed="true" stored="true"/>
<dynamicField name="*_d" type="double" indexed="true" stored="true"/>
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="*_ti" type="int" indexed="true" stored="true"/>
<dynamicField name="*_tl" type="long" indexed="true" stored="true"/>
<dynamicField name="*_tf" type="float" indexed="true" stored="true"/>
<dynamicField name="*_td" type="double" indexed="true" stored="true"/>
<dynamicField name="*_tdt" type="date" indexed="true" stored="true"/>
<dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
<dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
<dynamicField name="ignored_*" type="ignored" multiValued="true"/>
<dynamicField name="attr_*" type="textgen" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="random_*" type="random" />
</fields>

View File

@@ -321,7 +321,6 @@
<dynamicField name="*_tl" type="long" indexed="true" stored="true" docValues="true"/>
<dynamicField name="*_tf" type="float" indexed="true" stored="true" docValues="true"/>
<dynamicField name="*_td" type="double" indexed="true" stored="true" docValues="true"/>
<dynamicField name="*_tdt" type="date" indexed="true" stored="true" docValues="true"/>
<dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>

View File

@@ -82,10 +82,6 @@ Common usage:
<!-- Default location of GeoLite2 City database to download. This may be overridden, if URL path changes. -->
<property name="geolite" value="http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.tar.gz" />
<!-- Default location of lucene-core JAR to download (for update_solr_indexes). This may be overridden, if URL path changes. -->
<!-- NOTE: this URL should have the version of the JAR replaced with "[version]" -->
<property name="lucene-core" value="http://search.maven.org/remotecontent?filepath=org/apache/lucene/lucene-core/[version]/lucene-core-[version].jar" />
<!-- ============================================================= -->
<!-- The DSpace class path for executing installation targets -->
@@ -124,7 +120,6 @@ Common usage:
<echo message="update_geolite --> Dowload and install GeoCity database into ${dspace.dir}/config" />
<echo message="update_code --> Update compiled code (bin, lib, and etc directories)" />
<echo message="update_webapps --> Update web applications" />
<echo message="update_solr_indexes --> Checks if any Solr indexes need upgrading (to latest Solr), and if so, upgrades them." />
<echo message="" />
<echo message="init_configs --> Write the configuration files to ${dspace.dir}/config" />
<echo message="install_code --> Install compiled code into ${dspace.dir}" />
@@ -173,7 +168,9 @@ Common usage:
<!-- Update an installation -->
<!-- ============================================================= -->
<target name="update" depends="update_configs,update_code,test_database,update_webapps,update_solr_indexes" description="Update installed code and web applications (without clobbering data/config)">
<target name="update"
depends="update_configs,update_code,test_database,update_webapps"
description="Update installed code and web applications (without clobbering data/config)">
</target>
<!-- ============================================================= -->
@@ -371,7 +368,10 @@ Common usage:
</target>
<target name="overwrite_solr_configs" description="Overwrites a configuration directory." if="${overwrite}" depends="copy_solr_configs_keep">
<target name="overwrite_solr_configs"
description="Overwrites a configuration directory."
if="${overwrite}"
depends="copy_solr_configs_keep">
<!--
Copy files that are absent in target
@@ -920,261 +920,4 @@ You may manually install this file by following these steps:
<antcall target="update_geolite" />
</target>
<!-- Check if any Solr indexes need updating to the version of
Solr/Lucene we are using. -->
<target name="update_solr_indexes">
<echo>Checking if any Solr indexes (${dspace.dir}/solr/*) need upgrading...</echo>
<!--
For each index:
(1) Ensure the index is upgraded to Solr/Lucene 3.5.0
(really old indexes need upgrading to this version first)
(2) Upgrade from 3.5.0 through each major version to
latest version of Solr-J used in DSpace.
-->
<!-- Determine what version of Solr/Lucene is being used in DSpace. -->
<java classname="org.dspace.app.util.IndexVersion"
classpathref="class.path"
fork="yes"
outputproperty="latest_version">
<sysproperty key="log4j.configurationFile"
value="file:config/log4j2-console.xml" />
<sysproperty key="dspace.log.init.disable" value="true" />
<arg value="-v" />
</java>
<echo>Current version of Solr/Lucene: ${latest_version}</echo>
<!-- Now, update each index. -->
<foreach target='update_a_solr_collection'
param='indexDir'>
<path id='solr.collection.path'>
<dirset dir='${dspace.dir}/solr'>
<include name='*/data/index'/>
</dirset>
</path>
</foreach>
<!-- Finally, cleanup any Lucene JARs that were downloaded
into the Ant build directory. (These JARs are
automatically downloaded when an index needs an upgrade). -->
<echo>Cleanup any downloaded lucene-core-*.jar files. We don't need them anymore.</echo>
<delete>
<fileset dir="." includes="lucene-core-*.jar"/>
</delete>
</target>
<!-- Check the version of a single index and apply all needed updates. -->
<!-- REQUIRES these params: -->
<!-- * indexDir = Full path to Index directory -->
<target name='update_a_solr_collection'>
<if>
<available file="${indexDir}" type="dir"/>
<then>
<!-- Ensure that index is >= Solr/Lucene 3.5.0 -->
<antcall target="check_solr_index">
<param name="indexDir" value="${indexDir}"/>
<param name="version" value="3.5.0"/>
<param name="included" value="false"/>
</antcall>
<!-- Ensure that index is >= Solr/Lucene 4.10.0 -->
<antcall target="check_solr_index">
<param name="indexDir" value="${indexDir}"/>
<param name="version" value="4.10.0"/>
<param name="included" value="false"/>
</antcall>
<!-- Ensure that index is >= Solr/Lucene 5.5.0 -->
<antcall target="check_solr_index">
<param name="indexDir" value="${indexDir}"/>
<param name="version" value="5.5.0"/>
<param name="included" value="false"/>
</antcall>
<!-- Ensure that index is >= Solr/Lucene 6.6.0 -->
<antcall target="check_solr_index">
<param name="indexDir" value="${indexDir}"/>
<param name="version" value="6.6.0"/>
<param name="included" value="false"/>
</antcall>
<!-- Ensure that index is upgraded to the version included in DSpace. -->
<antcall target="check_solr_index">
<param name="indexDir" value="${indexDir}"/>
<param name="version" value="${latest_version}"/>
<param name="included" value="true"/>
</antcall>
</then>
</if>
</target>
<!-- Target to check an existing Solr index to see if it -->
<!-- meets a particular version requirement. -->
<!-- If the index is outdated, "upgrade_solr_index" is -->
<!-- called to upgrade it to the specified version. -->
<!-- REQUIRES these params: -->
<!-- * indexDir = Full path to Index directory -->
<!-- * version = Version of Solr to check against. -->
<!-- * included = Whether this version of Solr/Lucene is already -->
<!-- included in DSpace classpath. -->
<target name="check_solr_index">
<!-- Check if the Solr Statistics index is AT LEAST compatible
with Solr/Lucene version 3.5 -->
<echo>Checking if the Solr index at ${indexDir} is >= Solr ${version}</echo>
<java classname="org.dspace.app.util.IndexVersion"
classpathref="class.path"
fork="yes"
resultproperty="version_returncode"
outputproperty="version_compare">
<sysproperty key="log4j.configurationFile"
value="file:config/log4j2-console.xml" />
<sysproperty key="dspace.log.init.disable" value="true" />
<arg value="${indexDir}" />
<arg value="${version}" />
</java>
<!-- Fail if the previous command returns a non-zero result, as this
means we couldn't determine the Solr index version.
We need this special error handling, since we are capturing the
output in ${version_compare}. -->
<fail>
<condition>
<not>
<equals arg1="${version_returncode}" arg2="0"/>
</not>
</condition>
ERROR occurred while checking Solr index version:
${version_compare}
</fail>
<!-- If the above java command returned -1, that means this
index is NOT yet upgraded to the specified Solr
version. So, let's upgrade it! -->
<if>
<equals arg1="${version_compare}" arg2="-1"/>
<then>
<echo message="The Solr index in ${indexDir} needs an upgrade to Solr ${version}"/>
<!-- Call 'upgrade_solr_index' to actually upgrade the index-->
<antcall target="upgrade_solr_index">
<param name="indexDir" value="${indexDir}"/>
<param name="version" value="${version}"/>
<param name="included" value="${included}"/>
</antcall>
</then>
<else>
<echo message="The Solr index in ${indexDir} IS >= Solr ${version}. Looks good!"/>
</else>
</if>
</target>
<!-- Target to actually *upgrade* an existing Solr index -->
<!-- REQUIRES these params: -->
<!-- * indexDir = Full path to Index directory -->
<!-- * version = Version of Solr to upgrade to -->
<!-- * included = Whether this version of Solr/Lucene is already -->
<!-- included in DSpace classpath. If 'false', then -->
<!-- the appropriate Lucene JAR will be downloaded. -->
<target name="upgrade_solr_index">
<echo>Upgrading Solr/Lucene Index at ${indexDir} to Solr/Lucene ${version}.</echo>
<!-- Replace the "[version]" placeholders in ${lucene-core} with the actual ${version}-->
<propertyregex property="lucene-core.jar"
input="${lucene-core}"
regexp="\[version\]"
replace="${version}"
global="true"/>
<!-- Download the appropriate version of the lucene-core.jar, if we
haven't already AND it's not included on our DSpace classpath. -->
<if>
<and>
<not>
<available file="./lucene-core-${version}.jar"/>
</not>
<equals arg1="${included}" arg2="false" casesensitive="false"/>
</and>
<then>
<echo>Downloading ${lucene-core.jar}</echo>
<trycatch property="lucene.download.error">
<try>
<get src="${lucene-core.jar}"
dest="./lucene-core-${version}.jar"
verbose="true"/>
</try>
<catch>
<echo>
====================================================================
WARNING : FAILED TO DOWNLOAD LUCENE-CORE.JAR
(Needed to upgrade your existing Solr indexes)
Underlying Error: ${lucene.download.error}
In order to upgrade your Solr indexes to the latest version,
you will need to do the following:
(1) Manually download the lucene-core-${version}.jar at:
${lucene-core.jar}
(2) Place the JAR in the "[src]/dspace/target/dspace-installer" directory.
[src]/dspace/target/dspace-installer/${lucene-core}.jar
(3) Manually re-run the following 'ant' upgrade command:
ant update_solr_indexes
For more information, please see the Upgrade Instructions.
====================================================================
</echo>
</catch>
</trycatch>
</then>
</if>
<!-- If we downloaded a "lucene-core-*.jar" in the previous step,
then use that downloaded JAR to upgrade the Solr/Lucene Index -->
<if>
<and>
<available file="./lucene-core-${version}.jar"/>
<equals arg1="${included}" arg2="false" casesensitive="false"/>
</and>
<then>
<echo>Upgrading the Solr index in ${indexDir}. Depending on the index size, this may take a while (please be patient)...</echo>
<!-- Run the Lucene IndexUpgrader on this index. This
will upgrade the index based on the version of
"lucene-core.jar" -->
<java classname="org.apache.lucene.index.IndexUpgrader"
classpath="./lucene-core-${version}.jar"
fork="yes"
failonerror="yes">
<sysproperty key="log4j.configurationFile"
value="file:config/log4j2-console.xml" />
<sysproperty key="dspace.log.init.disable"
value="true" />
<arg value="${indexDir}" />
</java>
</then>
</if>
<!-- Otherwise, if "included=true" then use the Lucene JAR included
in the DSpace classpath to upgrade the Solr/Lucene Index -->
<if>
<equals arg1="${included}" arg2="true" casesensitive="false"/>
<then>
<echo>Upgrading the Solr index in ${indexDir}. Depending on the index size, this may take a while (please be patient)...</echo>
<!-- Run the Lucene IndexUpgrader on this index. This
will upgrade the index based on the version of
"lucene-core.jar" -->
<java classname="org.apache.lucene.index.IndexUpgrader"
classpathref="class.path"
fork="yes"
failonerror="yes">
<sysproperty key="log4j.configurationFile"
value="file:config/log4j2-console.xml" />
<sysproperty key="dspace.log.init.disable"
value="true" />
<arg value="${indexDir}" />
</java>
</then>
</if>
</target>
</project>