[DS-3456] 6x Fix Command Line Parameters for statistics import/export tools (#1624)

* Clarify command line args

* support flexible import/export of stats

* Fix DS-3464 solr-reindex-statistics for shard

* Preserve multi val fields on import/export

* Time zone consistency in shard name creation

* Migrate PR feedback from 5x to 6x

* whitespace
This commit is contained in:
Terry Brady
2017-02-08 07:43:03 -08:00
committed by Tim Donohue
parent 9eb7c6734c
commit 8d76aa2010
3 changed files with 68 additions and 46 deletions

View File

@@ -1221,7 +1221,7 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
yearQueryParams.put(CommonParams.WT, "csv"); yearQueryParams.put(CommonParams.WT, "csv");
//Start by creating a new core //Start by creating a new core
String coreName = "statistics-" + dcStart.getYear(); String coreName = "statistics-" + dcStart.getYearUTC();
HttpSolrServer statisticsYearServer = createCore(solr, coreName); HttpSolrServer statisticsYearServer = createCore(solr, coreName);
System.out.println("Moving: " + totalRecords + " into core " + coreName); System.out.println("Moving: " + totalRecords + " into core " + coreName);
@@ -1236,7 +1236,7 @@ public class SolrLoggerServiceImpl implements SolrLoggerService, InitializingBea
HttpResponse response = new DefaultHttpClient().execute(get); HttpResponse response = new DefaultHttpClient().execute(get);
InputStream csvInputstream = response.getEntity().getContent(); InputStream csvInputstream = response.getEntity().getContent();
//Write the csv ouput to a file ! //Write the csv ouput to a file !
File csvFile = new File(tempDirectory.getPath() + File.separatorChar + "temp." + dcStart.getYear() + "." + i + ".csv"); File csvFile = new File(tempDirectory.getPath() + File.separatorChar + "temp." + dcStart.getYearUTC() + "." + i + ".csv");
FileUtils.copyInputStreamToFile(csvInputstream, csvFile); FileUtils.copyInputStreamToFile(csvInputstream, csvFile);
filesToUpload.add(csvFile); filesToUpload.add(csvFile);

View File

@@ -45,6 +45,7 @@ public class SolrImportExport
private static final ThreadLocal<DateFormat> SOLR_DATE_FORMAT; private static final ThreadLocal<DateFormat> SOLR_DATE_FORMAT;
private static final ThreadLocal<DateFormat> SOLR_DATE_FORMAT_NO_MS; private static final ThreadLocal<DateFormat> SOLR_DATE_FORMAT_NO_MS;
private static final ThreadLocal<DateFormat> EXPORT_DATE_FORMAT; private static final ThreadLocal<DateFormat> EXPORT_DATE_FORMAT;
private static final String EXPORT_SEP = "_export_";
static static
{ {
@@ -74,6 +75,7 @@ public class SolrImportExport
private static final String ACTION_OPTION = "a"; private static final String ACTION_OPTION = "a";
private static final String CLEAR_OPTION = "c"; private static final String CLEAR_OPTION = "c";
private static final String OVERWRITE_OPTION = "f";
private static final String DIRECTORY_OPTION = "d"; private static final String DIRECTORY_OPTION = "d";
private static final String HELP_OPTION = "h"; private static final String HELP_OPTION = "h";
private static final String INDEX_NAME_OPTION = "i"; private static final String INDEX_NAME_OPTION = "i";
@@ -82,6 +84,8 @@ public class SolrImportExport
public static final int ROWS_PER_FILE = 10_000; public static final int ROWS_PER_FILE = 10_000;
private static final String MULTIPLE_VALUES_SPLITTER = ",";
private static final Logger log = Logger.getLogger(SolrImportExport.class); private static final Logger log = Logger.getLogger(SolrImportExport.class);
/** /**
@@ -102,12 +106,16 @@ public class SolrImportExport
printHelpAndExit(options, 0); printHelpAndExit(options, 0);
} }
if (!line.hasOption(INDEX_NAME_OPTION)) String[] indexNames = {"statistics"};
if (line.hasOption(INDEX_NAME_OPTION))
{ {
System.err.println("This command requires the index-name option but none was present."); indexNames = line.getOptionValues(INDEX_NAME_OPTION);
printHelpAndExit(options, 1);
} }
String[] indexNames = line.getOptionValues(INDEX_NAME_OPTION); else
{
System.err.println("No index name provided, defaulting to \"statistics\".");
}
String directoryName = makeDirectoryName(line.getOptionValue(DIRECTORY_OPTION)); String directoryName = makeDirectoryName(line.getOptionValue(DIRECTORY_OPTION));
@@ -128,7 +136,7 @@ public class SolrImportExport
{ {
String solrUrl = makeSolrUrl(indexName); String solrUrl = makeSolrUrl(indexName);
boolean clear = line.hasOption(CLEAR_OPTION); boolean clear = line.hasOption(CLEAR_OPTION);
importIndex(indexName, importDir, solrUrl, clear, clear); importIndex(indexName, importDir, solrUrl, clear);
} }
catch (IOException | SolrServerException | SolrImportExportException e) catch (IOException | SolrServerException | SolrImportExportException e)
{ {
@@ -166,7 +174,7 @@ public class SolrImportExport
{ {
String solrUrl = makeSolrUrl(indexName); String solrUrl = makeSolrUrl(indexName);
String timeField = makeTimeField(indexName); String timeField = makeTimeField(indexName);
exportIndex(indexName, exportDir, solrUrl, timeField, lastValue); exportIndex(indexName, exportDir, solrUrl, timeField, lastValue, line.hasOption(OVERWRITE_OPTION));
} }
catch (SolrServerException | IOException | SolrImportExportException e) catch (SolrServerException | IOException | SolrImportExportException e)
{ {
@@ -181,7 +189,8 @@ public class SolrImportExport
{ {
try { try {
boolean keepExport = line.hasOption(KEEP_OPTION); boolean keepExport = line.hasOption(KEEP_OPTION);
reindex(indexName, directoryName, keepExport); boolean overwrite = line.hasOption(OVERWRITE_OPTION);
reindex(indexName, directoryName, keepExport, overwrite);
} catch (IOException | SolrServerException | SolrImportExportException e) { } catch (IOException | SolrServerException | SolrImportExportException e) {
e.printStackTrace(); e.printStackTrace();
} }
@@ -204,6 +213,7 @@ public class SolrImportExport
Options options = new Options(); Options options = new Options();
options.addOption(ACTION_OPTION, "action", true, "The action to perform: import, export or reindex. Default: export."); options.addOption(ACTION_OPTION, "action", true, "The action to perform: import, export or reindex. Default: export.");
options.addOption(CLEAR_OPTION, "clear", false, "When importing, also clear the index first. Ignored when action is export or reindex."); options.addOption(CLEAR_OPTION, "clear", false, "When importing, also clear the index first. Ignored when action is export or reindex.");
options.addOption(OVERWRITE_OPTION, "force-overwrite", false, "When exporting or re-indexing, allow overwrite of existing export files");
options.addOption(DIRECTORY_OPTION, "directory", true, options.addOption(DIRECTORY_OPTION, "directory", true,
"The absolute path for the directory to use for import or export. If omitted, [dspace]/solr-export is used."); "The absolute path for the directory to use for import or export. If omitted, [dspace]/solr-export is used.");
options.addOption(HELP_OPTION, "help", false, "Get help on options for this command."); options.addOption(HELP_OPTION, "help", false, "Get help on options for this command.");
@@ -227,8 +237,9 @@ public class SolrImportExport
* @param exportDirName the name of the directory to use for export. If this directory doesn't exist, it will be created. * @param exportDirName the name of the directory to use for export. If this directory doesn't exist, it will be created.
* @param keepExport whether to keep the contents of the exportDir after the reindex. If keepExport is false and the * @param keepExport whether to keep the contents of the exportDir after the reindex. If keepExport is false and the
* export directory was created by this method, the export directory will be deleted at the end of the reimport. * export directory was created by this method, the export directory will be deleted at the end of the reimport.
* @param overwrite allow export files to be overwritten during re-index
*/ */
private static void reindex(String indexName, String exportDirName, boolean keepExport) private static void reindex(String indexName, String exportDirName, boolean keepExport, boolean overwrite)
throws IOException, SolrServerException, SolrImportExportException { throws IOException, SolrServerException, SolrImportExportException {
String tempIndexName = indexName + "-temp"; String tempIndexName = indexName + "-temp";
@@ -236,7 +247,10 @@ public class SolrImportExport
String baseSolrUrl = StringUtils.substringBeforeLast(origSolrUrl, "/"); // need to get non-core solr URL String baseSolrUrl = StringUtils.substringBeforeLast(origSolrUrl, "/"); // need to get non-core solr URL
String tempSolrUrl = baseSolrUrl + "/" + tempIndexName; String tempSolrUrl = baseSolrUrl + "/" + tempIndexName;
String solrInstanceDir = ConfigurationManager.getProperty("dspace.dir") + File.separator + "solr" + File.separator + indexName; //The configuration details for the statistics shards reside within the "statistics" folder
String instanceIndexName = indexName.startsWith("statistics-") ? "statistics" : indexName;
String solrInstanceDir = ConfigurationManager.getProperty("dspace.dir") + File.separator + "solr" + File.separator + instanceIndexName;
// the [dspace]/solr/[indexName]/conf directory needs to be available on the local machine for this to work // the [dspace]/solr/[indexName]/conf directory needs to be available on the local machine for this to work
// -- we need access to the schema.xml and solrconfig.xml file, plus files referenced from there // -- we need access to the schema.xml and solrconfig.xml file, plus files referenced from there
// if this directory can't be found, output an error message and skip this index // if this directory can't be found, output an error message and skip this index
@@ -322,10 +336,10 @@ public class SolrImportExport
try try
{ {
// export from the actual core (from temp core name, actual data dir) // export from the actual core (from temp core name, actual data dir)
exportIndex(indexName, exportDir, tempSolrUrl, timeField); exportIndex(indexName, exportDir, tempSolrUrl, timeField, overwrite);
// clear actual core (temp core name, clearing actual data dir) & import // clear actual core (temp core name, clearing actual data dir) & import
importIndex(indexName, exportDir, tempSolrUrl, true, true); importIndex(indexName, exportDir, tempSolrUrl, true);
} }
catch (Exception e) catch (Exception e)
{ {
@@ -347,9 +361,9 @@ public class SolrImportExport
// export all docs from now-temp core into export directory -- this won't cause name collisions with the actual export // export all docs from now-temp core into export directory -- this won't cause name collisions with the actual export
// because the core name for the temporary export has -temp in it while the actual core doesn't // because the core name for the temporary export has -temp in it while the actual core doesn't
exportIndex(tempIndexName, exportDir, tempSolrUrl, timeField); exportIndex(tempIndexName, exportDir, tempSolrUrl, timeField, overwrite);
// ...and import them into the now-again-actual core *without* clearing // ...and import them into the now-again-actual core *without* clearing
importIndex(tempIndexName, exportDir, origSolrUrl, false, true); importIndex(tempIndexName, exportDir, origSolrUrl, false);
// commit changes // commit changes
origSolr.commit(); origSolr.commit();
@@ -381,13 +395,14 @@ public class SolrImportExport
* @param toDir The target directory for the export. Will be created if it doesn't exist yet. The directory must be writeable. * @param toDir The target directory for the export. Will be created if it doesn't exist yet. The directory must be writeable.
* @param solrUrl The solr URL for the index to export. Must not be null. * @param solrUrl The solr URL for the index to export. Must not be null.
* @param timeField The time field to use for sorting the export. Must not be null. * @param timeField The time field to use for sorting the export. Must not be null.
* @param overwrite If set, allow export files to be overwritten
* @throws SolrServerException if there is a problem with exporting the index. * @throws SolrServerException if there is a problem with exporting the index.
* @throws IOException if there is a problem creating the files or communicating with Solr. * @throws IOException if there is a problem creating the files or communicating with Solr.
* @throws SolrImportExportException if there is a problem in communicating with Solr. * @throws SolrImportExportException if there is a problem in communicating with Solr.
*/ */
public static void exportIndex(String indexName, File toDir, String solrUrl, String timeField) public static void exportIndex(String indexName, File toDir, String solrUrl, String timeField, boolean overwrite)
throws SolrServerException, SolrImportExportException, IOException { throws SolrServerException, SolrImportExportException, IOException {
exportIndex(indexName, toDir, solrUrl, timeField, null); exportIndex(indexName, toDir, solrUrl, timeField, null, overwrite);
} }
/** /**
@@ -403,7 +418,7 @@ public class SolrImportExport
* @throws SolrServerException if there is a problem reading the files or communicating with Solr. * @throws SolrServerException if there is a problem reading the files or communicating with Solr.
* @throws SolrImportExportException if there is a problem communicating with Solr. * @throws SolrImportExportException if there is a problem communicating with Solr.
*/ */
public static void importIndex(final String indexName, File fromDir, String solrUrl, boolean clear, boolean overwrite) public static void importIndex(final String indexName, File fromDir, String solrUrl, boolean clear)
throws IOException, SolrServerException, SolrImportExportException throws IOException, SolrServerException, SolrImportExportException
{ {
if (StringUtils.isBlank(solrUrl)) if (StringUtils.isBlank(solrUrl))
@@ -433,7 +448,7 @@ public class SolrImportExport
@Override @Override
public boolean accept(File dir, String name) public boolean accept(File dir, String name)
{ {
return name.startsWith(indexName) && name.endsWith(".csv"); return name.startsWith(indexName + EXPORT_SEP) && name.endsWith(".csv");
} }
}); });
@@ -449,13 +464,10 @@ public class SolrImportExport
{ {
log.info("Importing file " + file.getCanonicalPath()); log.info("Importing file " + file.getCanonicalPath());
ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update/csv"); ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update/csv");
if (overwrite) contentStreamUpdateRequest.setParam("skip", "_version_");
{
contentStreamUpdateRequest.setParam("skip", "_version_");
}
for (String mvField : multivaluedFields) { for (String mvField : multivaluedFields) {
contentStreamUpdateRequest.setParam("f." + mvField + ".split", "true"); contentStreamUpdateRequest.setParam("f." + mvField + ".split", "true");
contentStreamUpdateRequest.setParam("f." + mvField + ".escape", "\\"); contentStreamUpdateRequest.setParam("f." + mvField + ".separator", MULTIPLE_VALUES_SPLITTER);
} }
contentStreamUpdateRequest.setParam("stream.contentType", "text/csv;charset=utf-8"); contentStreamUpdateRequest.setParam("stream.contentType", "text/csv;charset=utf-8");
contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true); contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
@@ -520,14 +532,16 @@ public class SolrImportExport
* @param solrUrl The solr URL for the index to export. Must not be null. * @param solrUrl The solr URL for the index to export. Must not be null.
* @param timeField The time field to use for sorting the export. Must not be null. * @param timeField The time field to use for sorting the export. Must not be null.
* @param fromWhen Optionally, from when to export. See options for allowed values. If null or empty, all documents will be exported. * @param fromWhen Optionally, from when to export. See options for allowed values. If null or empty, all documents will be exported.
* @param overwrite If set, allow export files to be overwritten
* @throws SolrServerException if there is a problem with exporting the index. * @throws SolrServerException if there is a problem with exporting the index.
* @throws IOException if there is a problem creating the files or communicating with Solr. * @throws IOException if there is a problem creating the files or communicating with Solr.
* @throws SolrImportExportException if there is a problem in communicating with Solr. * @throws SolrImportExportException if there is a problem in communicating with Solr.
*/ */
public static void exportIndex(String indexName, File toDir, String solrUrl, String timeField, String fromWhen) public static void exportIndex(String indexName, File toDir, String solrUrl, String timeField, String fromWhen, boolean overwrite)
throws SolrServerException, IOException, SolrImportExportException throws SolrServerException, IOException, SolrImportExportException
{ {
if (StringUtils.isBlank(solrUrl)) log.info(String.format("Export Index [%s] to [%s] using [%s] Time Field[%s] FromWhen[%s]", indexName, toDir, solrUrl, timeField, fromWhen));
if (StringUtils.isBlank(solrUrl))
{ {
throw new SolrImportExportException("Could not construct solr URL for index" + indexName + ", aborting export."); throw new SolrImportExportException("Could not construct solr URL for index" + indexName + ", aborting export.");
} }
@@ -555,12 +569,14 @@ public class SolrImportExport
query.setGetFieldStatistics(timeField); query.setGetFieldStatistics(timeField);
Map<String, FieldStatsInfo> fieldInfo = solr.query(query).getFieldStatsInfo(); Map<String, FieldStatsInfo> fieldInfo = solr.query(query).getFieldStatsInfo();
if (fieldInfo == null || !fieldInfo.containsKey(timeField)) { if (fieldInfo == null || !fieldInfo.containsKey(timeField)) {
log.warn("Cannot get earliest date, not exporting index " + indexName + ", time field " + timeField + ", from " + fromWhen); log.warn(String.format("Queried [%s]. No fieldInfo found while exporting index [%s] time field [%s] from [%s]. Export cancelled.",
solrUrl, indexName, timeField, fromWhen));
return; return;
} }
FieldStatsInfo timeFieldInfo = fieldInfo.get(timeField); FieldStatsInfo timeFieldInfo = fieldInfo.get(timeField);
if (timeFieldInfo == null || timeFieldInfo.getMin() == null) { if (timeFieldInfo == null || timeFieldInfo.getMin() == null) {
log.warn("Cannot get earliest date, not exporting index " + indexName + ", time field " + timeField + ", from " + fromWhen); log.warn(String.format("Queried [%s]. No earliest date found while exporting index [%s] time field [%s] from [%s]. Export cancelled.",
solrUrl, indexName, timeField, fromWhen));
return; return;
} }
Date earliestTimestamp = (Date) timeFieldInfo.getMin(); Date earliestTimestamp = (Date) timeFieldInfo.getMin();
@@ -594,6 +610,7 @@ public class SolrImportExport
monthQuery.setRows(ROWS_PER_FILE); monthQuery.setRows(ROWS_PER_FILE);
monthQuery.set("wt", "csv"); monthQuery.set("wt", "csv");
monthQuery.set("fl", "*"); monthQuery.set("fl", "*");
monthQuery.setParam("csv.mv.separator", MULTIPLE_VALUES_SPLITTER);
monthQuery.addFilterQuery(timeField + ":[" +monthStart + " TO " + monthStart + "+1MONTH]"); monthQuery.addFilterQuery(timeField + ":[" +monthStart + " TO " + monthStart + "+1MONTH]");
@@ -603,17 +620,24 @@ public class SolrImportExport
URL url = new URL(solrUrl + "/select?" + monthQuery.toString()); URL url = new URL(solrUrl + "/select?" + monthQuery.toString());
File file = new File(toDir.getCanonicalPath(), makeExportFilename(indexName, monthStartDate, docsThisMonth, i)); File file = new File(toDir.getCanonicalPath(), makeExportFilename(indexName, monthStartDate, docsThisMonth, i));
if (file.createNewFile()) if (file.createNewFile() || overwrite)
{ {
FileUtils.copyURLToFile(url, file); FileUtils.copyURLToFile(url, file);
log.info("Exported batch " + i + " to " + file.getCanonicalPath()); String message = String.format("Solr export to file [%s] complete. Export for Index [%s] Month [%s] Batch [%d] Num Docs [%d]",
file.getCanonicalPath(), indexName, monthStart, i, docsThisMonth);
log.info(message);
}
else if (file.exists())
{
String message = String.format("Solr export file [%s] already exists. Export failed for Index [%s] Month [%s] Batch [%d] Num Docs [%d]",
file.getCanonicalPath(), indexName, monthStart, i, docsThisMonth);
throw new SolrImportExportException(message);
} }
else else
{ {
throw new SolrImportExportException("Could not create file " + file.getCanonicalPath() String message = String.format("Cannot create solr export file [%s]. Export failed for Index [%s] Month [%s] Batch [%d] Num Docs [%d]",
+ " while exporting index " + indexName file.getCanonicalPath(), indexName, monthStart, i, docsThisMonth);
+ ", month" + monthStart throw new SolrImportExportException(message);
+ ", batch " + i);
} }
} }
} }
@@ -671,15 +695,15 @@ public class SolrImportExport
*/ */
private static String makeExportFilename(String indexName, Date exportStart, long totalRecords, int index) private static String makeExportFilename(String indexName, Date exportStart, long totalRecords, int index)
{ {
String exportFileNumber = ""; String exportFileNumber = "";
if (totalRecords > ROWS_PER_FILE) { if (totalRecords > ROWS_PER_FILE) {
exportFileNumber = StringUtils.leftPad("" + (index / ROWS_PER_FILE), (int) Math.ceil(Math.log10(totalRecords / ROWS_PER_FILE)), "0"); exportFileNumber = StringUtils.leftPad("" + (index / ROWS_PER_FILE), (int) Math.ceil(Math.log10(totalRecords / ROWS_PER_FILE)), "0");
} }
return indexName return indexName
+ "_export_" + EXPORT_SEP
+ EXPORT_DATE_FORMAT.get().format(exportStart) + EXPORT_DATE_FORMAT.get().format(exportStart)
+ (StringUtils.isNotBlank(exportFileNumber) ? "_" + exportFileNumber : "") + (StringUtils.isNotBlank(exportFileNumber) ? "_" + exportFileNumber : "")
+ ".csv"; + ".csv";
} }
/** /**
@@ -733,6 +757,10 @@ public class SolrImportExport
{ {
HelpFormatter myhelp = new HelpFormatter(); HelpFormatter myhelp = new HelpFormatter();
myhelp.printHelp(SolrImportExport.class.getSimpleName() + "\n", options); myhelp.printHelp(SolrImportExport.class.getSimpleName() + "\n", options);
System.out.println("\n\nCommand Defaults");
System.out.println("\tsolr-export-statistics [-a export] [-i statistics]");
System.out.println("\tsolr-import-statistics [-a import] [-i statistics]");
System.out.println("\tsolr-reindex-statistics [-a reindex] [-i statistics]");
System.exit(exitCode); System.exit(exitCode);
} }
} }

View File

@@ -222,8 +222,6 @@
<class>org.dspace.util.SolrImportExport</class> <class>org.dspace.util.SolrImportExport</class>
<argument>-a</argument> <argument>-a</argument>
<argument>export</argument> <argument>export</argument>
<argument>-i</argument>
<argument>statistics</argument>
</step> </step>
</command> </command>
<command> <command>
@@ -233,8 +231,6 @@
<class>org.dspace.util.SolrImportExport</class> <class>org.dspace.util.SolrImportExport</class>
<argument>-a</argument> <argument>-a</argument>
<argument>import</argument> <argument>import</argument>
<argument>-i</argument>
<argument>statistics</argument>
</step> </step>
</command> </command>
<command> <command>
@@ -244,8 +240,6 @@
<class>org.dspace.util.SolrImportExport</class> <class>org.dspace.util.SolrImportExport</class>
<argument>-a</argument> <argument>-a</argument>
<argument>reindex</argument> <argument>reindex</argument>
<argument>-i</argument>
<argument>statistics</argument>
</step> </step>
</command> </command>
<command> <command>