Export SOLR Usage Statistics data to intermediate format

bin/dspace stats-util --export
This commit is contained in:
Peter Dietz
2014-05-29 16:07:37 -04:00
parent a2d513e59e
commit f57619d726
2 changed files with 70 additions and 4 deletions

View File

@@ -20,6 +20,7 @@ import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer; import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest; import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
@@ -30,11 +31,10 @@ import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.RangeFacet; import org.apache.solr.client.solrj.response.RangeFacet;
import org.apache.solr.client.solrj.util.ClientUtils; import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.*;
import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.util.JavaBinCodec;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.dspace.content.*; import org.dspace.content.*;
import org.dspace.content.Collection; import org.dspace.content.Collection;
import org.dspace.core.ConfigurationManager; import org.dspace.core.ConfigurationManager;
@@ -51,6 +51,7 @@ import javax.servlet.http.HttpServletRequest;
import java.io.*; import java.io.*;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.sql.SQLException; import java.sql.SQLException;
import java.text.DateFormat;
import java.text.ParseException; import java.text.ParseException;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.*; import java.util.*;
@@ -1495,6 +1496,66 @@ public class SolrLogger
} }
} }
public static void exportHits() throws Exception {
Context context = new Context();
try {
//First of all retrieve the total number of records to be updated
SolrQuery query = new SolrQuery();
query.setQuery("*:*");
query.setRows(0);
addAdditionalSolrYearCores(query);
long totalRecords = solr.query(query).getResults().getNumFound();
File tempDirectory = new File(ConfigurationManager.getProperty("dspace.dir") + File.separator + "temp" + File.separator);
tempDirectory.mkdirs();
for(int i = 0; i < totalRecords; i+=10000){
ModifiableSolrParams solrParams = new ModifiableSolrParams();
solrParams.set(CommonParams.Q, "statistics_type:view");
solrParams.set(CommonParams.WT, "javabin");
solrParams.set(CommonParams.ROWS, String.valueOf(10000));
solrParams.set(CommonParams.START, String.valueOf(i));
//Have the SOLR data
QueryResponse rsp = solr.query(solrParams);
SolrDocumentList docs = rsp.getResults();
File exportOutput = new File(tempDirectory.getPath() + File.separatorChar + "usagestats_" + i + ".csv");
exportOutput.delete();
for(SolrDocument doc : docs) {
String uid = doc.get("uid").toString();
String ip = doc.get("ip").toString();
String id = doc.get("id").toString();
String type = doc.get("type").toString();
String time = doc.get("time").toString();
//20140527162409835,view_bitstream,1292,2014-05-27T16:24:09,anonymous,127.0.0.1
DSpaceObject dso = DSpaceObject.find(context, Integer.parseInt(type), Integer.parseInt(id));
//InputFormat: Mon May 19 07:21:27 EDT 2014
DateFormat inputDateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss z yyyy");
Date solrDate = inputDateFormat.parse(time);
//OutputFormat: 2014-05-27T16:24:09
DateFormat outputDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
String out = uid + "," + "view_" + dso.getTypeText().toLowerCase() + "," + id + "," + outputDateFormat.format(solrDate) + ",anonymous," + ip + "\n";
FileUtils.writeStringToFile(exportOutput, out, true);
}
System.out.println("Export hits [" + String.valueOf(i*10000) + " - " + String.valueOf((i+1)*10000) + "] to " + exportOutput.getCanonicalPath());
}
} catch (Exception e) {
log.error("Error while exporting SOLR data", e);
throw e;
} finally {
context.abort();
}
}
private static String generateURL(String baseURL, Map<String, String> parameters) throws UnsupportedEncodingException { private static String generateURL(String baseURL, Map<String, String> parameters) throws UnsupportedEncodingException {
boolean first = true; boolean first = true;
StringBuilder result = new StringBuilder(baseURL); StringBuilder result = new StringBuilder(baseURL);

View File

@@ -60,6 +60,7 @@ public class StatisticsClient
options.addOption("i", "delete-spiders-by-ip", false, "Delete Spiders in Solr By IP Address"); options.addOption("i", "delete-spiders-by-ip", false, "Delete Spiders in Solr By IP Address");
options.addOption("o", "optimize", false, "Run maintenance on the SOLR index"); options.addOption("o", "optimize", false, "Run maintenance on the SOLR index");
options.addOption("b", "reindex-bitstreams", false, "Reindex the bitstreams to ensure we have the bundle name"); options.addOption("b", "reindex-bitstreams", false, "Reindex the bitstreams to ensure we have the bundle name");
options.addOption("e", "export", false, "Export SOLR view statistics data to usage-statistics-intermediate-format");
options.addOption("r", "remove-deleted-bitstreams", false, "While indexing the bundle names remove the statistics about deleted bitstreams"); options.addOption("r", "remove-deleted-bitstreams", false, "While indexing the bundle names remove the statistics about deleted bitstreams");
options.addOption("s", "shard-solr-index", false, "Split the data from the main Solr core into separate Solr cores per year"); options.addOption("s", "shard-solr-index", false, "Split the data from the main Solr core into separate Solr cores per year");
options.addOption("h", "help", false, "help"); options.addOption("h", "help", false, "help");
@@ -96,6 +97,10 @@ public class StatisticsClient
{ {
SolrLogger.reindexBitstreamHits(line.hasOption('r')); SolrLogger.reindexBitstreamHits(line.hasOption('r'));
} }
else if(line.hasOption('e'))
{
SolrLogger.exportHits();
}
else if(line.hasOption('s')) else if(line.hasOption('s'))
{ {
SolrLogger.shardSolrIndex(); SolrLogger.shardSolrIndex();