mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-18 15:33:09 +00:00
Export SOLR Usage Statistics data to intermediate format
bin/dspace stats-util --export
This commit is contained in:
@@ -20,6 +20,7 @@ import org.apache.http.client.methods.HttpGet;
|
|||||||
import org.apache.http.impl.client.DefaultHttpClient;
|
import org.apache.http.impl.client.DefaultHttpClient;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.apache.solr.client.solrj.SolrQuery;
|
import org.apache.solr.client.solrj.SolrQuery;
|
||||||
|
import org.apache.solr.client.solrj.SolrServer;
|
||||||
import org.apache.solr.client.solrj.SolrServerException;
|
import org.apache.solr.client.solrj.SolrServerException;
|
||||||
import org.apache.solr.client.solrj.impl.HttpSolrServer;
|
import org.apache.solr.client.solrj.impl.HttpSolrServer;
|
||||||
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
|
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
|
||||||
@@ -30,11 +31,10 @@ import org.apache.solr.client.solrj.response.QueryResponse;
|
|||||||
import org.apache.solr.client.solrj.response.RangeFacet;
|
import org.apache.solr.client.solrj.response.RangeFacet;
|
||||||
import org.apache.solr.client.solrj.util.ClientUtils;
|
import org.apache.solr.client.solrj.util.ClientUtils;
|
||||||
import org.apache.solr.common.SolrDocument;
|
import org.apache.solr.common.SolrDocument;
|
||||||
|
import org.apache.solr.common.SolrDocumentList;
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
import org.apache.solr.common.params.CommonParams;
|
import org.apache.solr.common.params.*;
|
||||||
import org.apache.solr.common.params.FacetParams;
|
import org.apache.solr.common.util.JavaBinCodec;
|
||||||
import org.apache.solr.common.params.MapSolrParams;
|
|
||||||
import org.apache.solr.common.params.ShardParams;
|
|
||||||
import org.dspace.content.*;
|
import org.dspace.content.*;
|
||||||
import org.dspace.content.Collection;
|
import org.dspace.content.Collection;
|
||||||
import org.dspace.core.ConfigurationManager;
|
import org.dspace.core.ConfigurationManager;
|
||||||
@@ -51,6 +51,7 @@ import javax.servlet.http.HttpServletRequest;
|
|||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.net.URLEncoder;
|
import java.net.URLEncoder;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
|
import java.text.DateFormat;
|
||||||
import java.text.ParseException;
|
import java.text.ParseException;
|
||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
@@ -1495,6 +1496,66 @@ public class SolrLogger
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void exportHits() throws Exception {
|
||||||
|
Context context = new Context();
|
||||||
|
|
||||||
|
try {
|
||||||
|
//First of all retrieve the total number of records to be updated
|
||||||
|
SolrQuery query = new SolrQuery();
|
||||||
|
query.setQuery("*:*");
|
||||||
|
query.setRows(0);
|
||||||
|
addAdditionalSolrYearCores(query);
|
||||||
|
long totalRecords = solr.query(query).getResults().getNumFound();
|
||||||
|
|
||||||
|
File tempDirectory = new File(ConfigurationManager.getProperty("dspace.dir") + File.separator + "temp" + File.separator);
|
||||||
|
tempDirectory.mkdirs();
|
||||||
|
|
||||||
|
|
||||||
|
for(int i = 0; i < totalRecords; i+=10000){
|
||||||
|
ModifiableSolrParams solrParams = new ModifiableSolrParams();
|
||||||
|
solrParams.set(CommonParams.Q, "statistics_type:view");
|
||||||
|
solrParams.set(CommonParams.WT, "javabin");
|
||||||
|
solrParams.set(CommonParams.ROWS, String.valueOf(10000));
|
||||||
|
solrParams.set(CommonParams.START, String.valueOf(i));
|
||||||
|
|
||||||
|
//Have the SOLR data
|
||||||
|
QueryResponse rsp = solr.query(solrParams);
|
||||||
|
SolrDocumentList docs = rsp.getResults();
|
||||||
|
|
||||||
|
File exportOutput = new File(tempDirectory.getPath() + File.separatorChar + "usagestats_" + i + ".csv");
|
||||||
|
exportOutput.delete();
|
||||||
|
|
||||||
|
for(SolrDocument doc : docs) {
|
||||||
|
String uid = doc.get("uid").toString();
|
||||||
|
String ip = doc.get("ip").toString();
|
||||||
|
String id = doc.get("id").toString();
|
||||||
|
String type = doc.get("type").toString();
|
||||||
|
String time = doc.get("time").toString();
|
||||||
|
|
||||||
|
//20140527162409835,view_bitstream,1292,2014-05-27T16:24:09,anonymous,127.0.0.1
|
||||||
|
DSpaceObject dso = DSpaceObject.find(context, Integer.parseInt(type), Integer.parseInt(id));
|
||||||
|
|
||||||
|
//InputFormat: Mon May 19 07:21:27 EDT 2014
|
||||||
|
DateFormat inputDateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss z yyyy");
|
||||||
|
Date solrDate = inputDateFormat.parse(time);
|
||||||
|
|
||||||
|
//OutputFormat: 2014-05-27T16:24:09
|
||||||
|
DateFormat outputDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
|
||||||
|
|
||||||
|
String out = uid + "," + "view_" + dso.getTypeText().toLowerCase() + "," + id + "," + outputDateFormat.format(solrDate) + ",anonymous," + ip + "\n";
|
||||||
|
FileUtils.writeStringToFile(exportOutput, out, true);
|
||||||
|
|
||||||
|
}
|
||||||
|
System.out.println("Export hits [" + String.valueOf(i*10000) + " - " + String.valueOf((i+1)*10000) + "] to " + exportOutput.getCanonicalPath());
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Error while exporting SOLR data", e);
|
||||||
|
throw e;
|
||||||
|
} finally {
|
||||||
|
context.abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static String generateURL(String baseURL, Map<String, String> parameters) throws UnsupportedEncodingException {
|
private static String generateURL(String baseURL, Map<String, String> parameters) throws UnsupportedEncodingException {
|
||||||
boolean first = true;
|
boolean first = true;
|
||||||
StringBuilder result = new StringBuilder(baseURL);
|
StringBuilder result = new StringBuilder(baseURL);
|
||||||
|
@@ -60,6 +60,7 @@ public class StatisticsClient
|
|||||||
options.addOption("i", "delete-spiders-by-ip", false, "Delete Spiders in Solr By IP Address");
|
options.addOption("i", "delete-spiders-by-ip", false, "Delete Spiders in Solr By IP Address");
|
||||||
options.addOption("o", "optimize", false, "Run maintenance on the SOLR index");
|
options.addOption("o", "optimize", false, "Run maintenance on the SOLR index");
|
||||||
options.addOption("b", "reindex-bitstreams", false, "Reindex the bitstreams to ensure we have the bundle name");
|
options.addOption("b", "reindex-bitstreams", false, "Reindex the bitstreams to ensure we have the bundle name");
|
||||||
|
options.addOption("e", "export", false, "Export SOLR view statistics data to usage-statistics-intermediate-format");
|
||||||
options.addOption("r", "remove-deleted-bitstreams", false, "While indexing the bundle names remove the statistics about deleted bitstreams");
|
options.addOption("r", "remove-deleted-bitstreams", false, "While indexing the bundle names remove the statistics about deleted bitstreams");
|
||||||
options.addOption("s", "shard-solr-index", false, "Split the data from the main Solr core into separate Solr cores per year");
|
options.addOption("s", "shard-solr-index", false, "Split the data from the main Solr core into separate Solr cores per year");
|
||||||
options.addOption("h", "help", false, "help");
|
options.addOption("h", "help", false, "help");
|
||||||
@@ -96,6 +97,10 @@ public class StatisticsClient
|
|||||||
{
|
{
|
||||||
SolrLogger.reindexBitstreamHits(line.hasOption('r'));
|
SolrLogger.reindexBitstreamHits(line.hasOption('r'));
|
||||||
}
|
}
|
||||||
|
else if(line.hasOption('e'))
|
||||||
|
{
|
||||||
|
SolrLogger.exportHits();
|
||||||
|
}
|
||||||
else if(line.hasOption('s'))
|
else if(line.hasOption('s'))
|
||||||
{
|
{
|
||||||
SolrLogger.shardSolrIndex();
|
SolrLogger.shardSolrIndex();
|
||||||
|
Reference in New Issue
Block a user