[DS-1144] Maven Project Consolidation : Statistics

This commit is contained in:
Mark Diggory
2012-10-03 02:55:52 -07:00
parent ef576f7012
commit 6e834495ae
46 changed files with 531 additions and 543 deletions

View File

@@ -0,0 +1,71 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics;
import com.google.gson.Gson;
import java.util.ArrayList;
import java.util.List;
/**
* A neutral data object to hold data for statistics.
*
*/
public class DataTermsFacet {
private List<TermsFacet> terms;
public DataTermsFacet() {
terms = new ArrayList<TermsFacet>();
}
public void addTermFacet(TermsFacet termsFacet ) {
terms.add(termsFacet);
}
/**
* Render this data object into JSON format.
*
* An example of the output could be of the format:
* [{"term":"247166","count":10},{"term":"247168","count":6}]
* @return
*/
public String toJson() {
Gson gson = new Gson();
return gson.toJson(terms);
}
public static class TermsFacet {
private String term;
private Integer count;
public TermsFacet(String term, Integer count) {
setTerm(term);
setCount(count);
}
public String getTerm() {
return term;
}
public void setTerm(String term) {
this.term = term;
}
public Integer getCount() {
return count;
}
public void setCount(Integer count) {
this.count = count;
}
}
}

View File

@@ -0,0 +1,265 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.Ostermiller.util.ExcelCSVPrinter;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
/**
 * A neutral data object holding a two-dimensional matrix of formatted values
 * with row/column labels, label attributes, and CSV export support. Used to
 * carry statistics results between the query layer and the presentation layer.
 *
 * @author kevinvandevelde at atmire.com
 * Date: 21-jan-2009
 * Time: 13:44:48
 */
public class Dataset {

    /** Number of rows in the matrix. */
    private int nbRows;
    /** Number of columns in the matrix. */
    private int nbCols;
    /* The labels shown in our columns */
    private List<String> colLabels;
    /* The labels shown in our rows */
    private List<String> rowLabels;
    private String colTitle;
    private String rowTitle;
    /* The attributes for the colls */
    private List<Map<String, String>> colLabelsAttrs;
    /* The attributes for the rows */
    private List<Map<String, String>> rowLabelsAttrs;
    /* The data in a matrix */
    private String[][] matrix;
    /* The format in which we format our floats (DecimalFormat pattern) */
    private String format = "0";

    /**
     * Create an empty dataset with the given dimensions and default
     * ("Row n" / "Column n") labels.
     *
     * @param rows number of rows
     * @param cols number of columns
     */
    public Dataset(int rows, int cols) {
        matrix = new String[rows][cols];
        nbRows = rows;
        nbCols = cols;
        initColumnLabels(cols);
        initRowLabels(rows);
    }

    /**
     * Create a dataset from a float matrix; each value is rendered to a
     * String using the current number format.
     *
     * Fix: the previous implementation did
     * {@code (String[][]) ArrayUtils.clone(matrix)}, which always throws a
     * ClassCastException (a float[][] can never be cast to String[][]).
     * The values are now converted element by element instead.
     *
     * @param matrix the source values; may be empty
     */
    public Dataset(float[][] matrix) {
        nbRows = matrix.length;
        if (0 < matrix.length && 0 < matrix[0].length)
        {
            nbCols = matrix[0].length;
        }
        DecimalFormat decimalFormat = new DecimalFormat(format);
        this.matrix = new String[nbRows][nbCols];
        for (int i = 0; i < nbRows; i++) {
            for (int j = 0; j < matrix[i].length && j < nbCols; j++) {
                this.matrix[i][j] = decimalFormat.format(matrix[i][j]);
            }
        }
        initColumnLabels(nbCols);
        initRowLabels(nbRows);
    }

    /** Fill rowLabels with "Row 1".."Row n" and create empty attribute maps. */
    private void initRowLabels(int rows) {
        rowLabels = new ArrayList<String>(rows);
        rowLabelsAttrs = new ArrayList<Map<String, String>>();
        for (int i = 0; i < rows; i++) {
            rowLabels.add("Row " + (i + 1));
            rowLabelsAttrs.add(new HashMap<String, String>());
        }
    }

    /** Fill colLabels with "Column 1".."Column n" and create empty attribute maps. */
    private void initColumnLabels(int nbCols) {
        colLabels = new ArrayList<String>(nbCols);
        colLabelsAttrs = new ArrayList<Map<String, String>>();
        for (int i = 0; i < nbCols; i++) {
            colLabels.add("Column " + (i + 1));
            colLabelsAttrs.add(new HashMap<String, String>());
        }
    }

    public void setColLabel(int n, String label) {
        colLabels.set(n, label);
    }

    public void setRowLabel(int n, String label) {
        rowLabels.set(n, label);
    }

    public String getRowTitle() {
        return rowTitle;
    }

    public String getColTitle() {
        return colTitle;
    }

    public void setColTitle(String colTitle) {
        this.colTitle = colTitle;
    }

    public void setRowTitle(String rowTitle) {
        this.rowTitle = rowTitle;
    }

    /**
     * Add (or overwrite) a single attribute on the row label at {@code pos}.
     */
    public void setRowLabelAttr(int pos, String attrName, String attr) {
        Map<String, String> attrs = rowLabelsAttrs.get(pos);
        attrs.put(attrName, attr);
        rowLabelsAttrs.set(pos, attrs);
    }

    /** Replace the whole attribute map of the row label at {@code pos}. */
    public void setRowLabelAttr(int pos, Map<String, String> attrMap) {
        rowLabelsAttrs.set(pos, attrMap);
    }

    /**
     * Add (or overwrite) a single attribute on the column label at {@code pos}.
     */
    public void setColLabelAttr(int pos, String attrName, String attr) {
        Map<String, String> attrs = colLabelsAttrs.get(pos);
        attrs.put(attrName, attr);
        colLabelsAttrs.set(pos, attrs);
    }

    /** Replace the whole attribute map of the column label at {@code pos}. */
    public void setColLabelAttr(int pos, Map<String, String> attrMap) {
        colLabelsAttrs.set(pos, attrMap);
    }

    public List<Map<String, String>> getColLabelsAttrs() {
        return colLabelsAttrs;
    }

    public List<Map<String, String>> getRowLabelsAttrs() {
        return rowLabelsAttrs;
    }

    public List<String> getColLabels() {
        return colLabels;
    }

    public List<String> getRowLabels() {
        return rowLabels;
    }

    public int getNbRows() {
        return nbRows;
    }

    public int getNbCols() {
        return nbCols;
    }

    public String getFormat() {
        return format;
    }

    public void setFormat(String format) {
        this.format = format;
    }

    /**
     * @return the backing matrix (never null); note this exposes the internal
     *         array, so callers can mutate the dataset through it.
     */
    public String[][] getMatrix() {
        if (matrix.length == 0) {
            return new String[0][0];
        } else {
            return matrix;
        }
    }

    /** Store a float value at (row, coll), formatted with the current format. */
    public void addValueToMatrix(int row, int coll, float value) {
        DecimalFormat decimalFormat = new DecimalFormat(format);
        matrix[row][coll] = decimalFormat.format(value);
    }

    /**
     * Store a pre-formatted value at (row, coll).
     * The ParseException is retained for interface compatibility; it is never
     * actually thrown by this implementation.
     */
    public void addValueToMatrix(int row, int coll, String value) throws ParseException {
        matrix[row][coll] = value;
    }

    /**
     * Returns true if this dataset contains at least one value that is neither
     * blank nor "0"; returns false if it only contains zeros/blanks (or is
     * empty).
     *
     * Fix: the previous implementation had the condition inverted — it
     * returned true as soon as it found a blank or "0" cell, the opposite of
     * what its documentation ("Returns false if this dataset only contains
     * zero's") promises and what the method name implies.
     */
    public boolean containsNonZeroValues() {
        if (matrix != null) {
            for (String[] vector : matrix) {
                for (String v : vector) {
                    if (!StringUtils.isBlank(v) && !"0".equals(v))
                    {
                        return true;
                    }
                }
            }
        }
        return false;
    }

    /**
     * Transpose the matrix in place, swapping row/column labels, label
     * attributes, sizes and titles. Sizes and titles are swapped even when the
     * matrix itself is empty (matching the original behaviour).
     */
    public void flipRowCols() {
        //Lets make sure we at least have something to flip
        if (0 < matrix.length && 0 < matrix[0].length) {
            //Flip the data first
            String[][] newMatrix = new String[matrix[0].length][matrix.length];
            for (int i = 0; i < matrix.length; i++) {
                for (int j = 0; j < matrix[i].length; j++) {
                    newMatrix[j][i] = matrix[i][j];
                }
            }
            //Flip the rows & column labels
            List<String> backup = colLabels;
            colLabels = rowLabels;
            rowLabels = backup;
            //Also flip the links
            List<Map<String, String>> backList = colLabelsAttrs;
            colLabelsAttrs = rowLabelsAttrs;
            rowLabelsAttrs = backList;
            matrix = newMatrix;
        }
        //Also flip these sizes
        int backUp = nbRows;
        nbRows = nbCols;
        nbCols = backUp;
        //Also flip the title's
        String backup = rowTitle;
        rowTitle = colTitle;
        colTitle = backup;
    }

    /**
     * Export the dataset as a semicolon-delimited, always-quoted CSV stream:
     * a header row of column labels (with an empty leading cell), then one
     * row per row label followed by its values.
     *
     * @return the CSV content in an in-memory stream
     * @throws IOException if writing to the printer fails
     */
    public ByteArrayOutputStream exportAsCSV() throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ExcelCSVPrinter ecsvp = new ExcelCSVPrinter(baos);
        ecsvp.changeDelimiter(';');
        ecsvp.setAlwaysQuote(true);
        //Generate the item row
        List<String> colLabels = getColLabels();
        ecsvp.write("");
        for (String colLabel : colLabels) {
            ecsvp.write(colLabel);
        }
        ecsvp.writeln();
        List<String> rowLabels = getRowLabels();
        String[][] matrix = getMatrix();
        for (int i = 0; i < rowLabels.size(); i++) {
            String rowLabel = rowLabels.get(i);
            ecsvp.write(rowLabel);
            for (int j = 0; j < matrix[i].length; j++) {
                ecsvp.write(matrix[i][j]);
            }
            ecsvp.writeln();
        }
        ecsvp.flush();
        ecsvp.close();
        return baos;
    }
}

View File

@@ -0,0 +1,562 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics;
import com.maxmind.geoip.Location;
import com.maxmind.geoip.LookupService;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.commons.lang.time.DateFormatUtils;
import org.apache.log4j.Logger;
import org.dspace.content.*;
import org.dspace.content.Collection;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.eperson.EPerson;
import org.dspace.statistics.util.DnsLookup;
import org.dspace.statistics.util.LocationUtils;
import org.dspace.statistics.util.SpiderDetector;
import org.elasticsearch.action.ActionFuture;
import org.elasticsearch.action.admin.indices.exists.IndicesExistsRequest;
import org.elasticsearch.action.admin.indices.exists.IndicesExistsResponse;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.action.admin.indices.mapping.put.PutMappingRequestBuilder;
import org.elasticsearch.client.action.index.IndexRequestBuilder;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.node.Node;
import org.elasticsearch.node.NodeBuilder;
import javax.servlet.http.HttpServletRequest;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.sql.SQLException;
import java.util.*;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
/**
 * Records DSpace usage events (views of items, bitstreams, etc.) into an
 * Elasticsearch index. Holds a static Elasticsearch {@link Client} plus the
 * connection settings (cluster name, index name/type, address, port), all of
 * which can be overridden via the "elastic-search-statistics" configuration
 * module. Obtain the shared instance through {@link #getInstance()}.
 */
public class ElasticSearchLogger {
private static Logger log = Logger.getLogger(ElasticSearchLogger.class);
// Whether X-Forwarded-For headers should be trusted when resolving client IPs
// (set from the "useProxies" configuration property during init).
private static boolean useProxies;
public static final String DATE_FORMAT_8601 = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
public static final String DATE_FORMAT_DCDATE = "yyyy-MM-dd'T'HH:mm:ss'Z'";
// GeoIP lookup service; stays null when the 'dbfile' config is missing or unreadable.
private static LookupService locationService;
// Defaults below apply when the "elastic-search-statistics" module does not override them.
public static String clusterName = "dspacestatslogging";
public static String indexName = "dspaceindex";
public static String indexType = "stats";
public static String address = "127.0.0.1";
public static int port = 9300;
// Shared client for all callers. NOTE(review): post() closes this client in its
// finally block, leaving the static reference pointing at a closed client for
// subsequent calls — confirm whether that is intended.
private static Client client;
// Supported Elasticsearch connection topologies (see createNodeClient/createTransportClient).
public static enum ClientType {
NODE, LOCAL, TRANSPORT
}
public ElasticSearchLogger() {
// nobody should be instantiating this...
}
// NOTE(review): the doInitialize flag is ignored — initialization always runs.
public ElasticSearchLogger(boolean doInitialize) {
initializeElasticSearch();
}
/** @return the shared singleton instance (initialized on first access). */
public static ElasticSearchLogger getInstance() {
return ElasticSearchLoggerSingletonHolder.instance;
}
// Singleton Pattern of "Initialization on demand holder idiom"
private static class ElasticSearchLoggerSingletonHolder {
public static final ElasticSearchLogger instance = new ElasticSearchLogger(true);
}
/**
 * Set up the GeoIP lookup service, read configuration overrides, connect to
 * Elasticsearch, and create the statistics index plus its field mapping when
 * the index does not exist yet. Any failure is logged and swallowed, so a
 * broken Elasticsearch setup does not prevent the logger from loading.
 */
public void initializeElasticSearch() {
log.info("DSpace ElasticSearchLogger Initializing");
try {
LookupService service = null;
// Get the db file for the location
String dbfile = ConfigurationManager.getProperty("usage-statistics", "dbfile");
if (dbfile != null) {
try {
service = new LookupService(dbfile, LookupService.GEOIP_STANDARD);
} catch (FileNotFoundException fe) {
log.error("The GeoLite Database file is missing (" + dbfile + ")! Usage Statistics cannot generate location based reports! Please see the DSpace installation instructions for instructions to install this file.", fe);
} catch (IOException e) {
log.error("Unable to load GeoLite Database file (" + dbfile + ")! You may need to reinstall it. See the DSpace installation instructions for more details.", e);
}
} else {
log.error("The required 'dbfile' configuration is missing in usage-statistics.cfg!");
}
locationService = service;
if ("true".equals(ConfigurationManager.getProperty("useProxies"))) {
useProxies = true;
} else {
useProxies = false;
}
log.info("useProxies=" + useProxies);
// Configurable values for all elasticsearch connection constants
clusterName = getConfigurationStringWithFallBack("elastic-search-statistics", "clusterName", clusterName);
indexName = getConfigurationStringWithFallBack("elastic-search-statistics", "indexName", indexName);
indexType = getConfigurationStringWithFallBack("elastic-search-statistics", "indexType", indexType);
address = getConfigurationStringWithFallBack("elastic-search-statistics", "address", address);
port = ConfigurationManager.getIntProperty("elastic-search-statistics", "port", port);
//Initialize the connection to Elastic Search, and ensure our index is available.
client = getClient();
IndicesExistsRequest indicesExistsRequest = new IndicesExistsRequest();
indicesExistsRequest.indices(new String[] {indexName});
ActionFuture<IndicesExistsResponse> actionFutureIndicesExist = client.admin().indices().exists(indicesExistsRequest);
log.info("DS ES Checking if index exists");
if(! actionFutureIndicesExist.actionGet().isExists() ) {
//If elastic search index exists, then we are good to go, otherwise, we need to create that index. Should only need to happen once ever.
log.info("DS ES index didn't exist, we need to create it.");
// NOTE(review): these Settings are built but never passed to the index
// creation below — confirm whether the replica/shard settings take effect.
Settings settings = ImmutableSettings.settingsBuilder()
.put("number_of_replicas", 1)
.put("number_of_shards", 5)
.put("cluster.name", clusterName)
.build();
// JSON field mapping for the statistics document type (strings mostly
// not_analyzed; city/country/dns/ip doubled as analyzed + untouched fields).
String stringMappingJSON = "{\""+indexType+"\" : { \"properties\" : {\n" +
" \"userAgent\":{\n" +
" \"type\":\"string\"\n" +
" },\n" +
" \"countryCode\":{\n" +
" \"type\":\"string\",\n" +
" \"index\":\"not_analyzed\",\n" +
" \"omit_norms\":true\n" +
" },\n" +
" \"dns\":{\n" +
" \"type\":\"multi_field\",\n" +
" \"fields\": {\n" +
" \"dns\": {\"type\":\"string\",\"index\":\"analyzed\"},\n" +
" \"untouched\":{\"type\":\"string\",\"index\":\"not_analyzed\"}\n" +
" }\n" +
" },\n" +
" \"isBot\":{\n" +
" \"type\":\"boolean\"\n" +
" },\n" +
" \"owningColl\":{\n" +
" \"type\":\"integer\",\n" +
" \"index\":\"not_analyzed\"\n" +
" },\n" +
" \"type\":{\n" +
" \"type\":\"string\",\n" +
" \"index\":\"not_analyzed\",\n" +
" \"omit_norms\":true\n" +
" },\n" +
" \"owningComm\":{\n" +
" \"type\":\"integer\",\n" +
" \"index\":\"not_analyzed\"\n" +
" },\n" +
" \"city\":{\n" +
" \"type\":\"multi_field\",\n" +
" \"fields\": {\n" +
" \"city\": {\"type\":\"string\",\"index\":\"analyzed\"},\n" +
" \"untouched\":{\"type\":\"string\",\"index\":\"not_analyzed\"}\n" +
" }\n" +
" },\n" +
" \"country\":{\n" +
" \"type\":\"multi_field\",\n" +
" \"fields\": {\n" +
" \"country\": {\"type\":\"string\",\"index\":\"analyzed\"},\n" +
" \"untouched\":{\"type\":\"string\",\"index\":\"not_analyzed\"}\n" +
" }\n" +
" },\n" +
" \"ip\":{\n" +
" \"type\":\"multi_field\",\n" +
" \"fields\": {\n" +
" \"ip\": {\"type\":\"string\",\"index\":\"analyzed\"},\n" +
" \"untouched\":{\"type\":\"string\",\"index\":\"not_analyzed\"}\n" +
" }\n" +
" },\n" +
" \"id\":{\n" +
" \"type\":\"integer\",\n" +
" \"index\":\"not_analyzed\"\n" +
" },\n" +
" \"time\":{\n" +
" \"type\":\"date\"\n" +
" },\n" +
" \"owningItem\":{\n" +
" \"type\":\"string\",\n" +
" \"index\":\"not_analyzed\"\n" +
" },\n" +
" \"continent\":{\n" +
" \"type\":\"string\",\n" +
" \"index\":\"not_analyzed\"\n" +
" },\n" +
" \"geo\":{\n" +
" \"type\":\"geo_point\"\n" +
" },\n" +
" \"bundleName\":{\n" +
" \"type\":\"string\",\n" +
" \"index\":\"not_analyzed\"\n" +
" },\n" +
" \"epersonid\":{\n" +
" \"type\":\"string\",\n" +
" \"index\":\"not_analyzed\"\n" +
" }\n" +
"} } }";
// Index a throw-away sample document; this forces creation of the index
// so the mapping can be applied afterwards.
client.prepareIndex(indexName, indexType, "1")
.setSource(XContentFactory.jsonBuilder()
.startObject()
.field("user", "kimchy")
.field("postDate", new Date())
.field("message", "trying out Elastic Search")
.endObject()
)
.execute()
.actionGet();
log.info("Create INDEX ["+indexName+"]/["+indexType+"]");
// Wait for create to be finished.
client.admin().indices().prepareRefresh(indexName).execute().actionGet();
//Put the schema/mapping
log.info("Put Mapping for ["+indexName+"]/["+indexType+"]="+stringMappingJSON);
PutMappingRequestBuilder putMappingRequestBuilder = client.admin().indices().preparePutMapping(indexName).setType(indexType);
putMappingRequestBuilder.setSource(stringMappingJSON);
PutMappingResponse response = putMappingRequestBuilder.execute().actionGet();
if(!response.getAcknowledged()) {
log.info("Could not define mapping for type ["+indexName+"]/["+indexType+"]");
} else {
log.info("Successfully put mapping for ["+indexName+"]/["+indexType+"]");
}
log.info("DS ES index didn't exist, but we created it.");
} else {
log.info("DS ES index already exists");
}
log.info("DSpace ElasticSearchLogger Initialized Successfully (I suppose)");
} catch (Exception e) {
log.info("Elastic Search crashed during init. " + e.getMessage());
}
}
/**
 * Index a single usage event (a view of the given DSpaceObject), recording
 * client IP/DNS, GeoIP location, bot detection, user agent, and the owning
 * community/collection/item hierarchy.
 *
 * @param dspaceObject the object that was viewed
 * @param request the originating HTTP request
 * @param currentUser the logged-in user, or null for anonymous access
 */
public void post(DSpaceObject dspaceObject, HttpServletRequest request, EPerson currentUser) {
//log.info("DS-ES post for type:"+dspaceObject.getType() + " -- " + dspaceObject.getName());
client = ElasticSearchLogger.getInstance().getClient();
boolean isSpiderBot = SpiderDetector.isSpider(request);
try {
// Skip bot traffic unless logBots is enabled (default true).
if (isSpiderBot &&
!ConfigurationManager.getBooleanProperty("usage-statistics", "logBots", true)) {
return;
}
// Save our basic info that we already have
String ip = request.getRemoteAddr();
if (isUseProxies() && request.getHeader("X-Forwarded-For") != null) {
/* This header is a comma delimited list */
for (String xfip : request.getHeader("X-Forwarded-For").split(",")) {
/* proxy itself will sometime populate this header with the same value in
remote address. ordering in spec is vague, we'll just take the last
not equal to the proxy
*/
// NOTE(review): this tests the whole header against the current ip,
// not the individual xfip entry — looks suspicious; confirm intended.
if (!request.getHeader("X-Forwarded-For").contains(ip)) {
ip = xfip.trim();
}
}
}
XContentBuilder docBuilder = null;
docBuilder = XContentFactory.jsonBuilder().startObject();
docBuilder.field("ip", ip);
docBuilder.field("id", dspaceObject.getID());
// The numerical constant that represents the DSpaceObject TYPE. i.e. 0=bitstream, 2=item, ...
docBuilder.field("typeIndex", dspaceObject.getType());
// The text that represent the DSpaceObject TYPE. i.e. BITSTREAM, ITEM, COLLECTION, COMMUNITY
docBuilder.field("type", Constants.typeText[dspaceObject.getType()]);
// Save the current time
docBuilder.field("time", DateFormatUtils.format(new Date(), DATE_FORMAT_8601));
if (currentUser != null) {
docBuilder.field("epersonid", currentUser.getID());
}
// Reverse-DNS is best effort; a failure only loses the "dns" field.
try {
String dns = DnsLookup.reverseDns(ip);
docBuilder.field("dns", dns.toLowerCase());
} catch (Exception e) {
log.error("Failed DNS Lookup for IP:" + ip);
log.debug(e.getMessage(), e);
}
// Save the location information if valid, save the event without
// location information if not valid
// NOTE(review): locationService is null when the GeoIP dbfile was not
// configured — this call would then NPE (caught by the outer catch); confirm.
Location location = locationService.getLocation(ip);
if (location != null
&& !("--".equals(location.countryCode)
&& location.latitude == -180 && location.longitude == -180)) {
try {
docBuilder.field("continent", LocationUtils
.getContinentCode(location.countryCode));
} catch (Exception e) {
System.out
.println("COUNTRY ERROR: " + location.countryCode);
}
docBuilder.field("countryCode", location.countryCode);
docBuilder.field("city", location.city);
docBuilder.field("latitude", location.latitude);
docBuilder.field("longitude", location.longitude);
// NOTE(review): isBot/userAgent are only written when a valid location
// exists — confirm they shouldn't be recorded unconditionally.
docBuilder.field("isBot", isSpiderBot);
if (request.getHeader("User-Agent") != null) {
docBuilder.field("userAgent", request.getHeader("User-Agent"));
}
}
// For bitstreams, also record the names of all bundles that contain them.
if (dspaceObject instanceof Bitstream) {
Bitstream bit = (Bitstream) dspaceObject;
Bundle[] bundles = bit.getBundles();
docBuilder.field("bundleName").startArray();
for (Bundle bundle : bundles) {
docBuilder.value(bundle.getName());
}
docBuilder.endArray();
}
// Attach owningComm/owningColl/owningItem id arrays.
storeParents(docBuilder, getParents(dspaceObject));
docBuilder.endObject();
// NOTE(review): docBuilder can no longer be null here (assigned above).
if (docBuilder != null) {
IndexRequestBuilder irb = client.prepareIndex(indexName, indexType)
.setSource(docBuilder);
//log.info("Executing document insert into index");
if(client == null) {
log.error("Hey, client is null");
}
irb.execute().actionGet();
}
} catch (RuntimeException re) {
log.error("RunTimer in ESL:\n" + ExceptionUtils.getStackTrace(re));
throw re;
} catch (Exception e) {
log.error(e.getMessage());
} finally {
// NOTE(review): closes the shared static client after every post — later
// posts will fetch this same (closed) client; verify this is intended.
client.close();
}
}
public static String getClusterName() {
return clusterName;
}
public static void setClusterName(String clusterName) {
ElasticSearchLogger.clusterName = clusterName;
}
public static String getIndexName() {
return indexName;
}
public static void setIndexName(String indexName) {
ElasticSearchLogger.indexName = indexName;
}
public static String getIndexType() {
return indexType;
}
public static void setIndexType(String indexType) {
ElasticSearchLogger.indexType = indexType;
}
public static String getAddress() {
return address;
}
public static void setAddress(String address) {
ElasticSearchLogger.address = address;
}
public static int getPort() {
return port;
}
public static void setPort(int port) {
ElasticSearchLogger.port = port;
}
/**
 * Recursively collect the IDs of the given object's ancestors into the
 * provided map under the keys "owningComm", "owningColl" and "owningItem".
 * NOTE(review): for a Community only the ancestors are added, never the
 * community itself — confirm that is the intended semantics.
 *
 * @param dso the starting object (community, collection, item or bitstream)
 * @param parents map pre-populated with empty lists for the three keys
 * @throws SQLException if a database lookup fails
 */
public void buildParents(DSpaceObject dso, HashMap<String, ArrayList<Integer>> parents)
throws SQLException {
if (dso instanceof Community) {
Community comm = (Community) dso;
while (comm != null && comm.getParentCommunity() != null) {
comm = comm.getParentCommunity();
parents.get("owningComm").add(comm.getID());
}
} else if (dso instanceof Collection) {
Collection coll = (Collection) dso;
for (Community community : coll.getCommunities()) {
parents.get("owningComm").add(community.getID());
buildParents(community, parents);
}
} else if (dso instanceof Item) {
Item item = (Item) dso;
for (Collection collection : item.getCollections()) {
parents.get("owningColl").add(collection.getID());
buildParents(collection, parents);
}
} else if (dso instanceof Bitstream) {
Bitstream bitstream = (Bitstream) dso;
for (Bundle bundle : bitstream.getBundles()) {
for (Item item : bundle.getItems()) {
parents.get("owningItem").add(item.getID());
buildParents(item, parents);
}
}
}
}
/**
 * Build a map of ancestor IDs ("owningComm"/"owningColl"/"owningItem") for
 * the given object; lists are empty when there is no such ancestor.
 */
public HashMap<String, ArrayList<Integer>> getParents(DSpaceObject dso)
throws SQLException {
HashMap<String, ArrayList<Integer>> parents = new HashMap<String, ArrayList<Integer>>();
parents.put("owningComm", new ArrayList<Integer>());
parents.put("owningColl", new ArrayList<Integer>());
parents.put("owningItem", new ArrayList<Integer>());
buildParents(dso, parents);
return parents;
}
/**
 * Write each non-empty parent-id list from {@code parents} into the document
 * builder as an integer array field named after its key.
 */
public void storeParents(XContentBuilder docBuilder, HashMap<String, ArrayList<Integer>> parents) throws IOException {
Iterator it = parents.keySet().iterator();
while (it.hasNext()) {
String key = (String) it.next();
ArrayList<Integer> ids = parents.get(key);
if (ids.size() > 0) {
docBuilder.field(key).startArray();
for (Integer i : ids) {
docBuilder.value(i);
}
docBuilder.endArray();
}
}
}
/** @return true when X-Forwarded-For headers are trusted (set during init). */
public boolean isUseProxies() {
return useProxies;
}
// Transport Client will talk to server on 9300
public void createTransportClient() {
// Configurable values for all elasticsearch connection constants
// Can't guarantee that these values are already loaded, since this can be called by a different JVM
clusterName = getConfigurationStringWithFallBack("elastic-search-statistics", "clusterName", clusterName);
indexName = getConfigurationStringWithFallBack("elastic-search-statistics", "indexName", indexName);
indexType = getConfigurationStringWithFallBack("elastic-search-statistics", "indexType", indexType);
address = getConfigurationStringWithFallBack("elastic-search-statistics", "address", address);
port = ConfigurationManager.getIntProperty("elastic-search-statistics", "port", port);
log.info("Creating TransportClient to [Address:" + address + "] [Port:" + port + "] [cluster.name:" + clusterName + "]");
Settings settings = ImmutableSettings.settingsBuilder().put("cluster.name", clusterName).build();
client = new TransportClient(settings).addTransportAddress(new InetSocketTransportAddress(address, port));
}
/** @return an available client; creates a NODE-type client when none exists. */
public Client getClient() {
//Get an available client, otherwise new default is NODE.
return getClient(ClientType.NODE);
}
// Get the already available client, otherwise we will create a new client.
// TODO Allow for config to determine which architecture / topology to use.
// - Local Node, store Data
// - Node Client, must discover a master within ES cluster
// - Transport Client, specify IP address of server running ES.
public Client getClient(ClientType clientType) {
if(client == null) {
log.error("getClient reports null client");
if(clientType == ClientType.TRANSPORT) {
createTransportClient();
} else {
createNodeClient(clientType);
}
}
return client;
}
// Node Client will discover other ES nodes running in local JVM
public Client createNodeClient(ClientType clientType) {
String dspaceDir = ConfigurationManager.getProperty("dspace.dir");
Settings settings = ImmutableSettings.settingsBuilder().put("path.data", dspaceDir + "/elasticsearch/").build();
NodeBuilder nodeBuilder = NodeBuilder.nodeBuilder().clusterName(clusterName).data(true).settings(settings);
if(clientType == ClientType.LOCAL) {
log.info("Create a Local Node.");
nodeBuilder = nodeBuilder.local(true);
} else if(clientType == ClientType.NODE) {
log.info("Create a nodeClient, allows transport clients to connect");
nodeBuilder = nodeBuilder.local(false);
}
Node node = nodeBuilder.node();
log.info("Got node");
client = node.client();
log.info("Created new node client");
return client;
}
/**
 * Read a configuration value from the given module, falling back to
 * {@code defaultFallbackValue} when the property is unset or blank.
 */
public String getConfigurationStringWithFallBack(String module, String configurationKey, String defaultFallbackValue) {
String configDrivenValue = ConfigurationManager.getProperty(module, configurationKey);
if(configDrivenValue == null || configDrivenValue.trim().equalsIgnoreCase("")) {
return defaultFallbackValue;
} else {
return configDrivenValue;
}
}
}

View File

@@ -0,0 +1,40 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics;
import org.apache.log4j.Logger;
import org.dspace.eperson.EPerson;
import org.dspace.services.model.Event;
import org.dspace.usage.AbstractUsageEventListener;
import org.dspace.usage.UsageEvent;
/**
 * Usage-event listener that forwards VIEW events to the Elasticsearch
 * statistics logger; all other event types are ignored.
 */
public class ElasticSearchLoggerEventListener extends AbstractUsageEventListener {

    private static Logger log = Logger.getLogger(ElasticSearchLoggerEventListener.class);

    /**
     * Handle an incoming event: if it is a {@link UsageEvent} with the VIEW
     * action, post it to {@link ElasticSearchLogger}; failures are logged and
     * swallowed so event dispatch is never interrupted.
     */
    public void receiveEvent(Event event) {
        if (!(event instanceof UsageEvent)) {
            return;
        }
        UsageEvent usageEvent = (UsageEvent) event;
        if (usageEvent.getAction() != UsageEvent.Action.VIEW) {
            return;
        }
        try {
            // Anonymous access yields a null current user.
            EPerson user = usageEvent.getContext() == null ? null : usageEvent.getContext().getCurrentUser();
            ElasticSearchLogger.getInstance().post(usageEvent.getObject(), usageEvent.getRequest(), user);
            log.info("Successfully logged " + usageEvent.getObject().getTypeText() + "_" + usageEvent.getObject().getID() + " " + usageEvent.getObject().getName());
        } catch (Exception e) {
            log.error("General Exception: " + e.getMessage());
        }
    }
}

View File

@@ -0,0 +1,39 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics;
/**
 * Data structure for returning results from statistics searches: a single
 * value paired with the number of times it occurred.
 *
 * @author mdiggory at atmire.com
 * @author ben at atmire.com
 * @author kevinvandevelde at atmire.com
 */
public class ObjectCount {

    // Number of occurrences recorded for the value.
    private long count;
    // The value (e.g. a facet term) this count belongs to.
    private String value;

    public ObjectCount() {
        // Fields are populated through the setters after construction.
    }

    /** @return the recorded occurrence count */
    public long getCount() {
        return count;
    }

    public void setCount(long count) {
        this.count = count;
    }

    /** @return the value this count belongs to (may be null if never set) */
    public String getValue() {
        return value;
    }

    public void setValue(String value) {
        this.value = value;
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,66 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics;
import org.apache.log4j.Logger;
import org.dspace.eperson.EPerson;
import org.dspace.services.model.Event;
import org.dspace.usage.AbstractUsageEventListener;
import org.dspace.usage.UsageEvent;
import org.dspace.usage.UsageSearchEvent;
import org.dspace.usage.UsageWorkflowEvent;
import org.springframework.util.CollectionUtils;
/**
 * Facade listener that keeps Solr-specific statistics logging separate from
 * the rest of DSpace: it routes VIEW, SEARCH and WORKFLOW usage events to the
 * matching {@code SolrLogger} post method.
 *
 * @author mdiggory
 */
public class SolrLoggerUsageEventListener extends AbstractUsageEventListener {

    private static Logger log = Logger.getLogger(SolrLoggerUsageEventListener.class);

    /**
     * Dispatch a usage event to SolrLogger. Non-usage events and unknown
     * actions are ignored; any failure is logged and swallowed.
     */
    public void receiveEvent(Event event) {
        if (!(event instanceof UsageEvent)) {
            return;
        }
        UsageEvent usageEvent = (UsageEvent) event;
        try {
            // Anonymous access yields a null current user.
            EPerson user = usageEvent.getContext() == null ? null : usageEvent.getContext().getCurrentUser();
            UsageEvent.Action action = usageEvent.getAction();
            if (UsageEvent.Action.VIEW == action) {
                SolrLogger.postView(usageEvent.getObject(), usageEvent.getRequest(), user);
            } else if (UsageEvent.Action.SEARCH == action) {
                UsageSearchEvent searchEvent = (UsageSearchEvent) usageEvent;
                //Only log if the user has already filled in a query !
                if (!CollectionUtils.isEmpty(searchEvent.getQueries())) {
                    SolrLogger.postSearch(usageEvent.getObject(), usageEvent.getRequest(), user,
                            searchEvent.getQueries(), searchEvent.getRpp(), searchEvent.getSortBy(),
                            searchEvent.getSortOrder(), searchEvent.getPage(), searchEvent.getScope());
                }
            } else if (UsageEvent.Action.WORKFLOW == action) {
                SolrLogger.postWorkflow((UsageWorkflowEvent) usageEvent);
            }
        } catch (Exception e) {
            log.error(e.getMessage());
        }
    }
}

View File

@@ -0,0 +1,222 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.Item;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.event.Consumer;
import org.dspace.event.Event;
/**
* StatisticsLogging Consumer for SolrLogger which captures Create, Update
* and Delete Events on DSpace Objects.
*
* All usage-events will be updated to capture changes to e.g.
* the owning collection
*
* @author kevinvandevelde at atmire.com
* @author ben at atmire.com
*/
public class StatisticsLoggingConsumer implements Consumer
{
private Set<String> toRemoveQueries = null;
/**
 * Consumer start-up hook; nothing to do here — the delete-query buffer is
 * created lazily on the first call to consume().
 */
public void initialize() throws Exception
{
}
// TODO: checkout whether moving of collections, communities and bitstreams works
// TODO: use async threaded consumer as this might require some processing time
// TODO: we might be able to improve the performance: changing the collection will trigger 4 update commands
/**
 * React to a content event by keeping the Solr statistics core in sync:
 * DELETE queues a delete-everything query for the object, MODIFY_METADATA on
 * an item replaces the item's indexed metadata fields, and ADD/REMOVE of an
 * item to/from a collection adds/removes the owning collection and community
 * ids on the item's usage records.
 *
 * @param ctx the DSpace context the event was fired in
 * @param event the content event to process
 * @throws Exception on any database or Solr failure
 */
public void consume(Context ctx, Event event) throws Exception
{
// Lazily create the buffer of delete queries flushed later (in end/finish).
if (toRemoveQueries == null)
{
toRemoveQueries = new HashSet<String>();
}
int dsoId = event.getSubjectID();
int dsoType = event.getSubjectType();
int eventType = event.getEventType();
// Check if we are deleting something
if (eventType == Event.DELETE)
{
// First make sure we delete everything for this dso
String query = "id:" + dsoId + " AND type:" + dsoType;
toRemoveQueries.add(query);
}
else if (eventType == Event.MODIFY && dsoType == Constants.ITEM)
{
// We have a modified item check for a withdraw/reinstate
}
else if (eventType == Event.MODIFY_METADATA
&& event.getSubjectType() == Constants.ITEM)
{
// Re-index the item's configured metadata fields when they changed.
Item item = Item.find(ctx, event.getSubjectID());
String updateQuery = "id:" + item.getID() + " AND type:"
+ item.getType();
// Currently indexed values for this item, keyed by stored field name.
Map<String, List<String>> indexedValues = SolrLogger.queryField(
updateQuery, null, null);
// Get all the metadata
Map<String, String> metadataStorageInfo = SolrLogger.getMetadataStorageInfo();
List<String> storageFieldList = new ArrayList<String>();
List<List<Object>> storageValuesList = new ArrayList<List<Object>>();
for (Map.Entry<String, String> entry : metadataStorageInfo.entrySet())
{
// Entry value is a "schema.element.qualifier" spec for the item metadata.
String[] metadataFieldInfo = entry.getValue().split("\\.");
List<Object> values = new ArrayList<Object>();
// Lower-cased copies feed the "<field>_search" variant.
List<Object> valuesLow = new ArrayList<Object>();
for (int i = 0; i < item.getMetadata(metadataFieldInfo[0],
metadataFieldInfo[1], metadataFieldInfo[2], Item.ANY).length; i++)
{
values.add(item.getMetadata(metadataFieldInfo[0],
metadataFieldInfo[1], metadataFieldInfo[2],
Item.ANY)[i].value);
valuesLow.add(item.getMetadata(metadataFieldInfo[0],
metadataFieldInfo[1], metadataFieldInfo[2],
Item.ANY)[i].value.toLowerCase());
}
// NOTE(review): indexedValues.get(...) may return null for a field that
// was never indexed, which would NPE on size() below — confirm.
List<String> indexedVals = indexedValues.get(entry.getKey());
// Only push an update when the stored values actually differ.
boolean update = true;
if (values.size() == indexedVals.size() && values.containsAll(indexedVals))
{
update = false;
}
if (update)
{
storageFieldList.add(entry.getKey());
storageFieldList.add(entry.getKey() + "_search");
storageValuesList.add(values);
storageValuesList.add(valuesLow);
}
}
SolrLogger.update(updateQuery, "replace", storageFieldList,
storageValuesList);
}
if (eventType == Event.ADD && dsoType == Constants.COLLECTION
&& event.getObject(ctx) instanceof Item)
{
// We are mapping a new item make sure that the owning collection is
// updated
Item newItem = (Item) event.getObject(ctx);
// NOTE(review): "id: " (with space) differs from the "id:" used above —
// confirm Solr treats both queries identically.
String updateQuery = "id: " + newItem.getID() + " AND type:"
+ newItem.getType();
List<String> fieldNames = new ArrayList<String>();
List<List<Object>> valuesList = new ArrayList<List<Object>>();
fieldNames.add("owningColl");
fieldNames.add("owningComm");
// First value list: the collection id for owningColl.
List<Object> valsList = new ArrayList<Object>();
valsList.add(dsoId);
valuesList.add(valsList);
// Second value list: every community above that collection for owningComm.
valsList = new ArrayList<Object>();
valsList.addAll(findOwningCommunities(ctx, dsoId));
valuesList.add(valsList);
// Now make sure we also update the communities
SolrLogger.update(updateQuery, "addOne", fieldNames, valuesList);
}
else if (eventType == Event.REMOVE && dsoType == Constants.COLLECTION
&& event.getObject(ctx) instanceof Item)
{
// Unmapping items
Item newItem = (Item) event.getObject(ctx);
String updateQuery = "id: " + newItem.getID() + " AND type:"
+ newItem.getType();
List<String> fieldNames = new ArrayList<String>();
List<List<Object>> valuesList = new ArrayList<List<Object>>();
fieldNames.add("owningColl");
fieldNames.add("owningComm");
// Mirror of the ADD branch: remove the collection id and its community ids.
List<Object> valsList = new ArrayList<Object>();
valsList.add(dsoId);
valuesList.add(valsList);
valsList = new ArrayList<Object>();
valsList.addAll(findOwningCommunities(ctx, dsoId));
valuesList.add(valsList);
SolrLogger.update(updateQuery, "remOne", fieldNames, valuesList);
}
}
/**
 * Resolve a collection by id and gather the ids of every community above
 * it in the hierarchy (its direct parents and all of their ancestors).
 *
 * @param context the DSpace context used for the lookup
 * @param collId  database id of the collection
 * @return list of owning community ids (empty if the collection is gone)
 * @throws SQLException if the database lookup fails
 */
private List<Object> findOwningCommunities(Context context, int collId)
        throws SQLException
{
    Collection coll = Collection.find(context, collId);
    List<Object> owningComms = new ArrayList<Object>();
    if (coll == null)
    {
        // Collection no longer exists; nothing owns it. (The original
        // would have thrown a NullPointerException here.)
        return owningComms;
    }
    // Hoisted out of the loop: the original re-invoked getCommunities()
    // (a repository lookup) on every iteration.
    Community[] communities = coll.getCommunities();
    for (int i = 0; i < communities.length; i++)
    {
        findComms(communities[i], owningComms);
    }
    return owningComms;
}
/**
 * Climb the community hierarchy starting at {@code comm}, recording each
 * community id at most once into {@code parentComms}.
 *
 * @param comm        starting community (may be null; then nothing is added)
 * @param parentComms accumulator for the community ids
 * @throws SQLException if walking up to a parent community fails
 */
private void findComms(Community comm, List<Object> parentComms)
        throws SQLException
{
    for (Community current = comm;
         current != null;
         current = current.getParentCommunity())
    {
        Object communityId = current.getID();
        if (!parentComms.contains(communityId))
        {
            parentComms.add(communityId);
        }
    }
}
/**
 * Flush every queued removal query against the statistics index, then
 * reset the queue so the next consumption cycle starts clean.
 *
 * @param ctx the event context (unused here)
 * @throws Exception if removing documents from the index fails
 */
public void end(Context ctx) throws Exception
{
    List<String> pendingRemovals = toRemoveQueries;
    if (pendingRemovals != null)
    {
        for (String removalQuery : pendingRemovals)
        {
            SolrLogger.removeIndex(removalQuery);
        }
    }
    // Clear the queue regardless of whether anything was flushed.
    toRemoveQueries = null;
}
public void finish(Context ctx) throws Exception
{
    // Intentionally empty: queued index removals are flushed in
    // end(Context), so there is no remaining per-run cleanup to do here.
}
}

View File

@@ -0,0 +1,84 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.content;
import org.dspace.core.Constants;
/**
* Describes the displayed representation of the statistics on a DSpaceObject
* and its children.
* @author TODO
*/
public class DSORepresentation {
    /** The type of DSpaceObject to be shown. */
    private Integer type;
    /** The maximum number of children to show. **/
    private Integer max;
    /** Determines if should show the DSOs as separate entities or use the sum of them. */
    private Boolean separate;
    /** Maximum length at which a child's name is rendered. */
    private Integer nameLength;

    /** Construct a representation assumed to be of an ITEM. */
    public DSORepresentation() {
        setType(Constants.ITEM);
    }

    /** Construct a representation as described.
     *
     * @param type Object type, e.g. Constants.COLLECTION
     * @param max Maximum number of children to display
     * @param separate True if children's statistics are distinct; false if summed
     */
    public DSORepresentation(Integer type, Integer max, Boolean separate) {
        setType(type);
        setMax(max);
        setSeparate(separate);
    }

    public final Integer getType() {
        return type;
    }

    /**
     * @param type Object type, e.g. Constants.COLLECTION
     */
    public final void setType(Integer type) {
        this.type = type;
    }

    /** @return the configured maximum, or -1 when none was set. */
    public final Integer getMax() {
        if (max == null) {
            return -1;
        }
        return max;
    }

    /**
     * @param max Maximum number of children to display
     */
    public final void setMax(Integer max) {
        this.max = max;
    }

    public final Integer getNameLength() {
        return nameLength;
    }

    public final void setNameLength(Integer nameLength) {
        this.nameLength = nameLength;
    }

    /** @return true only when separate was explicitly set to true. */
    public final Boolean getSeparate() {
        return Boolean.TRUE.equals(separate);
    }

    /**
     * @param separate true for distinct child statistics; false to sum them
     */
    public final void setSeparate(Boolean separate) {
        this.separate = separate;
    }
}

View File

@@ -0,0 +1,44 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.content;
import java.util.ArrayList;
import java.util.List;
/**
* Represents a dspace object based facet for filtering.
*
* @author kevinvandevelde at atmire.com
* Date: 23-dec-2008
* Time: 11:38:20
*
*/
public class DatasetDSpaceObjectGenerator extends DatasetGenerator {

    /** The children of our dspaceobject to be shown. **/
    private final List<DSORepresentation> dsoRepresentations;

    public DatasetDSpaceObjectGenerator() {
        dsoRepresentations = new ArrayList<DSORepresentation>();
    }

    /** Queue an already-configured child representation. */
    public void addDsoChild(DSORepresentation representation){
        dsoRepresentations.add(representation);
    }

    /** Convenience overload: build a child representation and queue it. */
    public void addDsoChild(int type, int max, boolean separate, int nameLength){
        DSORepresentation child = new DSORepresentation(type, max, separate);
        child.setNameLength(nameLength);
        addDsoChild(child);
    }

    /** @return the child representations queued so far. */
    public List<DSORepresentation> getDsoRepresentations() {
        return dsoRepresentations;
    }
}

View File

@@ -0,0 +1,42 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.content;
/**
* Represents a single facet for filtering.
* Can be one of the axes in a table.
*
* @author kevinvandevelde at atmire.com
* Date: 23-dec-2008
* Time: 9:39:37
*
*/
public abstract class DatasetGenerator {

    /** The type of generator can either be CATEGORY or SERIE. **/
    protected int datasetType;

    /** Whether a totals entry must be added to the generated dataset. */
    protected boolean includeTotal = false;

    /** @return the generator type (CATEGORY or SERIE). */
    public int getDatasetType(){
        return datasetType;
    }

    /** @param datasetType the generator type (CATEGORY or SERIE). */
    public void setDatasetType(int datasetType){
        this.datasetType = datasetType;
    }

    /** @return true when a totals entry must be included. */
    public boolean isIncludeTotal() {
        return includeTotal;
    }

    /** @param includeTotal true when a totals entry must be included. */
    public void setIncludeTotal(boolean includeTotal) {
        this.includeTotal = includeTotal;
    }
}

View File

@@ -0,0 +1,56 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.content;
/**
* @author Kevin Van de Velde (kevin at atmire dot com)
* @author Ben Bosman (ben at atmire dot com)
* @author Mark Diggory (markd at atmire dot com)
*/
public class DatasetSearchGenerator extends DatasetTypeGenerator {

    /** The flavours of search statistics this generator can produce. */
    public enum Mode {
        SEARCH_OVERVIEW ("search_overview"),
        SEARCH_OVERVIEW_TOTAL ("search_overview_total");

        private final String text;

        Mode(String text) {
            this.text = text;
        }

        /** @return the textual identifier of this mode. */
        public String text() { return text; }
    }

    /** Selected statistics flavour. */
    private Mode mode;
    /** When true, counts are also rendered as a percentage of the total. */
    private boolean percentage = false;
    /** When true, the page views resulting from searches are retrieved too. */
    private boolean retrievePageViews;

    public Mode getMode() {
        return mode;
    }

    public void setMode(Mode mode) {
        this.mode = mode;
    }

    public boolean isPercentage() {
        return percentage;
    }

    public void setPercentage(boolean percentage){
        this.percentage = percentage;
    }

    public boolean isRetrievePageViews() {
        return retrievePageViews;
    }

    public void setRetrievePageViews(boolean retrievePageViews) {
        this.retrievePageViews = retrievePageViews;
    }
}

View File

@@ -0,0 +1,235 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.content;
import java.util.Calendar;
import java.util.Date;
/**
* Represents a date facet for filtering.
*
* @author kevinvandevelde at atmire.com
* Date: 23-dec-2008
* Time: 9:44:57
*
*/
public class DatasetTimeGenerator extends DatasetGenerator {

    /** Identifier of this generator kind; always "time". */
    private String type = "time";
    /** Granularity of the interval: "day", "month", "year" or "hour". */
    private String dateType;
    /** Relative start of the interval, e.g. "-6". */
    private String startDate;
    /** Relative end of the interval, e.g. "+1". */
    private String endDate;
    /** Absolute interval start, when set from Date objects. */
    private Date actualStartDate;
    /** Absolute interval end, when set from Date objects. */
    private Date actualEndDate;

    //TODO: process includetotal

    public DatasetTimeGenerator() {
    }

    /**
     * Sets the date interval.
     * For example if you wish to see the data from today to six months ago give
     * the following parameters:
     * datatype = "month"
     * start = "-6"
     * end = "+1" // the +1 indicates this month also
     *
     * @param dateType type can be days, months, years
     * @param start the start of the interval
     * @param end the end of the interval
     */
    public void setDateInterval(String dateType, String start, String end){
        this.startDate = start;
        this.endDate = end;
        this.dateType = dateType;
    }

    /**
     * Sets the date interval from two absolute dates, converting them into
     * the relative offsets (from "now") used by the query layer.
     *
     * @param dateType granularity: "day", "month", "year" or "hour"
     * @param start interval start; must not be null here (setTime would NPE)
     * @param end interval end; must not be null and not before start
     * @throws IllegalArgumentException if end comes before start
     */
    public void setDateInterval(String dateType, Date start, Date end)
            throws IllegalArgumentException
    {
        // Defensive copies: java.util.Date is mutable.
        actualStartDate = (start == null ? null : new Date(start.getTime()));
        actualEndDate = (end == null ? null : new Date(end.getTime()));
        this.dateType = dateType;
        // Check if end comes before start.
        // (Fix: removed the former null-check on Calendar.getInstance();
        // it can never return null, so that branch was unreachable.)
        Calendar startCal1 = Calendar.getInstance();
        Calendar endCal1 = Calendar.getInstance();
        startCal1.setTime(start);
        endCal1.setTime(end);
        if(endCal1.before(startCal1))
        {
            throw new IllegalArgumentException();
        }
        // TODO: ensure future dates are tested. Although we normally do not
        // have visits from the future.

        // Map the textual dateType onto the matching Calendar field.
        // (Renamed from "type" to avoid confusion with the String field.)
        int calendarField = -1;
        if("year".equalsIgnoreCase(dateType)){
            calendarField = Calendar.YEAR;
        }else if("month".equalsIgnoreCase(dateType)){
            calendarField = Calendar.MONTH;
        }else if("day".equalsIgnoreCase(dateType)){
            calendarField = Calendar.DATE;
        }else if("hour".equalsIgnoreCase(dateType)){
            calendarField = Calendar.HOUR;
        }
        // Express both dates as offsets from the current moment.
        int difStart = getTimeDifference(start, Calendar.getInstance().getTime(), calendarField);
        int difEnd = getTimeDifference(end, Calendar.getInstance().getTime(), calendarField);
        boolean endPos = false;
        if(difEnd == 0){
            //Includes the current
            difEnd = 1;
            endPos = true;
        }else if(0 < difEnd)
        {
            endPos = true;
        }
        else{
            difEnd++;
        }
        startDate = "" + difStart;
        //We need +1 so we can count the current month/year/...
        endDate = (endPos ? "+" : "") + difEnd;
    }

    public String getStartDate() {
        return startDate;
    }

    public void setStartDate(String startDate) {
        this.startDate = startDate;
    }

    public String getEndDate() {
        return endDate;
    }

    public void setEndDate(String endDate) {
        this.endDate = endDate;
    }

    /** @return the interval granularity, upper-cased for the query layer. */
    public String getDateType() {
        return dateType.toUpperCase();
    }

    /** @return a defensive copy of the absolute start date, or null. */
    public Date getActualStartDate() {
        return actualStartDate == null ? null : new Date(actualStartDate.getTime());
    }

    public void setActualStartDate(Date actualStartDate) {
        this.actualStartDate = (actualStartDate == null ? null : new Date(actualStartDate.getTime()));
    }

    /** @return a defensive copy of the absolute end date, or null. */
    public Date getActualEndDate() {
        return actualEndDate == null ? null : new Date(actualEndDate.getTime());
    }

    public void setActualEndDate(Date actualEndDate) {
        this.actualEndDate = (actualEndDate == null ? null : new Date(actualEndDate.getTime()));
    }

    public void setDateType(String dateType) {
        this.dateType = dateType;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    /** Get the difference between two Dates in terms of a given interval.
     * That is: if you specify the difference in months, you get back the
     * number of months between the dates.
     *
     * @param date1 the first date
     * @param date2 the other date
     * @param type Calendar.HOUR or .DATE or .MONTH
     * @return number of {@code type} intervals between {@code date1} and
     * {@code date2}; positive when {@code date1} is on or after {@code date2}
     */
    private int getTimeDifference(Date date1, Date date2, int type){
        int toAdd;
        int elapsed = 0;
        // Normalise both dates onto Calendars, clearing every field finer
        // than the requested granularity so whole units are compared.
        Calendar cal1, cal2;
        cal1 = Calendar.getInstance();
        cal2 = Calendar.getInstance();
        cal1.setTime(date1);
        cal2.setTime(date2);
        cal1.clear(Calendar.MILLISECOND);
        cal2.clear(Calendar.MILLISECOND);
        cal1.clear(Calendar.SECOND);
        cal2.clear(Calendar.SECOND);
        cal1.clear(Calendar.MINUTE);
        cal2.clear(Calendar.MINUTE);
        if(type != Calendar.HOUR){
            cal1.clear(Calendar.HOUR);
            cal2.clear(Calendar.HOUR);
            cal1.clear(Calendar.HOUR_OF_DAY);
            cal2.clear(Calendar.HOUR_OF_DAY);
            // clear(HOUR) alone does not reset the hour-of-day, so set it
            // explicitly to midnight.
            cal1.set(Calendar.HOUR_OF_DAY, 0);
            cal2.set(Calendar.HOUR_OF_DAY, 0);
        }
        if(type != Calendar.DATE){
            cal1.set(Calendar.DATE, 1);
            cal2.set(Calendar.DATE, 1);
        }
        if(type != Calendar.MONTH){
            cal1.clear(Calendar.MONTH);
            cal2.clear(Calendar.MONTH);
        }
        // Swap so cal1 is the earlier calendar; toAdd carries the sign of
        // the result (positive when date1 was on or after date2).
        if(cal1.after(cal2) || cal1.equals(cal2)){
            Calendar backup = cal1;
            cal1 = cal2;
            cal2 = backup;
            toAdd = 1;
        }else
        {
            toAdd = -1;
        }
        // Step the earlier calendar forward one unit at a time.
        while(cal1.before(cal2)){
            cal1.add(type, 1);
            elapsed += toAdd;
        }
        return elapsed;
    }
}

View File

@@ -0,0 +1,46 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.content;
/**
* Represents a simple string facet for filtering.
* Doesn't offer any special interaction.
*
* @author kevinvandevelde at atmire.com
* Date: 23-dec-2008
* Time: 12:44:27
*
*/
public class DatasetTypeGenerator extends DatasetGenerator {

    /** The type of our generator (EXAMPLE: country). **/
    private String type;
    /** The number of values shown (max). **/
    private int max;

    public DatasetTypeGenerator() {
    }

    /** @return the maximum number of values to show. */
    public int getMax() {
        return max;
    }

    /** @param max the maximum number of values to show. */
    public void setMax(int max) {
        this.max = max;
    }

    /** @return the facet type this generator reports on, e.g. "country". */
    public String getType() {
        return type;
    }

    /** @param type the facet type this generator reports on. */
    public void setType(String type) {
        this.type = type;
    }
}

View File

@@ -0,0 +1,125 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.content;
import java.util.ArrayList;
import java.util.List;
import org.apache.solr.client.solrj.SolrServerException;
import org.dspace.content.Item;
import org.dspace.core.Constants;
import org.dspace.statistics.SolrLogger;
import org.dspace.statistics.content.filter.StatisticsFilter;
/**
* Class that will hold the data needed to show
* statistics in the browse and search pages.
*
* User: @author kevinvandevelde at atmire.com (kevin at atmire.com)
* Date: 20-mei-2009
* Time: 16:44:29
*/
public class StatisticsBSAdapter {

    /** Whether item view statistics should be displayed. */
    private boolean displayItemViews;
    /** Whether bitstream download statistics should be displayed. */
    private boolean displayBitstreamViews;
    /** Whether the combined total should be displayed. */
    private boolean displayTotalViews;
    /** Filters applied to every visit query. */
    private List<StatisticsFilter> filters;

    /** visitType is ITEM */
    public static final int ITEM_VISITS = 0;
    /** visitType is BITSTREAM */
    public static final int BITSTREAM_VISITS = 1;
    /** visitType is TOTAL */
    public static final int TOTAL_VISITS = 2;

    public StatisticsBSAdapter() {
        displayItemViews = false;
        displayBitstreamViews = false;
        filters = new ArrayList<StatisticsFilter>();
    }

    /**
     * Returns the number of visits for the item.
     * Depending on the visitType it can either be item, bitstream, total, ...
     *
     * @param visitType the type of visits we want, from the item, bitstream, total
     * @param item the item from which we need our visits
     * @return the number of visits, or -1 for an unknown visitType
     * @throws SolrServerException if the statistics query fails
     */
    public long getNumberOfVisits(int visitType, Item item) throws SolrServerException {
        switch (visitType){
            case ITEM_VISITS:
                return SolrLogger.queryTotal("type: " + Constants.ITEM + " AND id: " + item.getID(), resolveFilterQueries()).getCount();
            case BITSTREAM_VISITS:
                return SolrLogger.queryTotal("type: " + Constants.BITSTREAM + " AND owningItem: " + item.getID(), resolveFilterQueries()).getCount();
            case TOTAL_VISITS:
                return getNumberOfVisits(ITEM_VISITS, item) + getNumberOfVisits(BITSTREAM_VISITS, item);
        }
        return -1;
    }

    /**
     * Joins all configured filter queries with " AND ".
     * Fix: the original appended the separator only when
     * {@code i != 0 && i != size - 1}, so two filters were concatenated
     * with no separator at all (e.g. "q1q2") and the first pair was always
     * fused; a separator is now placed between every adjacent pair.
     */
    private String resolveFilterQueries(){
        StringBuilder out = new StringBuilder();
        for (int i = 0; i < filters.size(); i++) {
            if (i > 0)
            {
                out.append(" AND ");
            }
            out.append(filters.get(i).toQuery());
        }
        return out.toString();
    }

    ///////////////////////
    // GETTERS & SETTERS //
    ///////////////////////

    public boolean isDisplayTotalViews() {
        return displayTotalViews;
    }

    public void setDisplayTotalViews(boolean displayTotalViews) {
        this.displayTotalViews = displayTotalViews;
    }

    public boolean isDisplayItemViews() {
        return displayItemViews;
    }

    public void setDisplayItemViews(boolean displayItemViews) {
        this.displayItemViews = displayItemViews;
    }

    public boolean isDisplayBitstreamViews() {
        return displayBitstreamViews;
    }

    public void setDisplayBitstreamViews(boolean displayBitstreamViews) {
        this.displayBitstreamViews = displayBitstreamViews;
    }

    public List<StatisticsFilter> getFilters() {
        return filters;
    }

    /** Append a single filter to the query filters. */
    public void addFilter(StatisticsFilter filter){
        this.filters.add(filter);
    }

    public void setFilters(List<StatisticsFilter> filters) {
        this.filters = filters;
    }
}

View File

@@ -0,0 +1,81 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.content;
import org.dspace.statistics.Dataset;
import org.dspace.statistics.content.filter.StatisticsFilter;
import org.dspace.core.Context;
import org.apache.solr.client.solrj.SolrServerException;
import java.util.List;
import java.util.ArrayList;
import java.sql.SQLException;
import java.io.IOException;
import java.text.ParseException;
/**
* Abstract "factory" for statistical queries.
* @author kevinvandevelde at atmire.com
* Date: 23-feb-2009
* Time: 12:37:04
*/
public abstract class StatisticsData {

    /** Cached query result; null until a query has been run or injected. */
    private Dataset dataset;
    /** Facet generators applied to the query, in order of addition. */
    private final List<DatasetGenerator> datasetgenerators;
    /** Filters applied to the query. */
    private final List<StatisticsFilter> filters;

    /** Construct a blank query factory. */
    protected StatisticsData() {
        this(null);
    }

    /** Wrap an existing Dataset in an unconfigured query factory. */
    protected StatisticsData(Dataset dataset) {
        this.dataset = dataset;
        datasetgenerators = new ArrayList<DatasetGenerator>(2);
        filters = new ArrayList<StatisticsFilter>();
    }

    /** Augment the list of facets (generators). */
    public void addDatasetGenerator(DatasetGenerator set){
        datasetgenerators.add(set);
    }

    /** Augment the list of filters. */
    public void addFilters(StatisticsFilter filter){
        filters.add(filter);
    }

    /** @return the current list of generators. */
    public List<DatasetGenerator> getDatasetGenerators() {
        return datasetgenerators;
    }

    /** @return the current list of filters. */
    public List<StatisticsFilter> getFilters() {
        return filters;
    }

    /** @return the existing query result if there is one, else null. */
    public Dataset getDataset() {
        return dataset;
    }

    /** Inject an existing query result. */
    public void setDataset(Dataset dataset) {
        this.dataset = dataset;
    }

    /** Run the accumulated query and return its results. */
    public abstract Dataset createDataset(Context context) throws SQLException,
            SolrServerException, IOException, ParseException;
}

View File

@@ -0,0 +1,238 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.content;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.dspace.content.DSpaceObject;
import org.dspace.core.Context;
import org.dspace.statistics.Dataset;
import org.dspace.statistics.ObjectCount;
import org.dspace.statistics.SolrLogger;
import org.dspace.statistics.content.filter.StatisticsFilter;
import org.dspace.utils.DSpace;
import java.io.IOException;
import java.sql.SQLException;
import java.text.DecimalFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
/**
* A statistics data implementation that will query the statistics backend for search information
*
* @author Kevin Van de Velde (kevin at atmire dot com)
* @author Ben Bosman (ben at atmire dot com)
* @author Mark Diggory (markd at atmire dot com)
*/
public class StatisticsDataSearches extends StatisticsData {

    // NOTE(review): DecimalFormat is not thread-safe; these shared static
    // instances assume createDataset() is not run concurrently — confirm.
    private static final DecimalFormat pageViewFormat = new DecimalFormat("0.00");
    private static final DecimalFormat percentageFormat = new DecimalFormat("0.00%");

    /** Current DSpaceObject for which to generate the statistics. */
    private DSpaceObject currentDso;

    public StatisticsDataSearches(DSpaceObject dso) {
        super();
        this.currentDso = dso;
    }

    /**
     * Query the statistics backend for search information and assemble it
     * into a Dataset. Supports the SEARCH_OVERVIEW mode (top search terms)
     * and the SEARCH_OVERVIEW_TOTAL mode (aggregated totals).
     *
     * @param context the DSpace context (unused by this implementation)
     * @return the assembled dataset (cached on subsequent calls)
     * @throws IllegalArgumentException if the configured generator is not a
     *         DatasetSearchGenerator
     */
    @Override
    public Dataset createDataset(Context context) throws SQLException, SolrServerException, IOException, ParseException {
        // Check if we already have one.
        // If we do then give it back.
        if(getDataset() != null)
        {
            return getDataset();
        }

        // Combine all configured filters into one default filter query.
        List<StatisticsFilter> filters = getFilters();
        List<String> defaultFilters = new ArrayList<String>();
        for (StatisticsFilter statisticsFilter : filters) {
            defaultFilters.add(statisticsFilter.toQuery());
        }
        String defaultFilterQuery = StringUtils.join(defaultFilters.iterator(), " AND ");
        String query = getQuery();
        Dataset dataset = new Dataset(0,0);
        List<DatasetGenerator> datasetGenerators = getDatasetGenerators();
        if(0 < datasetGenerators.size()){
            //At the moment we can only have one dataset generator
            DatasetGenerator datasetGenerator = datasetGenerators.get(0);
            if(datasetGenerator instanceof DatasetSearchGenerator){
                DatasetSearchGenerator typeGenerator = (DatasetSearchGenerator) datasetGenerator;
                if(typeGenerator.getMode() == DatasetSearchGenerator.Mode.SEARCH_OVERVIEW){
                    // Facet on the search terms, restricted to new searches.
                    StringBuilder fqBuffer = new StringBuilder(defaultFilterQuery);
                    if(0 < fqBuffer.length())
                    {
                        fqBuffer.append(" AND ");
                    }
                    fqBuffer.append(getSearchFilterQuery());

                    ObjectCount[] topCounts = SolrLogger.queryFacetField(query, fqBuffer.toString(), typeGenerator.getType(), typeGenerator.getMax(), (typeGenerator.isPercentage() || typeGenerator.isIncludeTotal()), null);
                    long totalCount = -1;
                    if(typeGenerator.isPercentage() && 0 < topCounts.length){
                        //Retrieve the total required to calculate the percentage
                        totalCount = topCounts[topCounts.length - 1].getCount();
                        //Remove the total count from view !
                        topCounts = (ObjectCount[]) ArrayUtils.subarray(topCounts, 0, topCounts.length - 1);
                    }

                    // Column layout: term, count [, percentage] [, views].
                    int nrColumns = 2;
                    if(typeGenerator.isPercentage()){
                        nrColumns++;
                    }
                    if(typeGenerator.isRetrievePageViews()){
                        nrColumns++;
                    }

                    dataset = new Dataset(topCounts.length, nrColumns);
                    dataset.setColLabel(0, "search-terms");
                    dataset.setColLabel(1, "searches");
                    if(typeGenerator.isPercentage()){
                        dataset.setColLabel(2, "percent-total");
                    }
                    if(typeGenerator.isRetrievePageViews()){
                        dataset.setColLabel(3, "views-search");
                    }
                    for (int i = 0; i < topCounts.length; i++) {
                        ObjectCount queryCount = topCounts[i];
                        dataset.setRowLabel(i, String.valueOf(i + 1));
                        String displayedValue = queryCount.getValue();
                        if(new DSpace().getConfigurationService().getPropertyAsType("usage-statistics.search.statistics.unescape.queries", Boolean.TRUE)){
                            displayedValue = displayedValue.replace("\\", "");
                        }
                        dataset.addValueToMatrix(i, 0, displayedValue);
                        dataset.addValueToMatrix(i, 1, queryCount.getCount());
                        if(typeGenerator.isPercentage()){
                            //Calculate our percentage from the total !
                            dataset.addValueToMatrix(i, 2, percentageFormat.format(((float) queryCount.getCount() / totalCount)));
                        }
                        if(typeGenerator.isRetrievePageViews()){
                            String queryString = ClientUtils.escapeQueryChars(queryCount.getValue());
                            if(queryString.equals("")){
                                // An empty term must still be a valid query.
                                queryString = "\"\"";
                            }
                            ObjectCount totalPageViews = getTotalPageViews("query:" + queryString, defaultFilterQuery);
                            dataset.addValueToMatrix(i, 3, pageViewFormat.format((float) totalPageViews.getCount() / queryCount.getCount()));
                        }
                    }
                }else
                if(typeGenerator.getMode() == DatasetSearchGenerator.Mode.SEARCH_OVERVIEW_TOTAL){
                    //Retrieve the total counts !
                    ObjectCount totalCount = SolrLogger.queryTotal(query, getSearchFilterQuery());

                    //Retrieve the filtered count by using the default filter query
                    StringBuilder fqBuffer = new StringBuilder(defaultFilterQuery);
                    if(0 < fqBuffer.length())
                    {
                        fqBuffer.append(" AND ");
                    }
                    fqBuffer.append(getSearchFilterQuery());
                    ObjectCount totalFiltered = SolrLogger.queryTotal(query, fqBuffer.toString());

                    // Fix: removed a second fqBuffer rebuild that appended
                    // the SEARCH_RESULT statistics_type but was never used;
                    // getTotalPageViews() builds that filter itself.
                    ObjectCount totalPageViews = getTotalPageViews(query, defaultFilterQuery);

                    dataset = new Dataset(1, 3);
                    dataset.setRowLabel(0, "");
                    dataset.setColLabel(0, "searches");
                    dataset.addValueToMatrix(0, 0, totalFiltered.getCount());
                    dataset.setColLabel(1, "percent-total");
                    //Ensure that we do NOT divide by 0
                    float percentTotal;
                    if(totalCount.getCount() == 0){
                        percentTotal = 0;
                    }else{
                        percentTotal = (float) totalFiltered.getCount() / totalCount.getCount();
                    }
                    dataset.addValueToMatrix(0, 1, percentageFormat.format(percentTotal));
                    dataset.setColLabel(2, "views-search");
                    //Ensure that we do NOT divide by 0
                    float pageViews;
                    if(totalFiltered.getCount() == 0){
                        pageViews = 0;
                    }else{
                        pageViews = (float) totalPageViews.getCount() / totalFiltered.getCount();
                    }
                    dataset.addValueToMatrix(0, 2, pageViewFormat.format(pageViews));
                }
            }else{
                // Fix: added missing space after "class" in the message.
                throw new IllegalArgumentException("Data generator with class " + datasetGenerator.getClass().getName() + " is not supported by the statistics search engine !");
            }
        }
        return dataset;
    }

    /**
     * Returns the query to be used in solr
     * in case of a dso a scopeDso query will be returned otherwise the default *:* query will be used
     * @return the query as a string
     */
    protected String getQuery() {
        String query;
        if(currentDso != null){
            query = "scopeType: " + currentDso.getType() + " AND scopeId: " + currentDso.getID();
        }else{
            query = "*:*";
        }
        return query;
    }

    /**
     * Count the page views (SEARCH_RESULT records) matching the given query
     * under the given default filter query.
     */
    private ObjectCount getTotalPageViews(String query, String defaultFilterQuery) throws SolrServerException {
        StringBuilder fqBuffer;
        fqBuffer = new StringBuilder(defaultFilterQuery);
        if(0 < fqBuffer.length())
        {
            fqBuffer.append(" AND ");
        }
        fqBuffer.append("statistics_type:").append(SolrLogger.StatisticsType.SEARCH_RESULT.text());
        //Retrieve the number of page views by this query !
        return SolrLogger.queryTotal(query, fqBuffer.toString());
    }

    /**
     * Returns a filter query that only allows new searches to pass
     * new searches are searches that haven't been paged through
     * @return a solr filterquery
     */
    private String getSearchFilterQuery() {
        StringBuilder fqBuffer = new StringBuilder();
        fqBuffer.append("statistics_type:").append(SolrLogger.StatisticsType.SEARCH.text());
        //Also append a filter query to ensure that paging is left out !
        fqBuffer.append(" AND -page:[* TO *]");
        return fqBuffer.toString();
    }
}

View File

@@ -0,0 +1,795 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.content;
import org.apache.commons.lang.StringUtils;
import org.dspace.content.*;
import org.dspace.statistics.Dataset;
import org.dspace.statistics.ObjectCount;
import org.dspace.statistics.SolrLogger;
import org.dspace.statistics.content.filter.StatisticsFilter;
import org.dspace.statistics.content.filter.StatisticsSolrDateFilter;
import org.dspace.statistics.util.LocationUtils;
import org.dspace.core.Context;
import org.dspace.core.Constants;
import org.dspace.core.ConfigurationManager;
import org.dspace.handle.HandleManager;
import org.dspace.app.util.Util;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.util.ClientUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.HashMap;
import java.util.Map;
import java.sql.SQLException;
import java.text.ParseException;
import java.io.UnsupportedEncodingException;
/**
* Query factory associated with a DSpaceObject.
* Encapsulates the raw data, independent of rendering.
* <p>
* To use:
* <ol>
* <li>Instantiate, passing a reference to the interesting DSO.</li>
* <li>Add a {@link DatasetDSpaceObjectGenerator} for the appropriate object type.</li>
* <li>Add other generators as required to get the statistic you want.</li>
* <li>Add {@link org.dspace.statistics.content.filter filters} as required.</li>
* <li>{@link #createDataset(Context)} will run the query and return a result matrix.
* Subsequent calls skip the query and return the same matrix.</li>
* </ol>
*
* @author kevinvandevelde at atmire.com
* Date: 23-feb-2009
* Time: 12:25:20
*/
public class StatisticsDataVisits extends StatisticsData
{
/** Current DSpaceObject for which to generate the statistics. */
private DSpaceObject currentDso;
/** Construct a completely uninitialized query. */
public StatisticsDataVisits()
{
}
/** Construct an empty query concerning a given DSpaceObject. */
public StatisticsDataVisits(DSpaceObject dso)
{
super();
this.currentDso = dso;
}
/** Construct an unconfigured query around a given DSO and Dataset. */
public StatisticsDataVisits(DSpaceObject currentDso, Dataset dataset)
{
super(dataset);
this.currentDso = currentDso;
}
/** Construct an unconfigured query around a given Dataset. */
public StatisticsDataVisits(Dataset dataset)
{
super(dataset);
}
public Dataset createDataset(Context context) throws SQLException,
SolrServerException, ParseException
{
// Check if we already have one.
// If we do then give it back.
if(getDataset() != null)
{
return getDataset();
}
///////////////////////////
// 1. DETERMINE OUR AXIS //
///////////////////////////
ArrayList<DatasetQuery> datasetQueries = new ArrayList<DatasetQuery>();
for (int i = 0; i < getDatasetGenerators().size(); i++) {
DatasetGenerator dataSet = getDatasetGenerators().get(i);
processAxis(dataSet, datasetQueries);
}
// Now lets determine our values.
// First check if we have a date facet & if so find it.
DatasetTimeGenerator dateFacet = null;
if (getDatasetGenerators().get(0) instanceof DatasetTimeGenerator
|| (1 < getDatasetGenerators().size() && getDatasetGenerators()
.get(1) instanceof DatasetTimeGenerator))
{
if(getDatasetGenerators().get(0) instanceof DatasetTimeGenerator)
{
dateFacet = (DatasetTimeGenerator) getDatasetGenerators().get(0);
}
else
{
dateFacet = (DatasetTimeGenerator) getDatasetGenerators().get(1);
}
}
/////////////////////////
// 2. DETERMINE VALUES //
/////////////////////////
boolean showTotal = false;
// Check if we need our total
if ((getDatasetGenerators().get(0) != null && getDatasetGenerators()
.get(0).isIncludeTotal())
|| (1 < getDatasetGenerators().size()
&& getDatasetGenerators().get(1) != null && getDatasetGenerators()
.get(1).isIncludeTotal()))
{
showTotal = true;
}
if (dateFacet != null && dateFacet.getActualStartDate() != null
&& dateFacet.getActualEndDate() != null)
{
StatisticsSolrDateFilter dateFilter = new StatisticsSolrDateFilter();
dateFilter.setStartDate(dateFacet.getActualStartDate());
dateFilter.setEndDate(dateFacet.getActualEndDate());
dateFilter.setTypeStr(dateFacet.getDateType());
addFilters(dateFilter);
}
else if (dateFacet != null && dateFacet.getStartDate() != null
&& dateFacet.getEndDate() != null)
{
StatisticsSolrDateFilter dateFilter = new StatisticsSolrDateFilter();
dateFilter.setStartStr(dateFacet.getStartDate());
dateFilter.setEndStr(dateFacet.getEndDate());
dateFilter.setTypeStr(dateFacet.getDateType());
addFilters(dateFilter);
}
// Determine our filterQuery
String filterQuery = "";
for (int i = 0; i < getFilters().size(); i++) {
StatisticsFilter filter = getFilters().get(i);
filterQuery += "(" + filter.toQuery() + ")";
if(i != (getFilters().size() -1))
{
filterQuery += " AND ";
}
}
if(StringUtils.isNotBlank(filterQuery)){
filterQuery += " AND ";
}
//Only use the view type and make sure old data (where no view type is present) is also supported
//Solr doesn't explicitly apply boolean logic, so this query cannot be simplified to an OR query
filterQuery += "-(statistics_type:[* TO *] AND -statistics_type:" + SolrLogger.StatisticsType.VIEW.text() + ")";
// System.out.println("FILTERQUERY: " + filterQuery);
// We determine our values on the queries resolved above
Dataset dataset = null;
// Run over our queries.
// First how many queries do we have ?
if(dateFacet != null){
// So do all the queries and THEN do the date facet
for (int i = 0; i < datasetQueries.size(); i++) {
DatasetQuery dataSetQuery = datasetQueries.get(i);
if(dataSetQuery.getQueries().size() != 1){
// TODO: do this
}else{
String query = dataSetQuery.getQueries().get(0).getQuery();
if(dataSetQuery.getMax() == -1){
// We are asking from our current query all the visits faceted by date
ObjectCount[] results = SolrLogger.queryFacetDate(query, filterQuery, dataSetQuery.getMax(), dateFacet.getDateType(), dateFacet.getStartDate(), dateFacet.getEndDate(), showTotal);
dataset = new Dataset(1, results.length);
// Now that we have our results put em in a matrix
for(int j = 0; j < results.length; j++){
dataset.setColLabel(j, results[j].getValue());
dataset.addValueToMatrix(0, j, results[j].getCount());
}
// TODO: change this !
// Now add the column label
dataset.setRowLabel(0, getResultName(dataSetQuery.getName(), dataSetQuery, context));
dataset.setRowLabelAttr(0, getAttributes(dataSetQuery.getName(), dataSetQuery, context));
}else{
// We need to get the max objects and the next part of the query on them (next part beeing the datasettimequery
ObjectCount[] maxObjectCounts = SolrLogger.queryFacetField(query, filterQuery, dataSetQuery.getFacetField(), dataSetQuery.getMax(), false, null);
for (int j = 0; j < maxObjectCounts.length; j++) {
ObjectCount firstCount = maxObjectCounts[j];
String newQuery = dataSetQuery.getFacetField() + ": " + ClientUtils.escapeQueryChars(firstCount.getValue()) + " AND " + query;
ObjectCount[] maxDateFacetCounts = SolrLogger.queryFacetDate(newQuery, filterQuery, dataSetQuery.getMax(), dateFacet.getDateType(), dateFacet.getStartDate(), dateFacet.getEndDate(), showTotal);
// Make sure we have a dataSet
if(dataset == null)
{
dataset = new Dataset(maxObjectCounts.length, maxDateFacetCounts.length);
}
// TODO: this is a very dirty fix change this ! ! ! ! ! !
dataset.setRowLabel(j, getResultName(firstCount.getValue(), dataSetQuery, context));
dataset.setRowLabelAttr(j, getAttributes(firstCount.getValue(), dataSetQuery, context));
for (int k = 0; k < maxDateFacetCounts.length; k++) {
ObjectCount objectCount = maxDateFacetCounts[k];
// No need to add this many times
if(j == 0)
{
dataset.setColLabel(k, objectCount.getValue());
}
dataset.addValueToMatrix(j, k, objectCount.getCount());
}
}
if(dataset != null && !(getDatasetGenerators().get(0) instanceof DatasetTimeGenerator)){
dataset.flipRowCols();
}
}
}
}
}else{
// We do NOT have a date facet so just do queries after each other
/*
for (int i = 0; i < datasetQueries.size(); i++) {
DatasetQuery datasetQuery = datasetQueries.get(i);
if(datasetQuery.getQueries().size() != 1){
// TODO: do this
}else{
String query = datasetQuery.getQueries().get(0);
// Loop over the queries & do em
// ObjectCount[] topCounts = SolrLogger.queryFacetField(query, );
}
}
*/
DatasetQuery firsDataset = datasetQueries.get(0);
//Do the first query
ObjectCount[] topCounts1 = null;
// if(firsDataset.getQueries().size() == 1){
topCounts1 = queryFacetField(firsDataset, firsDataset.getQueries().get(0).getQuery(), filterQuery);
// }else{
// TODO: do this
// }
// Check if we have more queries that need to be done
if(datasetQueries.size() == 2){
DatasetQuery secondDataSet = datasetQueries.get(1);
// Now do the second one
ObjectCount[] topCounts2 = queryFacetField(secondDataSet, secondDataSet.getQueries().get(0).getQuery(), filterQuery);
// Now that have results for both of them lets do x.y queries
List<String> facetQueries = new ArrayList<String>();
for (ObjectCount count2 : topCounts2) {
String facetQuery = secondDataSet.getFacetField() + ":" + ClientUtils.escapeQueryChars(count2.getValue());
// Check if we also have a type present (if so this should be put into the query)
if ("id".equals(secondDataSet.getFacetField()) && secondDataSet.getQueries().get(0).getDsoType() != -1)
{
facetQuery += " AND type:" + secondDataSet.getQueries().get(0).getDsoType();
}
facetQueries.add(facetQuery);
}
for (int i = 0; i < topCounts1.length; i++){
ObjectCount count1 = topCounts1[i];
ObjectCount[] currentResult = new ObjectCount[topCounts2.length];
// Make sure we have a dataSet
if(dataset == null)
{
dataset = new Dataset(topCounts2.length, topCounts1.length);
}
dataset.setColLabel(i, getResultName(count1.getValue(), firsDataset, context));
dataset.setColLabelAttr(i, getAttributes(count1.getValue(), firsDataset, context));
String query = firsDataset.getFacetField() + ":" + ClientUtils.escapeQueryChars(count1.getValue());
// Check if we also have a type present (if so this should be put into the query)
if("id".equals(firsDataset.getFacetField()) && firsDataset.getQueries().get(0).getDsoType() != -1)
{
query += " AND type:" + firsDataset.getQueries().get(0).getDsoType();
}
Map<String, Integer> facetResult = SolrLogger.queryFacetQuery(query, filterQuery, facetQueries);
// TODO: the show total
// No need to add this many times
// TODO: dit vervangen door te displayen value
for (int j = 0; j < topCounts2.length; j++) {
ObjectCount count2 = topCounts2[j];
if(i == 0) {
dataset.setRowLabel(j, getResultName(count2.getValue(), secondDataSet, context));
dataset.setRowLabelAttr(j, getAttributes(count2.getValue(), secondDataSet, context));
}
// Get our value the value is the same as the query
String facetQuery = secondDataSet.getFacetField() + ":" + ClientUtils.escapeQueryChars(count2.getValue());
// Check if we also have a type present (if so this should be put into the query
if ("id".equals(secondDataSet.getFacetField()) && secondDataSet.getQueries().get(0).getDsoType() != -1)
{
facetQuery += " AND type:" + secondDataSet.getQueries().get(0).getDsoType();
}
// We got our query so now get the value
dataset.addValueToMatrix(j, i, facetResult.get(facetQuery));
}
/*
for (int j = 0; j < topCounts2.length; j++) {
ObjectCount count2 = topCounts2[j];
String query = firsDataset.getFacetField() + ":" + count1.getValue();
// Check if we also have a type present (if so this should be put into the query
if("id".equals(firsDataset.getFacetField()) && firsDataset.getQueries().get(0).getDsoType() != -1)
query += " AND type:" + firsDataset.getQueries().get(0).getDsoType();
query += " AND " + secondDataSet.getFacetField() + ":" + count2.getValue();
// Check if we also have a type present (if so this should be put into the query
if("id".equals(secondDataSet.getFacetField()) && secondDataSet.getQueries().get(0).getDsoType() != -1)
query += " AND type:" + secondDataSet.getQueries().get(0).getDsoType();
long count = SolrLogger.queryFacetQuery(query, filterQuery);
// TODO: the show total
// No need to add this many times
// TODO: dit vervangen door te displayen value
if(i == 0) {
dataset.setRowLabel(j, getResultName(count2.getValue(), secondDataSet, context));
dataset.setRowLabelAttr(j, getAttributes(count2.getValue(), secondDataSet, context));
}
dataset.addValueToMatrix(j, i, count);
}
*/
}
// System.out.println("BOTH");
} else{
// Make sure we have a dataSet
dataset = new Dataset(1, topCounts1.length);
for (int i = 0; i < topCounts1.length; i++) {
ObjectCount count = topCounts1[i];
dataset.setColLabel(i, getResultName(count.getValue(), firsDataset, context));
dataset.setColLabelAttr(i, getAttributes(count.getValue(), firsDataset, context));
dataset.addValueToMatrix(0, i, count.getCount());
}
}
}
if(dataset != null){
dataset.setRowTitle("Dataset 1");
dataset.setColTitle("Dataset 2");
}else
{
dataset = new Dataset(0, 0);
}
return dataset;
}
/**
 * Translate one axis configuration (a {@code DatasetGenerator}) into
 * {@code DatasetQuery} objects and append them to the supplied list.
 *
 * For a {@code DatasetDSpaceObjectGenerator} one DatasetQuery is produced per
 * DSORepresentation, faceted on "id"; for a {@code DatasetTypeGenerator} a
 * single DatasetQuery faceted on the configured type field is produced.
 *
 * @param datasetGenerator the axis configuration to translate
 * @param queries collector that the resulting DatasetQuery objects are added to
 * @throws SQLException passed through from DSpace object access
 */
private void processAxis(DatasetGenerator datasetGenerator, List<DatasetQuery> queries) throws SQLException {
    if(datasetGenerator instanceof DatasetDSpaceObjectGenerator){
        DatasetDSpaceObjectGenerator dspaceObjAxis = (DatasetDSpaceObjectGenerator) datasetGenerator;
        // Get the types involved
        List<DSORepresentation> dsoRepresentations = dspaceObjAxis.getDsoRepresentations();
        for (int i = 0; i < dsoRepresentations.size(); i++){
            DatasetQuery datasetQuery = new DatasetQuery();
            Integer dsoType = dsoRepresentations.get(i).getType();
            boolean separate = dsoRepresentations.get(i).getSeparate();
            Integer dsoLength = dsoRepresentations.get(i).getNameLength();
            // Check if our type is our current object
            // (Integer == int unboxes, so this is a value comparison.)
            if(currentDso != null && dsoType == currentDso.getType()){
                Query query = new Query();
                query.setDso(currentDso.getID(), currentDso.getType(), dsoLength);
                datasetQuery.addQuery(query);
            }else{
                // TODO: only do this for bitstreams from an item
                Query query = new Query();
                if(currentDso != null && separate && dsoType == Constants.BITSTREAM){
                    // CURRENTLY THIS IS ONLY POSSIBLE FOR AN ITEM ! ! ! ! ! ! !
                    // We need to get the separate bitstreams from our item and make a query for each of them
                    // NOTE(review): every iteration calls setDso() on the SAME Query
                    // instance, so only the last non-internal bitstream survives —
                    // the comment above promises one query per bitstream. Left
                    // unchanged because downstream handling of multiple queries per
                    // DatasetQuery is still a TODO (see createDataset).
                    Item item = (Item) currentDso;
                    for (int j = 0; j < item.getBundles().length; j++) {
                        Bundle bundle = item.getBundles()[j];
                        for (int k = 0; k < bundle.getBitstreams().length; k++) {
                            Bitstream bitstream = bundle.getBitstreams()[k];
                            if(!bitstream.getFormat().isInternal()){
                                // Add a separate query for each bitstream
                                query.setDso(bitstream.getID(), bitstream.getType(), dsoLength);
                            }
                        }
                    }
                } else {
                    // We have something else than our current object.
                    // So we need some kind of children from it, so put this in our query
                    query.setOwningDso(currentDso);
                    query.setDsoLength(dsoLength);
                    // Human-readable axis name for this object type.
                    String title = "";
                    switch(dsoType){
                        case Constants.BITSTREAM:
                            title = "Files";
                            break;
                        case Constants.ITEM:
                            title = "Items";
                            break;
                        case Constants.COLLECTION:
                            title = "Collections";
                            break;
                        case Constants.COMMUNITY:
                            title = "Communities";
                            break;
                    }
                    datasetQuery.setName(title);
                    // Put the type in so we only get the children of the type specified
                    query.setDsoType(dsoType);
                }
                datasetQuery.addQuery(query);
            }
            datasetQuery.setFacetField("id");
            datasetQuery.setMax(dsoRepresentations.get(i).getMax());
            queries.add(datasetQuery);
        }
    }else
    if(datasetGenerator instanceof DatasetTypeGenerator){
        DatasetTypeGenerator typeAxis = (DatasetTypeGenerator) datasetGenerator;
        DatasetQuery datasetQuery = new DatasetQuery();
        // First make sure our query is in order:
        // scope to the current object when one is set.
        Query query = new Query();
        if(currentDso != null)
        {
            query.setDso(currentDso.getID(), currentDso.getType());
        }
        datasetQuery.addQuery(query);
        // Then add the rest: facet on the configured type field.
        datasetQuery.setMax(typeAxis.getMax());
        datasetQuery.setFacetField(typeAxis.getType());
        datasetQuery.setName(typeAxis.getType());
        queries.add(datasetQuery);
    }
}
/**
 * Resolve a facet value to a human-readable name.
 *
 * For "continent"/"countryCode" axes the value is translated via
 * {@code LocationUtils}; otherwise the value is treated as a DSpace object id
 * and the object's name (truncated to the configured length) is returned.
 * Falls back to returning the raw value when nothing can be resolved.
 *
 * @param value the facet value (object id, country code, ...); may be null
 * @param datasetQuery the query the value originated from
 * @param context DSpace context used for lookups
 * @return the display name for the value
 * @throws SQLException passed through from the object lookup
 */
private String getResultName(String value, DatasetQuery datasetQuery,
        Context context) throws SQLException
{
    if("continent".equals(datasetQuery.getName())){
        value = LocationUtils.getContinentName(value, context
                .getCurrentLocale());
    }else
    if("countryCode".equals(datasetQuery.getName())){
        value = LocationUtils.getCountryName(value, context
                .getCurrentLocale());
    }else{
        Query query = datasetQuery.getQueries().get(0);
        //TODO: CHANGE & THROW AWAY THIS ENTIRE METHOD
        // Check if the value is an integer object id.
        // Integer.parseInt(null) also throws NumberFormatException, so this
        // narrower catch covers the null case the old catch(Exception) did.
        int dsoId;
        int dsoLength = query.getDsoLength();
        try {
            dsoId = Integer.parseInt(value);
        }catch(NumberFormatException e){
            dsoId = -1;
        }
        // Fall back to the id configured on the query when no value was given.
        if(dsoId == -1 && query.getDsoId() != -1 && value == null)
        {
            dsoId = query.getDsoId();
        }
        if(dsoId != -1 && query.dsoType != -1){
            DSpaceObject dso = DSpaceObject.find(context, query.getDsoType(), dsoId);
            if(dso != null){
                switch(dso.getType()){
                    case Constants.BITSTREAM:
                        // Bitstream names are short; no truncation applied.
                        return ((Bitstream) dso).getName();
                    case Constants.ITEM:
                        Item item = (Item) dso;
                        String name = "untitled";
                        DCValue[] vals = item.getMetadata("dc", "title", null, Item.ANY);
                        if(vals != null && 0 < vals.length)
                        {
                            name = vals[0].value;
                        }
                        return truncateName(name, dsoLength);
                    case Constants.COLLECTION:
                        return truncateName(((Collection) dso).getName(), dsoLength);
                    case Constants.COMMUNITY:
                        return truncateName(((Community) dso).getName(), dsoLength);
                }
            }
        }
    }
    return value;
}

/**
 * Truncate a name at the first space at or after {@code dsoLength},
 * appending " ..." — or return it unchanged when it is short enough,
 * no limit is set (-1), or no space is found after the limit.
 */
private String truncateName(String name, int dsoLength) {
    if(dsoLength != -1 && name.length() > dsoLength){
        // Cut it off at the first space after the limit.
        int firstSpace = name.indexOf(' ', dsoLength);
        if(firstSpace != -1){
            name = name.substring(0, firstSpace) + " ...";
        }
    }
    return name;
}
/**
 * Build the render attributes for a single result value — currently only a
 * "url" attribute linking to the DSpace object the value identifies.
 *
 * The value is expected to be the integer id of a DSpace object; when it is
 * not numeric and null, the id configured on the first Query is used instead.
 *
 * @param value the facet value (normally a DSpace object id); may be null
 * @param datasetQuery the query the value originated from
 * @param context DSpace context used for the object lookup
 * @return attribute name → value map; empty when no object could be resolved
 * @throws SQLException passed through from the object lookup
 */
private Map<String, String> getAttributes(String value,
        DatasetQuery datasetQuery, Context context) throws SQLException
{
    HashMap<String, String> attrs = new HashMap<String, String>();
    Query query = datasetQuery.getQueries().get(0);
    //TODO: CHANGE & THROW AWAY THIS ENTIRE METHOD
    // Check if the value is an integer object id.
    int dsoId;
    try {
        dsoId = Integer.parseInt(value);
    }catch(Exception e){
        // Not numeric (or null): may fall back to the query's configured id below.
        dsoId = -1;
    }
    if(dsoId == -1 && query.getDsoId() != -1 && value == null)
    {
        dsoId = query.getDsoId();
    }
    if(dsoId != -1 && query.dsoType != -1){
        DSpaceObject dso = DSpaceObject.find(context, query.getDsoType(), dsoId);
        if(dso != null){
            switch(dso.getType()){
                case Constants.BITSTREAM:
                    Bitstream bit = (Bitstream) dso;
                    // Get our owning item (if any) through the first bundle.
                    Item owningItem = null;
                    Bundle[] bunds = bit.getBundles();
                    if(0 < bunds.length && 0 < bunds[0].getItems().length)
                    {
                        owningItem = bunds[0].getItems()[0];
                    }
                    // If possible reference this bitstream via a handle, however this may
                    // be null if a handle has not yet been assigned. In this case reference the
                    // item its internal id. In the last case where the bitstream is not associated
                    // with an item (such as a community logo) then reference the bitstreamID directly.
                    String identifier = null;
                    if (owningItem != null && owningItem.getHandle() != null)
                    {
                        identifier = "handle/" + owningItem.getHandle();
                    }
                    else if (owningItem != null)
                    {
                        identifier = "item/" + owningItem.getID();
                    }
                    else
                    {
                        identifier = "id/" + bit.getID();
                    }
                    String url = ConfigurationManager.getProperty("dspace.url") + "/bitstream/"+identifier+"/";
                    // If we can, put the pretty name of the bitstream on the end of the URL.
                    try
                    {
                        if (bit.getName() != null)
                        {
                            url += Util.encodeBitstreamName(bit.getName(), "UTF-8");
                        }
                    }
                    catch (UnsupportedEncodingException uee)
                    {
                        // Just ignore it: we don't have to have a pretty
                        // name on the end of the URL because the sequence id will
                        // locate it. However it means that links in this file might
                        // not work....
                    }
                    url += "?sequence="+bit.getSequenceID();
                    attrs.put("url", url);
                    break;
                case Constants.ITEM:
                    Item item = (Item) dso;
                    attrs.put("url", HandleManager.resolveToURL(context, item.getHandle()));
                    break;
                case Constants.COLLECTION:
                    Collection coll = (Collection) dso;
                    attrs.put("url", HandleManager.resolveToURL(context, coll.getHandle()));
                    break;
                case Constants.COMMUNITY:
                    Community comm = (Community) dso;
                    attrs.put("url", HandleManager.resolveToURL(context, comm.getHandle()));
                    break;
            }
        }
    }
    return attrs;
}
/**
 * Run a facet query for the given dataset, defaulting the facet field
 * to "id" when the dataset has none configured.
 *
 * @param dataset supplies the facet field and the maximum number of values
 * @param query the main Solr query
 * @param filterQuery additional filter query to apply
 * @return the facet counts returned by the statistics backend
 * @throws SolrServerException on backend failure
 */
private ObjectCount[] queryFacetField(DatasetQuery dataset, String query,
        String filterQuery) throws SolrServerException
{
    String field = dataset.getFacetField();
    if (field == null)
    {
        field = "id";
    }
    return SolrLogger.queryFacetField(query, filterQuery, field,
            dataset.getMax(), false, null);
}
/**
 * Describes one axis of a statistics dataset: a list of {@link Query} objects
 * together with the Solr facet field, the maximum number of facet values to
 * return, and a display name.
 */
public static class DatasetQuery {
    /** Display name for this axis (used when labelling results). */
    private String name;
    /** Maximum number of facet values to fetch (-1 = unlimited). */
    private int max;
    /** Solr field on which to facet. */
    private String facetField;
    /** The individual queries that make up this axis. */
    private final List<Query> queries;

    public DatasetQuery() {
        this.queries = new ArrayList<Query>();
    }

    public void addQuery(Query q){
        this.queries.add(q);
    }

    public List<Query> getQueries() {
        return this.queries;
    }

    public int getMax() {
        return this.max;
    }

    public void setMax(int max) {
        this.max = max;
    }

    public String getFacetField() {
        return this.facetField;
    }

    public void setFacetField(String facetField) {
        this.facetField = facetField;
    }

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }
}
/**
 * A single Solr query against the usage-statistics core, described by the
 * DSpace object (type/id) it targets and, optionally, an owning object.
 *
 * Non-static inner class: {@link #getQuery()} reads the enclosing instance's
 * {@code currentDso} to build the "owning*" clause.
 */
public class Query {
    // DSpace object type constant (org.dspace.core.Constants); -1 when unset.
    private int dsoType;
    // Database id of the target object; -1 when unset.
    private int dsoId;
    // Maximum display length for the object's name; -1 for unlimited.
    private int dsoLength;
    // When set (together with currentDso), scope results to objects owned by currentDso.
    private DSpaceObject owningDso;

    public Query() {
        dsoId = -1;
        dsoType = -1;
        dsoLength = -1;
        owningDso = null;
    }

    public void setOwningDso(DSpaceObject owningDso) {
        this.owningDso = owningDso;
    }

    public void setDso(int dsoId, int dsoType){
        this.dsoId = dsoId;
        this.dsoType = dsoType;
    }

    public void setDso(int dsoId, int dsoType, int length){
        this.dsoId = dsoId;
        this.dsoType = dsoType;
        this.dsoLength = length;
    }

    public void setDsoType(int dsoType) {
        this.dsoType = dsoType;
    }

    public int getDsoLength() {
        return dsoLength;
    }

    public void setDsoLength(int dsoLength) {
        this.dsoLength = dsoLength;
    }

    public int getDsoId() {
        return dsoId;
    }

    public int getDsoType(){
        return dsoType;
    }

    /** Short "type:id" identifier for this query's target. */
    public String getQueryResultName(){
        //TODO: This has got to be done differently in case we have a string query.
        //This is just a temporary solution so we can get on with our work.
        return dsoType + ":" + dsoId;
    }

    /**
     * Build the Solr query string: optional "type:" and "id:" clauses,
     * an optional owning-object clause scoped by the enclosing currentDso,
     * or the match-all query "*:*" when nothing is configured.
     */
    public String getQuery() {
        //Time to construct our query
        String query = "";
        //Check (& add if needed) the dsoType
        if(dsoType != -1)
        {
            query += "type: " + dsoType;
        }
        //Check (& add if needed) the dsoId
        // NOTE(review): when no type clause precedes it, this produces a
        // leading space (" id:..."); Solr tolerates it, so left unchanged.
        if(dsoId != -1)
        {
            query += (query.equals("") ? "" : " AND ") + " id:" + dsoId;
        }
        if(owningDso != null && currentDso != null){
            query += (query.equals("") ? "" : " AND " );
            // Pick the owning field that matches the current object's type.
            String owningStr = "";
            switch(currentDso.getType()){
                case Constants.ITEM:
                    owningStr = "owningItem";
                    break;
                case Constants.COLLECTION:
                    owningStr = "owningColl";
                    break;
                case Constants.COMMUNITY:
                    owningStr = "owningComm";
                    break;
            }
            owningStr += ":" + currentDso.getID();
            query += owningStr;
        }
        // Fall back to match-all when nothing was specified.
        if(query.equals(""))
        {
            query = "*:*";
        }
        return query;
    }
}
}

View File

@@ -0,0 +1,202 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.content;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.dspace.content.DCDate;
import org.dspace.content.DSpaceObject;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.services.ConfigurationService;
import org.dspace.statistics.Dataset;
import org.dspace.statistics.ObjectCount;
import org.dspace.statistics.SolrLogger;
import org.dspace.statistics.content.filter.StatisticsFilter;
import org.dspace.utils.DSpace;
import java.io.File;
import java.io.IOException;
import java.sql.SQLException;
import java.text.ParseException;
import java.util.*;
/**
 * A workflow data implementation that will query the statistics backend for
 * workflow information.
 *
 * @author Kevin Van de Velde (kevin at atmire dot com)
 * @author Ben Bosman (ben at atmire dot com)
 * @author Mark Diggory (markd at atmire dot com)
 */
public class StatisticsDataWorkflow extends StatisticsData {

    private static final Logger log = Logger.getLogger(StatisticsDataWorkflow.class);

    /** Current DSpaceObject for which to generate the statistics. */
    private DSpaceObject currentDso;

    /** Number of months over which an average is computed (-1 = no average column). */
    private int averageMonths = -1;

    /**
     * @param dso object to scope the workflow statistics to (null for site-wide)
     * @param averageMonths months to average over, or -1 to omit the average column
     */
    public StatisticsDataWorkflow(DSpaceObject dso, int averageMonths) {
        super();
        this.currentDso = dso;
        this.averageMonths = averageMonths;
    }

    /**
     * Build a dataset with one row per workflow step and the columns
     * "step", "performed" and (when averaging is enabled) "average".
     * The result is cached; subsequent calls return the cached dataset.
     */
    @Override
    public Dataset createDataset(Context context) throws SQLException, SolrServerException, IOException, ParseException {
        // Check if we already have one. If we do then give it back.
        if(getDataset() != null)
        {
            return getDataset();
        }

        // Combine all configured filters into one filter query.
        List<StatisticsFilter> filters = getFilters();
        List<String> defaultFilters = new ArrayList<String>();
        for (StatisticsFilter statisticsFilter : filters) {
            defaultFilters.add(statisticsFilter.toQuery());
        }
        String defaultFilterQuery = StringUtils.join(defaultFilters.iterator(), " AND ");

        String query = getQuery();
        Dataset dataset = new Dataset(0, 0);
        List<DatasetGenerator> datasetGenerators = getDatasetGenerators();
        if(0 < datasetGenerators.size()){
            // At the moment we can only have one dataset generator.
            DatasetGenerator datasetGenerator = datasetGenerators.get(0);
            if(datasetGenerator instanceof DatasetTypeGenerator){
                DatasetTypeGenerator typeGenerator = (DatasetTypeGenerator) datasetGenerator;
                ObjectCount[] topCounts = SolrLogger.queryFacetField(query, defaultFilterQuery, typeGenerator.getType(), typeGenerator.getMax(), typeGenerator.isIncludeTotal(), null);

                // Retrieve our unfiltered total field counts, needed for averages.
                Map<String, Long> totalFieldCounts = new HashMap<String, Long>();
                if(averageMonths != -1){
                    totalFieldCounts = getTotalFacetCounts(typeGenerator);
                }
                long monthDifference = 1;
                if(getOldestWorkflowItemDate() != null){
                    // Months elapsed since the oldest workflow event. FIX: the
                    // oldest date must be the first argument, otherwise the
                    // difference comes out negative and so do the averages.
                    monthDifference = getMonthsDifference(getOldestWorkflowItemDate(), new Date());
                }

                dataset = new Dataset(topCounts.length, (averageMonths != -1 ? 3 : 2));
                dataset.setColLabel(0, "step");
                dataset.setColLabel(1, "performed");
                if(averageMonths != -1){
                    dataset.setColLabel(2, "average");
                }
                for (int i = 0; i < topCounts.length; i++) {
                    ObjectCount topCount = topCounts[i];
                    dataset.setRowLabel(i, String.valueOf(i + 1));
                    dataset.addValueToMatrix(i, 0, topCount.getValue());
                    dataset.addValueToMatrix(i, 1, topCount.getCount());
                    if(averageMonths != -1){
                        // Calculate the average for one month.
                        long monthlyAverage = 0;
                        if(totalFieldCounts.get(topCount.getValue()) != null){
                            monthlyAverage = totalFieldCounts.get(topCount.getValue()) / monthDifference;
                        }
                        // Multiply the one-month average by the requested number of months.
                        dataset.addValueToMatrix(i, 2, (monthlyAverage * averageMonths));
                    }
                }
            }
        }
        return dataset;
    }

    /**
     * Returns the query to be used in solr:
     * when a community/collection is set, a scoped query is returned;
     * otherwise all workflow events are matched.
     * @return the query as a string
     */
    protected String getQuery() {
        String query = "statistics_type:" + SolrLogger.StatisticsType.WORKFLOW.text();
        query += " AND NOT(previousWorkflowStep: SUBMIT)";
        if(currentDso != null){
            if(currentDso.getType() == Constants.COMMUNITY){
                query += " AND owningComm:" + currentDso.getID();
            }else
            if(currentDso.getType() == Constants.COLLECTION){
                query += " AND owningColl:" + currentDso.getID();
            }
            // FIX: other DSO types previously appended the bare id with no
            // field name, producing a malformed query; they are now unscoped.
        }
        return query;
    }

    /**
     * Number of calendar months between the two dates, counting both end
     * months (same month yields 1). Expects {@code date1 <= date2}.
     * Uses Calendar instead of the deprecated Date.getYear()/getMonth().
     */
    private int getMonthsDifference(Date date1, Date date2) {
        Calendar cal1 = Calendar.getInstance();
        cal1.setTime(date1);
        Calendar cal2 = Calendar.getInstance();
        cal2.setTime(date2);
        int m1 = cal1.get(Calendar.YEAR) * 12 + cal1.get(Calendar.MONTH);
        int m2 = cal2.get(Calendar.YEAR) * 12 + cal2.get(Calendar.MONTH);
        return m2 - m1 + 1;
    }

    /**
     * Retrieve the total counts for the facets (total count is the same query
     * but without any of the filter queries).
     * @param typeGenerator the type generator
     * @return a map from facet value to its total count
     * @throws org.apache.solr.client.solrj.SolrServerException on backend failure
     */
    protected Map<String, Long> getTotalFacetCounts(DatasetTypeGenerator typeGenerator) throws SolrServerException {
        ObjectCount[] objectCounts = SolrLogger.queryFacetField(getQuery(), null, typeGenerator.getType(), -1, false, null);
        Map<String, Long> result = new HashMap<String, Long>();
        for (ObjectCount objectCount : objectCounts) {
            result.put(objectCount.getValue(), objectCount.getCount());
        }
        return result;
    }

    /**
     * Date of the oldest workflow event. Read from configuration when present;
     * otherwise queried once from Solr and then persisted to the
     * usage-statistics module configuration, because the sort query is
     * expensive and we do not want that delay on every request.
     * @return the oldest date, or null when no workflow records exist
     */
    protected Date getOldestWorkflowItemDate() throws SolrServerException {
        ConfigurationService configurationService = new DSpace().getConfigurationService();
        String workflowStartDate = configurationService.getProperty("usage-statistics.workflow-start-date");
        if(workflowStartDate == null){
            // Query our solr for it !
            QueryResponse oldestRecord = SolrLogger.query(getQuery(), null, null, 1, 0, null, null, null, null, "time", true);
            if(0 < oldestRecord.getResults().getNumFound()){
                SolrDocument solrDocument = oldestRecord.getResults().get(0);
                Date oldestDate = (Date) solrDocument.getFieldValue("time");
                // Store the date: we only need to retrieve this once.
                try {
                    // Also store it in the usage-statistics configuration file; the
                    // sort query is time consuming and we do not want this delay
                    // each time we want to see workflow statistics.
                    String solrConfigDir = configurationService.getProperty("dspace.dir") + File.separator + "config"
                            + File.separator + "modules" + File.separator + "usage-statistics.cfg";
                    PropertiesConfiguration config = new PropertiesConfiguration(solrConfigDir);
                    config.setProperty("workflow-start-date", new DCDate(oldestDate));
                    config.save();
                } catch (ConfigurationException e) {
                    log.error("Error while storing workflow start date", e);
                }
                // Also cache it in the in-memory configuration.
                configurationService.setProperty("usage-statistics.workflow-start-date", new DCDate(oldestDate).toString());
                return oldestDate;
            }else{
                return null;
            }
        }else{
            return new DCDate(workflowStartDate).toDate();
        }
    }
}

View File

@@ -0,0 +1,118 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.content;
import java.io.IOException;
import java.sql.SQLException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import org.apache.solr.client.solrj.SolrServerException;
import org.dspace.core.Context;
import org.dspace.statistics.Dataset;
import org.dspace.statistics.content.filter.StatisticsFilter;
/**
 * Encapsulates all data needed to render a statistics view; concrete
 * subclasses choose the representation (table, listing, ...) via
 * {@link #getType()}. Most accessors delegate to the wrapped
 * {@link StatisticsData} instance.
 *
 * @author kevinvandevelde at atmire.com
 * Date: 23-dec-2008
 * Time: 9:27:09
 */
public abstract class StatisticsDisplay {
    /** Identifier used to reference this display from a page. */
    private String id;
    /** The statistics data backing this display. */
    private StatisticsData statisticsData;
    /** Title shown above the rendered statistics. */
    private String title;
    /** css information used to position the display object in a html page. */
    private List<String> css;

    protected StatisticsDisplay(StatisticsData statisticsData){
        this.statisticsData = statisticsData;
    }

    /** @return the render type of this display (e.g. "table", "listing"). */
    public abstract String getType();

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public List<DatasetGenerator> getDatasetGenerators() {
        return statisticsData.getDatasetGenerators();
    }

    public void addDatasetGenerator(DatasetGenerator set){
        statisticsData.addDatasetGenerator(set);
    }

    public void addFilter(StatisticsFilter filter){
        statisticsData.addFilters(filter);
    }

    public List<StatisticsFilter> getFilters(){
        return statisticsData.getFilters();
    }

    public void setDataset(Dataset dataset) {
        statisticsData.setDataset(dataset);
    }

    public Dataset getDataset() {
        return statisticsData.getDataset();
    }

    public Dataset getDataset(Context context) throws SQLException, SolrServerException, IOException, ParseException {
        return statisticsData.createDataset(context);
    }

    /**
     * Append a css style fragment. Null fragments are ignored;
     * fragments are trimmed before being stored.
     */
    public void addCss(String style){
        if (style == null) {
            return;
        }
        if (css == null)
        {
            css = new ArrayList<String>();
        }
        css.add(style.trim());
    }

    /**
     * @return all registered css fragments concatenated, each terminated
     *         with a ';', or the empty string when none were added
     */
    public String getCss() {
        if (css == null) {
            return "";
        }
        StringBuilder result = new StringBuilder();
        for (String rule : css) {
            result.append(rule);
            if (!rule.endsWith(";"))
            {
                result.append(';');
            }
        }
        return result.toString();
    }
}

View File

@@ -0,0 +1,29 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.content;
/**
 * Statistics display that renders its dataset as a list.
 *
 * @author kevinvandevelde at atmire.com
 * Date: 23-dec-2008
 * Time: 12:38:58
 */
public class StatisticsListing extends StatisticsDisplay {

    /**
     * @param statisticsData the data backing this listing
     */
    public StatisticsListing(StatisticsData statisticsData) {
        super(statisticsData);
    }

    /** @return the render type identifier: "listing" */
    @Override
    public String getType() {
        return "listing";
    }
}

View File

@@ -0,0 +1,29 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.content;
/**
 * Statistics display that renders its dataset as a table.
 *
 * @author kevinvandevelde at atmire.com
 * Date: 23-dec-2008
 * Time: 9:27:52
 */
public class StatisticsTable extends StatisticsDisplay {

    /**
     * @param statisticsData the data backing this table
     */
    public StatisticsTable(StatisticsData statisticsData) {
        super(statisticsData);
    }

    /** @return the render type identifier: "table" */
    @Override
    public String getType() {
        return "table";
    }
}

View File

@@ -0,0 +1,20 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.content.filter;
/**
 * A wrapper for some kind of Solr filter expression that is AND-ed into
 * the statistics queries.
 *
 * @author kevinvandevelde at atmire.com
 * Date: 12-mrt-2009
 * Time: 10:36:03
 */
public interface StatisticsFilter {
    /**
     * Convert this filter's configuration to a query string fragment.
     *
     * @return a Solr filter expression ready to be combined into a larger query
     */
    public String toQuery();
}

View File

@@ -0,0 +1,129 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.content.filter;
import org.dspace.statistics.SolrLogger;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Calendar;
/**
 * Encapsulate a range of dates for Solr query filtering.
 *
 * Either a pair of {@link Date} objects or a pair of relative offset strings
 * (e.g. "-6", "+1" — interpreted in units of the configured granularity)
 * must be set, together with the granularity via {@link #setTypeStr(String)}.
 *
 * User: kevinvandevelde
 * Date: 13-mrt-2009
 * Time: 13:14:14
 */
public class StatisticsSolrDateFilter implements StatisticsFilter {
    private Date startDate;
    private Date endDate;
    private String startStr;
    private String endStr;
    private String typeStr;

    public StatisticsSolrDateFilter() {
    }

    /** Set the start date as a relative offset string (e.g. "-6").
     * Must be paired with {@link #setEndStr(String)}.
     */
    public void setStartStr(String startStr) {
        this.startStr = startStr;
    }

    /** Set the end date as a relative offset string (e.g. "+1").
     * Must be paired with {@link #setStartStr(String)}.
     */
    public void setEndStr(String endStr) {
        this.endStr = endStr;
    }

    /** Set the range granularity: DAY, MONTH, or YEAR. */
    public void setTypeStr(String typeStr) {
        this.typeStr = typeStr;
    }

    /** Set the start date as a Date object (defensively copied).
     * Must be paired with {@link #setEndDate(Date)}.
     */
    public void setStartDate(Date startDate) {
        this.startDate = (startDate == null ? null : new Date(startDate.getTime()));
    }

    /** Set the end date as a Date object (defensively copied).
     * Must be paired with {@link #setStartDate(Date)}.
     */
    public void setEndDate(Date endDate) {
        this.endDate = (endDate == null ? null : new Date(endDate.getTime()));
    }

    /** Convert the date range to a filter expression.
     * The first call resolves string offsets into concrete dates (anchored at
     * the start of the current day/month/year) and caches them.
     * @return Solr date filter expression, or "" when the granularity is
     *         missing or unknown and no concrete dates were supplied
     */
    public String toQuery() {
        if(startDate == null || endDate == null){
            // We have got strings instead of dates, so calculate our dates
            // out of these strings, anchored at the start of today.
            Calendar startCal = Calendar.getInstance();
            startCal.clear(Calendar.MILLISECOND);
            startCal.clear(Calendar.SECOND);
            startCal.clear(Calendar.MINUTE);
            startCal.set(Calendar.HOUR_OF_DAY, 0);

            // FIX: a missing granularity used to cause an NPE below;
            // treat it like an unknown granularity and bail out.
            if (typeStr == null) {
                return "";
            }

            int dateType;
            if(typeStr.equalsIgnoreCase("day")) {
                dateType = Calendar.DATE;
            } else if(typeStr.equalsIgnoreCase("month")) {
                dateType = Calendar.MONTH;
                startCal.set(Calendar.DATE, 1);
            } else if(typeStr.equalsIgnoreCase("year")) {
                startCal.clear(Calendar.MONTH);
                startCal.set(Calendar.DATE, 1);
                dateType = Calendar.YEAR;
            } else {
                // Unknown granularity: nothing sensible can be built.
                return "";
            }

            Calendar endCal = (Calendar) startCal.clone();
            if (startDate == null)
            {
                // Integer.parseInt rejects a leading '+' on pre-7 JVMs, so strip it.
                String offset = startStr;
                if(offset.startsWith("+"))
                {
                    offset = offset.substring(1);
                }
                startCal.add(dateType, Integer.parseInt(offset));
                startDate = startCal.getTime();
            }
            if (endDate == null)
            {
                String offset = endStr;
                if(offset.startsWith("+"))
                {
                    offset = offset.substring(1);
                }
                endCal.add(dateType, Integer.parseInt(offset));
                endDate = endCal.getTime();
            }
        }

        // Format the dates in the ISO-8601 form Solr expects.
        SimpleDateFormat formatter = new SimpleDateFormat(SolrLogger.DATE_FORMAT_8601);
        String startDateParsed = formatter.format(startDate);
        String endDateParsed = formatter.format(endDate);

        // Create our range clause.
        return "time:[" + startDateParsed + " TO " + endDateParsed + "]";
    }
}

View File

@@ -0,0 +1,121 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.util;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
import java.io.*;
import java.util.HashSet;
import java.util.Set;
/**
* Commandline utility to create a file of spider addresses from an Apache
* log file.
*
* @author Mark Diggory (mdiggory at atmire.com)
* @author kevinvandevelde at atmire.com
* @author ben at atmire.com
*/
public class ApacheLogRobotsProcessor {

    /**
     * Creates (or updates) a file of spider IP addresses by scanning an
     * Apache access log for clients that requested robots.txt.
     *
     * Command line options:
     *   -l  input Apache log file ("-" or omitted for standard input)
     *   -s  spider IP file to create/update ("-" or omitted for standard output)
     *
     * When an existing spider file is given, its current addresses are read
     * first and preserved when the file is rewritten.
     *
     * @param args command line arguments
     * @throws Exception on an unparseable command line or I/O failure
     */
    public static void main(String[] args) throws Exception {
        // create an Options object and populate it
        CommandLineParser parser = new PosixParser();
        Options options = new Options();
        options.addOption("l", "logfile", true, "type: Input log file");
        options.addOption("s", "spiderfile", true, "type: Spider IP file");
        CommandLine line = parser.parse(options, args);

        // Log source ("-" means standard input)
        String logFileLoc;
        if (line.hasOption("l"))
        {
            logFileLoc = line.getOptionValue("l");
        }
        else {
            logFileLoc = "-";
        }

        // Spider IP list destination ("-" means standard output)
        String spiderIpPath;
        if (line.hasOption("s"))
        {
            spiderIpPath = line.getOptionValue("s");
        }
        else {
            spiderIpPath = "-";
        }

        // Seed the set with the IPs already recorded in our file (if any)
        Set<String> logSpiders;
        Writer output;
        if ("-".equals(spiderIpPath))
        {
            logSpiders = new HashSet<String>();
            output = new BufferedWriter(new OutputStreamWriter(System.out));
        }
        else
        {
            File spiderIpFile = new File(spiderIpPath);
            if (spiderIpFile.exists())
            {
                logSpiders = SpiderDetector.readIpAddresses(spiderIpFile);
            }
            else
            {
                logSpiders = new HashSet<String>();
            }
            output = new BufferedWriter(new FileWriter(spiderIpFile));
        }

        try
        {
            // Collect the IPs of robots.txt requesters from the log.
            collectRobotsTxtClients(logFileLoc, logSpiders);

            // Last but not least write the IPs (old and new) to our file,
            // one per line.
            for (String ip : logSpiders) {
                System.err.println("Adding new ip: " + ip);
                output.write(ip + "\n");
            }
            output.flush();
        }
        finally
        {
            // Always release the output, even if reading the log failed.
            output.close();
        }
    }

    /**
     * Scan the given log source line by line and add the client IP of every
     * request that mentions robots.txt to the supplied set. The set
     * deduplicates addresses, so each IP is recorded once.
     *
     * @param logFileLoc log file path, or "-" for standard input
     * @param logSpiders set receiving the detected spider IP addresses
     * @throws IOException if the log cannot be read
     */
    private static void collectRobotsTxtClients(String logFileLoc, Set<String> logSpiders)
            throws IOException {
        BufferedReader in;
        if ("-".equals(logFileLoc))
        {
            in = new BufferedReader(new InputStreamReader(System.in));
        }
        else
        {
            in = new BufferedReader(new FileReader(logFileLoc));
        }
        try
        {
            String logLine;
            while ((logLine = in.readLine()) != null) {
                // Currently only check if robots.txt is present in our line
                if (logLine.contains("robots.txt")) {
                    // We got a robots.txt so we got a bot.
                    // Common log format: the client IP precedes the first "-".
                    String ip = logLine.substring(0, logLine.indexOf('-')).trim();
                    logSpiders.add(ip);
                }
            }
        }
        finally
        {
            // Always release the input, even on a read failure.
            in.close();
        }
    }
}

View File

@@ -0,0 +1,378 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.util;
import org.apache.commons.cli.*;
import org.apache.log4j.Logger;
import org.dspace.app.statistics.LogAnalyser;
import org.dspace.app.statistics.LogLine;
import org.dspace.content.*;
import org.dspace.handle.HandleManager;
import org.dspace.core.Context;
import java.io.*;
import java.sql.SQLException;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.text.SimpleDateFormat;
import java.text.ParsePosition;
/**
* A utility class to convert the classic dspace.log (as generated
* by log4j) files into an intermediate format for ingestion into
* the new solr stats.
*
* @author Stuart Lewis
*/
public class ClassicDSpaceLogConverter {
    /** Log4j logger, used for reporting file access problems */
    private Logger log = Logger.getLogger(ClassicDSpaceLogConverter.class);
    /** A DSpace context (used to resolve handles to objects) */
    private Context context;
    /** Whether or not to provide verbose output */
    private boolean verbose = false;
    /** Whether to include actions logged by org.dspace.usage.LoggerUsageEventListener */
    private boolean newEvents = false;
    /** A regular expression for extracting the IP address from a log line */
    private Pattern ipaddrPattern = Pattern.compile("ip_addr=(\\d*\\.\\d*\\.\\d*\\.\\d*):");
    /** Date format (in) from the log line */
    private SimpleDateFormat dateFormatIn = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    /** Date format out (for solr) */
    private SimpleDateFormat dateFormatOut = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
    /** Date format (in) from the log line for the UID (includes milliseconds) */
    private SimpleDateFormat dateFormatInUID = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");
    /** Date format out (for uid) */
    private SimpleDateFormat dateFormatOutUID = new SimpleDateFormat("yyyyMMddHHmmssSSS");
    /**
     * Create an instance of the converter utility
     *
     * @param c The context
     * @param v Whether or not to provide verbose output
     * @param nE Whether to include actions logged by org.dspace.usage.LoggerUsageEventListener
     */
    public ClassicDSpaceLogConverter(Context c, boolean v, boolean nE)
    {
        // Set up some variables
        context = c;
        verbose = v;
        newEvents = nE;
    }
    /**
     * Convert a classic log file.
     *
     * Each INFO line describing a view event is written to the output as a
     * comma-separated record: uid,action,id,date,user,ip. Consecutive
     * duplicate records are suppressed.
     *
     * @param in The filename to read from ("-", null or empty for standard input)
     * @param out The filename to write to ("-", null or empty for standard output)
     * @return The number of lines processed (i.e. records written)
     */
    public int convert(String in, String out)
    {
        // Line counter
        int counter = 0;
        int lines = 0;
        // Figure out input, output
        BufferedReader input;
        Writer output;
        try {
            if (null == in || in.isEmpty() || "-".equals(in))
            {
                input = new BufferedReader(new InputStreamReader(System.in));
                in = "standard input";
            }
            else
                input = new BufferedReader(new FileReader(in));
            if (null == out || out.isEmpty() || "-".equals(out))
            {
                output = new BufferedWriter(new OutputStreamWriter(System.out));
                out = "standard output";
            }
            else
                output = new BufferedWriter(new FileWriter(out));
        } catch (IOException ie) {
            log.error("File access problem", ie);
            return 0;
        }
        // Say what we're going to do
        System.err.println(" About to convert '" + in + "' to '" + out + "'");
        // Setup the regular expressions for the log file
        LogAnalyser.setRegex(in);
        // Open the file and read it line by line
        try {
            String line;
            LogLine lline;
            String lout;
            String id;
            String handle;
            String ip;
            String date;
            DSpaceObject dso;
            String uid;
            String lastLine = "";
            while ((line = input.readLine()) != null)
            {
                // Read in the line and convert it to a LogLine
                lines++;
                if (verbose)
                {
                    System.out.println(" - IN: " + line);
                }
                lline = LogAnalyser.getLogLine(line);
                // Get rid of any lines that aren't INFO
                if ((lline == null) || (!lline.isLevel("INFO")))
                {
                    if (verbose)
                    {
                        System.out.println(" - IGNORED!");
                    }
                    continue;
                }
                // Get the IP address of the user (from the ip_addr= field)
                Matcher matcher = ipaddrPattern.matcher(line);
                if (matcher.find())
                {
                    ip = matcher.group(1);
                }
                else
                {
                    ip = "unknown";
                }
                // Get and format the date
                // We can use lline.getDate() as this strips the time element
                date = dateFormatOut.format(
                        dateFormatIn.parse(line.substring(0, line.indexOf(',')),
                                           new ParsePosition(0)));
                // Generate a UID for the log line
                // - based on the date/time (including milliseconds)
                uid = dateFormatOutUID.format(
                        dateFormatInUID.parse(line.substring(0, line.indexOf(' ', line.indexOf(' ') + 1)),
                                           new ParsePosition(0)));
                try
                {
                    // What sort of view is it?
                    // (ignore lines from org.dspace.usage.LoggerUsageEventListener which is 1.6 code)
                    // The substring offsets below skip the parameter name and
                    // equals sign in each action's parameter string.
                    if ((lline.getAction().equals("view_bitstream")) &&
                        (!lline.getParams().contains("invalid_bitstream_id")) &&
                        (!lline.getParams().contains("withdrawn")) &&
                        ((!line.contains("org.dspace.usage.LoggerUsageEventListener")) || newEvents))
                    {
                        id = lline.getParams().substring(13);
                    }
                    else if ((lline.getAction().equals("view_item")) &&
                             ((!line.contains("org.dspace.usage.LoggerUsageEventListener")) || newEvents))
                    {
                        // Items are logged by handle: resolve to the object ID
                        handle = lline.getParams().substring(7);
                        dso = HandleManager.resolveToObject(context, handle);
                        id = "" + dso.getID();
                    }
                    else if ((lline.getAction().equals("view_collection")) &&
                             ((!line.contains("org.dspace.usage.LoggerUsageEventListener")) || newEvents))
                    {
                        id = lline.getParams().substring(14);
                    }
                    else if ((lline.getAction().equals("view_community")) &&
                             ((!line.contains("org.dspace.usage.LoggerUsageEventListener")) || newEvents))
                    {
                        id = lline.getParams().substring(13);
                    }
                    else
                    {
                        //if (verbose) System.out.println(" - IGNORED!");
                        continue;
                    }
                    // Construct the CSV output line
                    lout = uid + "," +
                           lline.getAction() + "," +
                           id + "," +
                           date + "," +
                           lline.getUser() + "," +
                           ip + "\n";
                }
                catch (Exception e)
                {
                    // Lines that cannot be parsed (e.g. unresolvable handle)
                    // are reported in verbose mode and skipped.
                    if (verbose)
                    {
                        System.out.println(" - IN: " + line);
                    }
                    if (verbose)
                    {
                        System.err.println("Error with log line! " + e.getMessage());
                    }
                    continue;
                }
                if ((verbose) && (!"".equals(lout)))
                {
                    System.out.println(" - IN: " + line);
                    System.out.println(" - OUT: " + lout);
                }
                // Write the output line, suppressing consecutive duplicates
                if ((!"".equals(lout)) && (!lout.equals(lastLine)))
                {
                    output.write(lout);
                    counter++;
                    lastLine = lout;
                }
            }
        }
        catch (IOException e)
        {
            log.error("File access problem", e);
        }
        finally
        {
            // Clean up the input and output streams
            try { input.close(); } catch (IOException e) { log.error(e.getMessage(), e); }
            try { output.flush(); } catch (IOException e) { log.error(e.getMessage(), e); }
            try { output.close(); } catch (IOException e) { log.error(e.getMessage(), e); }
        }
        // Tell the user what we have done
        System.err.println(" Read " + lines + " lines and recorded " + counter + " events");
        return counter;
    }
    /**
     * Print the help message and exit
     *
     * @param options The command line options the user gave
     * @param exitCode the system exit code to use
     */
    private static void printHelp(Options options, int exitCode)
    {
        // print the help message
        HelpFormatter myhelp = new HelpFormatter();
        myhelp.printHelp("ClassicDSpaceLogConverter\n", options);
        System.err.println("\n\tClassicDSpaceLogConverter -i infilename -o outfilename -v (for verbose output)");
        System.exit(exitCode);
    }
    /**
     * Main method to execute the converter
     *
     * @param args CLI args
     */
    public static void main(String[] args)
    {
        CommandLineParser parser = new PosixParser();
        Options options = new Options();
        options.addOption("i", "in", true, "source file ('-' or omit for standard input)");
        options.addOption("o", "out", true, "destination file or directory ('-' or omit for standard output)");
        options.addOption("m", "multiple",false, "treat the input file as having a wildcard ending");
        options.addOption("n", "newformat",false, "process new format log lines (1.6+)");
        options.addOption("v", "verbose", false, "display verbose output (useful for debugging)");
        options.addOption("h", "help", false, "help");
        // Parse the command line arguments
        CommandLine line;
        try
        {
            line = parser.parse(options, args);
        }
        catch (ParseException pe)
        {
            System.err.println("Error parsing command line arguments: " + pe.getMessage());
            System.exit(1);
            return;
        }
        // Did the user ask to see the help?
        if (line.hasOption('h'))
        {
            printHelp(options, 0);
        }
        // Whether or not to include event created by org.dspace.usage.LoggerUsageEventListener
        boolean newEvents = line.hasOption('n');
        // Create a context with authorisation turned off so handles resolve
        Context context = null;
        try
        {
            context = new Context();
            context.turnOffAuthorisationSystem();
        }
        catch (SQLException sqle)
        {
            System.err.println("Unable to create DSpace context: " + sqle.getMessage());
            System.exit(1);
        }
        ClassicDSpaceLogConverter converter = new ClassicDSpaceLogConverter(context,
                                                                            line.hasOption('v'),
                                                                            newEvents);
        // Set up the log analyser
        try
        {
            LogAnalyser.readConfig();
        }
        catch (IOException ioe)
        {
            System.err.println("Unable to read config file: " + LogAnalyser.getConfigFile());
            System.exit(1);
        }
        // Are we converting multiple files?
        if (line.hasOption('m'))
        {
            // Convert all the files whose names start with the given file's name
            final File sample = new File(line.getOptionValue('i'));
            File dir = sample.getParentFile();
            FilenameFilter filter = new FilenameFilter()
            {
                public boolean accept(File dir, String name)
                {
                    return name.startsWith(sample.getName());
                }
            };
            String[] children = dir.list(filter);
            for (String in : children)
            {
                System.err.println(in);
                // Derive the output name by appending the input's suffix to -o
                String out = line.getOptionValue('o') +
                             (dir.getAbsolutePath() +
                              System.getProperty("file.separator") + in).substring(line.getOptionValue('i').length());
                converter.convert(dir.getAbsolutePath() + System.getProperty("file.separator") + in, out);
            }
        }
        else
        {
            // Just convert the one file
            converter.convert(line.getOptionValue('i'), line.getOptionValue('o'));
        }
        // Clean everything up (abort: nothing was meant to be committed)
        context.restoreAuthSystemState();
        context.abort();
    }
}

View File

@@ -0,0 +1,47 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.util;
import org.dspace.core.ConfigurationManager;
import org.xbill.DNS.*;
import java.io.IOException;
/**
* XBill DNS resolver to retrieve hostnames for client IP addresses.
*
* @author kevinvandevelde at atmire.com
* @author ben at atmire.com
*/
public class DnsLookup {

    /**
     * Resolve a client IP address to a host name via a reverse (PTR) DNS
     * lookup.
     *
     * @param hostIp the IP address to resolve
     * @return the PTR record data for the address, or the original IP string
     *         when the answer section is empty
     * @throws IOException if the DNS query could not be sent
     */
    public static String reverseDns(String hostIp) throws IOException {
        Resolver resolver = new ExtendedResolver();

        // set the timeout, defaults to 200 milliseconds
        int timeout = ConfigurationManager.getIntProperty("usage-statistics", "resolver.timeout", 200);
        resolver.setTimeout(0, timeout);

        // Build and send a PTR query for the reverse-map name of the address.
        Record question = Record.newRecord(ReverseMap.fromAddress(hostIp), Type.PTR, DClass.IN);
        Message response = resolver.send(Message.newQuery(question));

        Record[] answers = response.getSectionArray(Section.ANSWER);
        return (answers.length == 0) ? hostIp : answers[0].rdataToString();
    }
}

View File

@@ -0,0 +1,198 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.util;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
* A Spare v4 IPTable implementation that uses nested HashMaps
* to optimize IP address matching over ranges of IP addresses.
*
* @author mdiggory at atmire.com
*/
public class IPTable {

    /* A lookup tree for IP addresses and subnet ranges:
     * first octet -> second octet -> third octet -> set of fourth octets.
     * The wildcard entry "*" in the fourth-place set matches any final octet
     * (i.e. the whole /24 is covered). */
    private Map<String, Map<String, Map<String, Set<String>>>> map =
            new HashMap<String, Map<String, Map<String, Set<String>>>>();

    /**
     * Add an address expression to the table. Accepted forms:
     * a full v4 IP ("1.2.3.4"), a three-place subnet ("1.2.3", matching the
     * whole /24), or a range of full addresses within one /24
     * ("1.2.3.4 - 1.2.3.9", inclusive).
     *
     * @param ip the address, subnet or range expression to add
     * @throws IPFormatException if the expression cannot be parsed
     */
    public void add(String ip) throws IPFormatException {

        String[] start;

        String[] end;

        String[] range = ip.split("-");

        if (range.length >= 2) {
            // Range form: both endpoints must be full addresses that differ
            // only in the final octet.
            start = range[0].trim().split("/")[0].split("\\.");
            end = range[1].trim().split("/")[0].split("\\.");

            if (start.length != 4 || end.length != 4)
            {
                throw new IPFormatException(ip + " - Ranges need to be full IPv4 Addresses");
            }

            if (!(start[0].equals(end[0]) && start[1].equals(end[1]) && start[2].equals(end[2]))) {
                throw new IPFormatException(ip + " - Ranges can only be across the last subnet x.y.z.0 - x.y.z.254");
            }
        } else {
            //need to ignore CIDR notation for the moment.
            //ip = ip.split("\\/")[0];

            String[] subnets = ip.split("\\.");

            if (subnets.length < 3) {
                throw new IPFormatException(ip + " - require at least three subnet places (e.g. 255.255.255.0)");
            }

            // A single address (or subnet) is a degenerate range.
            start = subnets;
            end = subnets;
        }

        if (start.length >= 3) {
            // Walk/extend the three nested maps for the first three octets.
            Map<String, Map<String, Set<String>>> first = map.get(start[0]);

            if (first == null) {
                first = new HashMap<String, Map<String, Set<String>>>();
                map.put(start[0], first);
            }

            Map<String, Set<String>> second = first.get(start[1]);

            if (second == null) {
                second = new HashMap<String, Set<String>>();
                first.put(start[1], second);
            }

            Set<String> third = second.get(start[2]);

            if (third == null) {
                third = new HashSet<String>();
                second.put(start[2], third);
            }

            //now populate fourth place (* or value 0-254);
            // A three-place subnet covers the whole /24: record a wildcard.
            if (start.length == 3) {
                third.add("*");
            }

            // Once the /24 is fully covered, individual octets are redundant.
            if (third.contains("*")) {
                return;
            }

            if (start.length >= 4) {
                // Populate every final octet in the (inclusive) range.
                int s = Integer.parseInt(start[3]);
                int e = Integer.parseInt(end[3]);

                for (int i = s; i <= e; i++) {
                    third.add(String.valueOf(i));
                }
            }
        }
    }

    /** Check whether a given address is contained in this netblock.
     *
     * @param ip the address to be tested (must be a full v4 address)
     * @return true if {@code ip} is within this table's limits
     * @throws IPFormatException if {@code ip} is not a full v4 address
     */
    public boolean contains(String ip) throws IPFormatException {
        String[] subnets = ip.split("\\.");

        if (subnets.length != 4)
        {
            throw new IPFormatException("needs to be a single IP address");
        }

        // Descend the tree one octet at a time; any missing level means
        // the address was never added.
        Map<String, Map<String, Set<String>>> first = map.get(subnets[0]);
        if (first == null)
        {
            return false;
        }

        Map<String, Set<String>> second = first.get(subnets[1]);
        if (second == null)
        {
            return false;
        }

        Set<String> third = second.get(subnets[2]);
        if (third == null)
        {
            return false;
        }

        // Either the exact final octet is present, or the /24 wildcard is.
        return third.contains(subnets[3]) || third.contains("*");
    }

    /** Convert to a Set.
     *
     * Wildcard /24 entries appear as three-place strings ("x.y.z"); all
     * other entries appear as full addresses ("x.y.z.w").
     *
     * @return this table's content as a Set
     */
    public Set<String> toSet() {
        HashSet<String> set = new HashSet<String>();

        for (Map.Entry<String, Map<String, Map<String, Set<String>>>> first : map.entrySet()) {
            String firstString = first.getKey();
            Map<String, Map<String, Set<String>>> secondMap = first.getValue();

            for (Map.Entry<String, Map<String, Set<String>>> second : secondMap.entrySet()) {
                String secondString = second.getKey();
                Map<String, Set<String>> thirdMap = second.getValue();

                for (Map.Entry<String, Set<String>> third : thirdMap.entrySet()) {
                    String thirdString = third.getKey();
                    Set<String> fourthSet = third.getValue();

                    if (fourthSet.contains("*")) {
                        set.add(firstString + "." + secondString + "." + thirdString);
                    } else {
                        for (String fourth : fourthSet) {
                            set.add(firstString + "." + secondString + "." + thirdString + "." + fourth);
                        }
                    }
                }
            }
        }

        return set;
    }

    /**
     * Exception class to deal with IP format errors.
     */
    public static class IPFormatException extends Exception {
        public IPFormatException(String s) {
            super(s);
        }
    }
}

View File

@@ -0,0 +1,168 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.util;
import java.io.IOException;
import java.util.Locale;
import java.util.MissingResourceException;
import java.util.Properties;
import java.util.ResourceBundle;
import org.apache.log4j.Logger;
import org.dspace.core.I18nUtil;
/**
* Mapping between Country codes, English Country names,
* Continent Codes, and English Continent names
*
* @author kevinvandevelde at atmire.com
* @author ben at atmire.com
*/
public class LocationUtils
{
    private static final Logger logger = Logger.getLogger(LocationUtils.class);

    /** Lazily-loaded map from ISO country code to DSpace continent code. */
    private static final Properties countryToContinent = new Properties();

    /** Resource bundle base name holding localized continent names. */
    private static final String CONTINENT_NAMES_BUNDLE
            = LocationUtils.class.getPackage().getName() + ".continent-names";

    /** @return the localized "unknown continent" message. */
    private static String unknownContinent()
    {
        return I18nUtil
                .getMessage("org.dspace.statistics.util.LocationUtils.unknown-continent");
    }

    /** @return the localized "unknown country" message. */
    private static String unknownCountry()
    {
        return I18nUtil
                .getMessage("org.dspace.statistics.util.LocationUtils.unknown-country");
    }

    /**
     * Map DSpace continent codes onto ISO country codes.
     *
     * @param countryCode ISO 3166-1 alpha-2 country code.
     * @return DSpace 2-character code for continent containing that country, or
     *         an error message string.
     */
    static public String getContinentCode(String countryCode)
    {
        if (countryCode == null)
        {
            logger.error("Null country code");
            return unknownContinent();
        }

        // Load the country/continent mapping on first use.
        if (countryToContinent.isEmpty())
        {
            try
            {
                countryToContinent.load(LocationUtils.class
                        .getResourceAsStream("country-continent-codes.properties"));
            }
            catch (IOException e)
            {
                logger.error("Could not load country/continent map file", e);
            }
        }

        String continent = countryToContinent.getProperty(countryCode);
        if (continent == null)
        {
            logger.error("Unknown country code " + countryCode);
            return unknownContinent();
        }

        return continent;
    }

    /**
     * Map DSpace continent codes onto default continent names.
     *
     * @param continentCode DSpace 2-character code for a continent.
     * @return Name of the continent in the default locale, or an error message
     *         string.
     * @deprecated use {@link #getContinentName(String, Locale)} instead.
     */
    @Deprecated
    static public String getContinentName(String continentCode)
    {
        return getContinentName(continentCode, Locale.getDefault());
    }

    /**
     * Map DSpace continent codes onto localized continent names.
     *
     * @param continentCode DSpace 2-character code for a continent.
     * @param locale The desired localization (US English when null).
     * @return Localized name of the continent, or an error message string.
     */
    static public String getContinentName(String continentCode, Locale locale)
    {
        if (locale == null)
        {
            locale = Locale.US;
        }

        if (continentCode == null)
        {
            logger.error("Null continentCode");
            return unknownContinent();
        }

        ResourceBundle names;
        try
        {
            names = ResourceBundle.getBundle(CONTINENT_NAMES_BUNDLE, locale);
        }
        catch (MissingResourceException e)
        {
            logger.error("Could not load continent code/name resource bundle",
                    e);
            return unknownContinent();
        }

        try
        {
            return names.getString(continentCode);
        }
        catch (MissingResourceException e)
        {
            logger.error("No continent code " + continentCode + " in bundle "
                    + names.getLocale().getDisplayName());
            return unknownContinent();
        }
    }

    /**
     * Map ISO country codes onto default country names.
     *
     * @param countryCode ISO 3166-1 alpha-2 country code.
     * @return Name of the country in the default locale, or an error message
     *         string.
     * @deprecated use {@link #getCountryName(String, Locale)} instead.
     */
    @Deprecated
    static public String getCountryName(String countryCode)
    {
        return getCountryName(countryCode, Locale.getDefault());
    }

    /**
     * Map ISO country codes onto localized country names.
     *
     * @param countryCode ISO 3166-1 alpha-2 country code.
     * @param locale Desired localization.
     * @return Localized name of the country, or an error message string.
     */
    static public String getCountryName(String countryCode, Locale locale)
    {
        if (countryCode == null)
        {
            return unknownCountry();
        }

        // Let the JDK's locale data do the code-to-name translation.
        Locale country = new Locale("EN", countryCode);
        String name = country.getDisplayCountry(locale);

        if (name.isEmpty())
        {
            return unknownCountry();
        }

        return name;
    }
}

View File

@@ -0,0 +1,197 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.util;
import org.apache.log4j.Logger;
import org.dspace.core.ConfigurationManager;
import javax.servlet.http.HttpServletRequest;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
/**
* SpiderDetector is used to find IP's that are spiders...
* In future someone may add UserAgents and Host Domains
* to the detection criteria here.
*
* @author kevinvandevelde at atmire.com
* @author ben at atmire.com
* @author Mark Diggory (mdiggory at atmire.com)
*/
public class SpiderDetector {

    private static Logger log = Logger.getLogger(SpiderDetector.class);

    /** Cached value of the "useProxies" configuration property (lazily read). */
    private static Boolean useProxies;

    /**
     * Sparse HashTable structure to hold IP address ranges.
     */
    private static IPTable table = null;

    /**
     * Utility method which reads the IP addresses out of a file and returns
     * them in a Set. Lines starting with "#" are comments; lines that do not
     * begin with a digit are treated as hostnames and currently ignored.
     *
     * @param spiderIpFile the location of our spider file
     * @return a Set of IP address strings (empty if the file is absent)
     * @throws IOException if the file exists but cannot be read
     */
    public static Set<String> readIpAddresses(File spiderIpFile) throws IOException {
        Set<String> ips = new HashSet<String>();

        if (!spiderIpFile.exists() || !spiderIpFile.isFile())
        {
            return ips;
        }

        //Read our file & get all them ip's
        BufferedReader in = new BufferedReader(new FileReader(spiderIpFile));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                if (!line.startsWith("#")) {
                    line = line.trim();

                    if (!line.equals("") && !Character.isDigit(line.charAt(0))) {
                        // is a hostname
                        // add this functionality later...
                    } else if (!line.equals("")) {
                        ips.add(line);
                        // is full v4 ip (too tired to deal with v6)...
                    }
                } else {
                    //   ua.add(line.replaceFirst("#","").replaceFirst("UA","").trim());
                    // ... add this functionality later
                }
            }
        } finally {
            // Always release the reader, even if a read fails part-way.
            in.close();
        }
        return ips;
    }

    /**
     * Get a Set representing all the spider addresses loaded so far.
     *
     * @return the spider IP addresses as a Set of strings
     */
    public static Set<String> getSpiderIpAddresses() {
        loadSpiderIpAddresses();
        return table.toSet();
    }

    /*
        private loader to populate the table from the files in
        [dspace.dir]/config/spiders (runs only once; table is cached).
     */
    private static void loadSpiderIpAddresses() {
        if (table == null) {
            table = new IPTable();

            String filePath = ConfigurationManager.getProperty("dspace.dir");
            try {
                File spidersDir = new File(filePath, "config/spiders");

                if (spidersDir.exists() && spidersDir.isDirectory()) {
                    for (File file : spidersDir.listFiles()) {
                        for (String ip : readIpAddresses(file)) {
                            table.add(ip);
                        }
                        log.info("Loaded Spider IP file: " + file);
                    }
                } else {
                    log.info("No spider file loaded");
                }
            }
            catch (Exception e) {
                log.error("Error Loading Spiders:" + e.getMessage(), e);
            }
        }
    }

    /**
     * Static Service Method for testing spiders against existing spider files.
     * <p/>
     * In the future this will be extended to support User Agent and
     * domain Name detection.
     * <p/>
     * In future spiders HashSet may be optimized as byte offset array to
     * improve performance and memory footprint further.
     *
     * @param request the incoming HTTP request
     * @return true|false if the request was detected to be from a spider
     */
    public static boolean isSpider(HttpServletRequest request) {
        // Behind a proxy, the real client IP(s) arrive in X-Forwarded-For.
        if (isUseProxies() && request.getHeader("X-Forwarded-For") != null) {
            /* This header is a comma delimited list */
            for (String xfip : request.getHeader("X-Forwarded-For").split(",")) {
                if (isSpider(xfip))
                {
                    return true;
                }
            }
        }

        return isSpider(request.getRemoteAddr());
    }

    /**
     * Check whether an individual IP address belongs to a known spider.
     *
     * @param ip the address to test
     * @return true if the address is in the spider table; false otherwise
     *         (including when the address cannot be parsed)
     */
    public static boolean isSpider(String ip) {
        if (table == null) {
            SpiderDetector.loadSpiderIpAddresses();
        }

        try {
            if (table.contains(ip)) {
                return true;
            }
        } catch (Exception e) {
            // Malformed addresses are simply not spiders.
            return false;
        }

        return false;
    }

    /**
     * Lazily read and cache the "useProxies" configuration flag.
     *
     * @return true only when the property is set to the exact string "true"
     */
    private static boolean isUseProxies() {
        if (useProxies == null) {
            useProxies = "true".equals(ConfigurationManager.getProperty("useProxies"));
        }

        return useProxies;
    }
}

View File

@@ -0,0 +1,160 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.util;
import org.apache.commons.cli.*;
import org.apache.log4j.Logger;
import org.apache.tools.ant.taskdefs.Get;
import org.dspace.core.ConfigurationManager;
import org.dspace.statistics.SolrLogger;
import java.io.*;
import java.net.URL;
/**
* Class to load intermediate statistics files into solr
*
* @author Stuart Lewis
*/
public class StatisticsClient
{
    private static final Logger log = Logger.getLogger(StatisticsClient.class);

    /**
     * Print the help message and exit
     *
     * @param options The command line options the user gave
     * @param exitCode the system exit code to use
     */
    private static void printHelp(Options options, int exitCode)
    {
        // print the help message
        HelpFormatter myhelp = new HelpFormatter();
        myhelp.printHelp("StatisticsClient\n", options);
        System.exit(exitCode);
    }

    /**
     * Main method to run the statistics importer. Exactly one action option
     * is executed per invocation; with no recognized option the help text is
     * printed.
     *
     * @param args The command line arguments
     * @throws Exception If something goes wrong
     */
    public static void main(String[] args) throws Exception
    {
        CommandLineParser parser = new PosixParser();

        Options options = new Options();

        options.addOption("u", "update-spider-files", false,
                          "Update Spider IP Files from internet into " +
                          ConfigurationManager.getProperty("dspace.dir") + "/config/spiders");

        options.addOption("m", "mark-spiders", false, "Update isBot Flag in Solr");
        options.addOption("f", "delete-spiders-by-flag", false, "Delete Spiders in Solr By isBot Flag");
        options.addOption("i", "delete-spiders-by-ip", false, "Delete Spiders in Solr By IP Address");
        options.addOption("o", "optimize", false, "Run maintenance on the SOLR index");
        options.addOption("b", "reindex-bitstreams", false, "Reindex the bitstreams to ensure we have the bundle name");
        options.addOption("r", "remove-deleted-bitstreams", false, "While indexing the bundle names remove the statistics about deleted bitstreams");
        options.addOption("s", "shard-solr-index", false, "Split the data from the main Solr core into separate Solr cores per year");
        options.addOption("h", "help", false, "help");

        CommandLine line = parser.parse(options, args);

        // Did the user ask to see the help?
        if (line.hasOption('h'))
        {
            printHelp(options, 0);
        }

        if(line.hasOption("u"))
        {
            StatisticsClient.updateSpiderFiles();
        }
        else if (line.hasOption('m'))
        {
            SolrLogger.markRobotsByIP();
        }
        else if(line.hasOption('f'))
        {
            SolrLogger.deleteRobotsByIsBotFlag();
        }
        else if(line.hasOption('i'))
        {
            SolrLogger.deleteRobotsByIP();
        }
        else if(line.hasOption('o'))
        {
            SolrLogger.optimizeSOLR();
        }
        else if(line.hasOption('b'))
        {
            // -r additionally purges statistics of deleted bitstreams
            SolrLogger.reindexBitstreamHits(line.hasOption('r'));
        }
        else if(line.hasOption('s'))
        {
            SolrLogger.shardSolrIndex();
        }
        else
        {
            printHelp(options, 0);
        }
    }

    /**
     * Method to update Spiders in config directory. Downloads each URL listed
     * in the "spiderips.urls" property of the solr-statistics module into
     * [dspace.dir]/config/spiders, skipping files that are already up to date.
     */
    private static void updateSpiderFiles()
    {
        try
        {
            System.out.println("Downloading latest spider IP addresses:");

            // Get the list URLs to download from
            String urls = ConfigurationManager.getProperty("solr-statistics", "spiderips.urls");
            if ((urls == null) || ("".equals(urls)))
            {
                // Missing configuration is an error: exit non-zero.
                System.err.println(" - Missing setting from solr-statistics.cfg: spiderips.urls");
                System.exit(1);
            }

            // Get the location of spiders directory
            File spiders = new File(ConfigurationManager.getProperty("dspace.dir"),"config/spiders");

            if (!spiders.exists() && !spiders.mkdirs())
            {
                log.error("Unable to create spiders directory");
            }

            String[] values = urls.split(",");
            for (String value : values)
            {
                value = value.trim();
                System.out.println(" Downloading: " + value);

                URL url = new URL(value);

                // Use the Ant Get task: it honours timestamps, so unchanged
                // files are not re-downloaded.
                Get get = new Get();
                get.setDest(new File(spiders, url.getHost() + url.getPath().replace("/","-")));
                get.setSrc(url);
                get.setUseTimestamp(true);
                get.execute();
            }
        } catch (Exception e)
        {
            System.err.println(" - Error: " + e.getMessage());
            e.printStackTrace();
            System.exit(1);
        }
    }
}

View File

@@ -0,0 +1,427 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.util;
import org.apache.commons.cli.*;
import org.apache.commons.lang.time.DateFormatUtils;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.dspace.core.Context;
import org.dspace.core.Constants;
import org.dspace.core.ConfigurationManager;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Bitstream;
import org.dspace.content.DCValue;
import org.dspace.content.Item;
import org.dspace.eperson.EPerson;
import org.dspace.statistics.SolrLogger;
import java.util.Date;
import java.util.Map;
import java.text.SimpleDateFormat;
import com.maxmind.geoip.LookupService;
import com.maxmind.geoip.Location;
/**
* Test class to generate random statistics data.
* Used for load testing of searches. Inputs are slow
* due to inefficient randomizer.
*
* @author kevinvandevelde at atmire.com
* @author ben at atmire.com
*/
public class StatisticsDataGenerator {

    /**
     * Command-line entry point. Generates {@code nrLogs} random usage events
     * (community/collection/item/bitstream views) and writes them directly
     * into the statistics Solr core.
     *
     * WARNING: this wipes the existing statistics core first
     * (deleteByQuery("*:*")) — load-testing tool only, never run in production.
     *
     * @param args command line arguments (see the option definitions below)
     * @throws Exception on any parse, database, lookup or solr failure
     */
    public static void main(String[] args) throws Exception {
        CommandLineParser parser = new PosixParser();
        Options options = new Options();
        options.addOption("n", "nrlogs", true,
                "type: nr of logs to be generated");
        options.addOption("s", "startDate", true,
                "type: the start date from which we start generating our logs");
        options.addOption("e", "endDate", true,
                "type: the end date from which we start generating our logs");
        options.addOption("a", "cms", true, "The starting id of our community");
        options.addOption("b", "cme", true, "The end id of our community");
        options.addOption("c", "cls", true,
                "The starting id of our collection");
        options.addOption("d", "cle", true, "The end if of our collection");
        options.addOption("f", "is", true, "The starting id of our item");
        options.addOption("g", "ie", true, "The end id of our item");
        options.addOption("h", "bs", true, "The starting id of our bitstream");
        options.addOption("i", "be", true, "The end id of our bitstream");
        options.addOption("j", "ps", true, "The starting id of our epersons");
        options.addOption("k", "pe", true, "The end id of our epersons");
        CommandLine line = parser.parse(options, args);

        int nrLogs;
        long startDate;
        long endDate;
        long commStartId;
        long commEndId;
        long collStartId;
        long collEndId;
        long itemStartId;
        long itemEndId;
        long bitStartId;
        long bitEndId;
        long epersonStartId;
        long epersonEndId;

        // -n is the only option with an error message; every id-range option
        // below is also mandatory but exits silently when missing.
        if (line.hasOption("n"))
        {
            nrLogs = Integer.parseInt(line.getOptionValue("n"));
        }
        else {
            System.out
                    .println("We need to know how many logs we need to create");
            return;
        }
        if (line.hasOption("s")) {
            startDate = getDateInMiliseconds(line.getOptionValue("s"));
        } else
        {
            // Default lower bound for generated timestamps (dd/MM/yyyy).
            startDate = getDateInMiliseconds("01/01/2006");
        }
        if (line.hasOption("e")) {
            endDate = getDateInMiliseconds(line.getOptionValue("e"));
        } else
        {
            endDate = new Date().getTime();
        }
        if (line.hasOption("a"))
        {
            commStartId = Long.parseLong(line.getOptionValue("a"));
        }
        else
        {
            return;
        }
        if (line.hasOption("b"))
        {
            commEndId = Long.parseLong(line.getOptionValue("b"));
        }
        else
        {
            return;
        }
        if (line.hasOption("c"))
        {
            collStartId = Long.parseLong(line.getOptionValue("c"));
        }
        else
        {
            return;
        }
        if (line.hasOption("d"))
        {
            collEndId = Long.parseLong(line.getOptionValue("d"));
        }
        else
        {
            return;
        }
        if (line.hasOption("f"))
        {
            itemStartId = Long.parseLong(line.getOptionValue("f"));
        }
        else
        {
            return;
        }
        if (line.hasOption("g"))
        {
            itemEndId = Long.parseLong(line.getOptionValue("g"));
        }
        else
        {
            return;
        }
        if (line.hasOption("h"))
        {
            bitStartId = Long.parseLong(line.getOptionValue("h"));
        }
        else
        {
            return;
        }
        if (line.hasOption("i"))
        {
            bitEndId = Long.parseLong(line.getOptionValue("i"));
        }
        else
        {
            return;
        }
        if (line.hasOption("j"))
        {
            epersonStartId = Long.parseLong(line.getOptionValue("j"));
        }
        else
        {
            return;
        }
        if (line.hasOption("k"))
        {
            epersonEndId = Long.parseLong(line.getOptionValue("k"));
        }
        else
        {
            return;
        }

        // Get the max id range (used by the "walk to a valid id" loop below).
        long maxIdTotal = Math.max(commEndId, collEndId);
        maxIdTotal = Math.max(maxIdTotal, itemEndId);
        maxIdTotal = Math.max(maxIdTotal, bitEndId);
        // We got 3/4 chance the person visting the dso is not logged in
        // (ids above the real eperson range resolve to null below).
        epersonEndId *= 4;
        // We got all our parameters now get the rest
        Context context = new Context();
        // Find our solr server
        CommonsHttpSolrServer solr = new CommonsHttpSolrServer(
                ConfigurationManager.getProperty("solr-statistics", "server"));
        // Destructive: clears the whole statistics core before generating.
        solr.deleteByQuery("*:*");
        solr.commit();
        Map<String, String> metadataStorageInfo = SolrLogger.getMetadataStorageInfo();
        String prevIp = null;
        String dbfile = ConfigurationManager.getProperty("usage-statistics", "dbfile");
        LookupService cl = new LookupService(dbfile,
                LookupService.GEOIP_STANDARD);
        int countryErrors = 0;
        for (int i = 0; i < nrLogs; i++) {
            String ip = "";
            Date time;
            String continent;
            String countryCode;
            float longitude;
            float latitude;
            String city;
            // 1. Generate an ip for our user (four random octets 0-254)
            StringBuilder ipBuilder = new StringBuilder();
            for (int j = 0; j < 4; j++) {
                ipBuilder.append(getRandomNumberInRange(0, 254));
                if (j != 3)
                {
                    ipBuilder.append(".");
                }
            }
            ip = ipBuilder.toString();
            // 2 Depending on our ip get all the location info
            Location location;
            try {
                location = cl.getLocation(ip);
            } catch (Exception e) {
                location = null;
            }
            if (location == null) {
                // If we haven't got a prev ip this is pretty useless so move on
                // to the next one
                if (prevIp == null)
                {
                    continue;
                }
                // NOTE(review): reuses the previous ip; if that lookup also
                // returns null the dereference below NPEs — relies on prevIp
                // having resolved before. Confirm if hardening is needed.
                ip = prevIp;
                location = cl.getLocation(ip);
            }
            city = location.city;
            countryCode = location.countryCode;
            longitude = location.longitude;
            latitude = location.latitude;
            try {
                continent = LocationUtils.getContinentCode(countryCode);
            } catch (Exception e) {
                // We could get an error if our country == Europa this doesn't
                // matter for generating statistics so ignore it
                System.out.println("COUNTRY ERROR: " + countryCode);
                countryErrors++;
                continue;
            }
            // 3. Generate a date that the object was visited
            time = new Date(getRandomNumberInRange(startDate, endDate));
            // 4. Get our dspaceobject we are supposed to be working on
            // We got mostly item views so lets say we got 1/2 chance that we
            // got an item view
            // What type have we got (PS: I know we haven't got 5 as a dso type
            // we can log but it is used so our item gets move traffic)
            int type = (int) getRandomNumberInRange(0, 8);
            if (type == Constants.BUNDLE || type >= 5)
            {
                type = Constants.ITEM;
            }
            int dsoId = -1;
            // Now we need to find a valid id
            switch (type) {
            case Constants.COMMUNITY:
                dsoId = (int) getRandomNumberInRange(commStartId, commEndId);
                break;
            case Constants.COLLECTION:
                dsoId = (int) getRandomNumberInRange(collStartId, collEndId);
                break;
            case Constants.ITEM:
                dsoId = (int) getRandomNumberInRange(itemStartId, itemEndId);
                break;
            case Constants.BITSTREAM:
                dsoId = (int) getRandomNumberInRange(bitStartId, bitEndId);
                break;
            }
            // Now find our dso
            DSpaceObject dso = DSpaceObject.find(context, type, dsoId);
            if (dso instanceof Bitstream) {
                // Internal bitstreams (licenses, thumbs) must not be counted.
                Bitstream bit = (Bitstream) dso;
                if (bit.getFormat().isInternal()) {
                    dso = null;
                }
            }
            // Make sure we got a dso: walk the id up (or down once we hit the
            // overall max) until something valid is found.
            boolean substract = false;
            while (dso == null) {
                // If our dsoId gets higher then our maxIdtotal we need to lower
                // to find a valid id
                if (dsoId == maxIdTotal)
                {
                    substract = true;
                }
                if (substract)
                {
                    dsoId--;
                }
                else
                {
                    dsoId++;
                }
                dso = DSpaceObject.find(context, type, dsoId);
                if (dso instanceof Bitstream) {
                    Bitstream bit = (Bitstream) dso;
                    if (bit.getFormat().isInternal()) {
                        dso = null;
                    }
                }
                // System.out.println("REFIND");
            }
            // Find the person who is visting us
            int epersonId = (int) getRandomNumberInRange(epersonStartId, epersonEndId);
            EPerson eperson = EPerson.find(context, epersonId);
            if (eperson == null)
            {
                // Anonymous visit (ids were inflated 4x above on purpose).
                epersonId = -1;
            }
            // System.out.println(ip);
            // System.out.println(country + " " +
            // LocationUtils.getCountryName(countryCode));
            // Resolve the dns
            String dns = null;
            try {
                dns = DnsLookup.reverseDns(ip);
            } catch (Exception e) {
                // best effort: dns stays null when reverse lookup fails
            }
            System.out.println(ip);
            System.out.println(dns);
            // Save it in our server
            SolrInputDocument doc1 = new SolrInputDocument();
            doc1.addField("ip", ip);
            doc1.addField("type", dso.getType());
            doc1.addField("id", dso.getID());
            doc1.addField("time", DateFormatUtils.format(time,
                    SolrLogger.DATE_FORMAT_8601));
            doc1.addField("continent", continent);
            // doc1.addField("country", country);
            doc1.addField("countryCode", countryCode);
            doc1.addField("city", city);
            doc1.addField("latitude", latitude);
            doc1.addField("longitude", longitude);
            if (epersonId > 0)
            {
                doc1.addField("epersonid", epersonId);
            }
            if (dns != null)
            {
                doc1.addField("dns", dns.toLowerCase());
            }
            if (dso instanceof Item) {
                Item item = (Item) dso;
                // Store the metadata (configured field -> "schema.element.qualifier")
                for (Map.Entry<String, String> entry : metadataStorageInfo.entrySet())
                {
                    String dcField = entry.getValue();
                    DCValue[] vals = item.getMetadata(dcField.split("\\.")[0],
                            dcField.split("\\.")[1], dcField.split("\\.")[2],
                            Item.ANY);
                    for (DCValue val1 : vals) {
                        String val = val1.value;
                        doc1.addField(entry.getKey(), val);
                        doc1.addField(entry.getKey() + "_search", val.toLowerCase());
                    }
                }
            }
            SolrLogger.storeParents(doc1, dso);
            solr.add(doc1);
            // Make sure we have a previous ip
            prevIp = ip;
        }
        System.out.println("Nr of countryErrors: " + countryErrors);
        // Commit at the end cause it takes a while
        solr.commit();
    }

    /**
     * Method returns a random integer between the given int
     *
     * @param min
     *            the random number must be greater or equal to this
     * @param max
     *            the random number must be smaller or equal to this
     * @return a random in
     */
    private static long getRandomNumberInRange(long min, long max) {
        return min + (long) (Math.random() * ((max - min) + 1));
    }

    /**
     * Method to get the miliseconds from a datestring
     *
     * @param dateString
     *            the string containing our date in a string (dd/MM/yyyy)
     * @return the nr of miliseconds in the given datestring
     * @throws java.text.ParseException
     *             should not happen
     */
    private static long getDateInMiliseconds(String dateString)
            throws java.text.ParseException {
        SimpleDateFormat formatter = new SimpleDateFormat("dd/MM/yyyy");
        return formatter.parse(dateString).getTime();
    }
}

View File

@@ -0,0 +1,537 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.util;
import org.apache.commons.cli.*;
import org.apache.commons.lang.time.DateFormatUtils;
import org.apache.log4j.Logger;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.dspace.content.*;
import org.dspace.content.Collection;
import org.dspace.core.Context;
import org.dspace.core.Constants;
import org.dspace.core.ConfigurationManager;
import org.dspace.eperson.EPerson;
import org.dspace.statistics.SolrLogger;
import java.text.*;
import java.io.*;
import java.util.*;
import com.maxmind.geoip.LookupService;
import com.maxmind.geoip.Location;
/**
* Class to load intermediate statistics files into solr
*
* @author Stuart Lewis
*/
public class StatisticsImporter
{
    private static final Logger log = Logger.getLogger(StatisticsImporter.class);

    /** Date format used to parse the timestamp column of the input file (not thread-safe, single-threaded tool). */
    private static SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");

    /** Solr server connection */
    private static CommonsHttpSolrServer solr;

    /** GEOIP lookup service */
    private static LookupService geoipLookup;

    /** Metadata storage information */
    private static Map<String, String> metadataStorageInfo;

    /** Whether to skip the DNS reverse lookup or not */
    private static boolean skipReverseDNS = false;

    /** Local items */
    private List<Integer> localItems;

    /** Local collections */
    private List<Integer> localCollections;

    /** Local communities */
    private List<Integer> localCommunities;

    /** Local bitstreams */
    private List<Integer> localBitstreams;

    /** Whether or not to replace item IDs with local values (for testing) */
    private boolean useLocal;

    /**
     * Constructor. Optionally loads local data to replace foreign data
     * if using someone else's log files
     *
     * @param local Whether to use local data
     */
    public StatisticsImporter(boolean local)
    {
        // Setup the lists of communities, collections, items & bitstreams if required
        useLocal = local;
        if (local)
        {
            try
            {
                System.out.print("Loading local communities... ");
                Context c = new Context();
                Community[] communities = Community.findAll(c);
                localCommunities = new ArrayList<Integer>();
                for (Community community : communities)
                {
                    localCommunities.add(community.getID());
                }
                System.out.println("Found " + localCommunities.size());

                System.out.print("Loading local collections... ");
                Collection[] collections = Collection.findAll(c);
                localCollections = new ArrayList<Integer>();
                for (Collection collection : collections)
                {
                    localCollections.add(collection.getID());
                }
                System.out.println("Found " + localCollections.size());

                System.out.print("Loading local items... ");
                ItemIterator items = Item.findAll(c);
                localItems = new ArrayList<Integer>();
                Item i;
                while (items.hasNext())
                {
                    i = items.next();
                    localItems.add(i.getID());
                }
                System.out.println("Found " + localItems.size());

                System.out.print("Loading local bitstreams... ");
                Bitstream[] bitstreams = Bitstream.findAll(c);
                localBitstreams = new ArrayList<Integer>();
                for (Bitstream bitstream : bitstreams)
                {
                    // Unnamed bitstreams are internal (licenses etc.) — skip them
                    if (bitstream.getName() != null)
                    {
                        localBitstreams.add(bitstream.getID());
                    }
                }
                System.out.println("Found " + localBitstreams.size());
            } catch (Exception e)
            {
                System.err.println("Error retrieving items from DSpace database:");
                e.printStackTrace();
                System.exit(1);
            }
        }
    }

    /**
     * Read lines from the statistics file and load their data into solr.
     *
     * Expected CSV columns: uuid, action, id, date, user-email, ip.
     * Search engine hits (recognised by reverse DNS) are skipped.
     *
     * @param filename The filename of the file to load ({@code null} or "-" for stdin)
     * @param context The DSpace Context
     * @param verbose Whether to display verbose output
     */
    private void load(String filename, Context context, boolean verbose)
    {
        // Item counter
        int counter = 0;
        int errors = 0;
        int searchengines = 0;
        BufferedReader input = null;
        try
        {
            if (null == filename || "-".equals(filename))
            {
                input = new BufferedReader(new InputStreamReader(System.in));
                filename = "standard input";
            }
            else
            {
                input = new BufferedReader(new FileReader(new File(filename)));
            }

            // Print out the filename for confirmation
            System.out.println("Processing file: " + filename);

            String line;
//            String uuid;
            String action;
            String id;
            Date date;
            String user;
            String ip;

            String continent = "";
            String country = "";
            String countryCode = "";
            float longitude = 0f;
            float latitude = 0f;
            String city = "";
            String dns;

            DNSCache dnsCache = new DNSCache(2500, 0.75f, 2500);
            Object fromCache;
            Random rand = new Random();

            while ((line = input.readLine()) != null)
            {
                // Tokenise the line. "errors" is incremented up front and
                // decremented again once the line is fully stored.
                String data = "";
                counter++;
                errors++;
                if (verbose)
                {
                    System.out.println("Line:" + line);
                }
                String[] parts = line.split(",");
//                uuid = parts[0];
                action = parts[1];
                id = parts[2];
                date = dateFormat.parse(parts[3]);
                user = parts[4];
                ip = parts[5];

                // Resolve the dns (if applicable) to get rid of search engine bots early on in the processing chain
                dns = "";
                if (!skipReverseDNS)
                {
                    // Is the IP address in the cache?
                    fromCache = dnsCache.get(ip);
                    if (fromCache != null)
                    {
                        dns = (String)fromCache;
                    }
                    else
                    {
                        try
                        {
                            dns = DnsLookup.reverseDns(ip);
                            dnsCache.put(ip, dns);
                        } catch (Exception e)
                        {
                            // best effort — keep the empty dns on failure
                            dns = "";
                        }
                    }
                }

                data += ("ip addr = " + ip);
                data += (", dns name = " + dns);
                if ((dns.endsWith(".googlebot.com.")) ||
                    (dns.endsWith(".crawl.yahoo.net.")) ||
                    (dns.endsWith(".search.msn.com.")))
                {
                    if (verbose)
                    {
                        System.out.println(data + ", IGNORE (search engine)");
                    }
                    errors--;
                    searchengines++;
                    continue;
                }

                // Get the geo information for the user
                Location location;
                try {
                    location = geoipLookup.getLocation(ip);
                    city = location.city;
                    country = location.countryName;
                    countryCode = location.countryCode;
                    longitude = location.longitude;
                    latitude = location.latitude;
                    if(verbose) {
                        data += (", country = " + country);
                        data += (", city = " + city);
                        System.out.println(data);
                    }
                    try {
                        continent = LocationUtils.getContinentCode(countryCode);
                    } catch (Exception e) {
                        if (verbose)
                        {
                            System.out.println("Unknown country code: " + countryCode);
                        }
                        continue;
                    }
                } catch (Exception e) {
                    // No problem - just can't look them up
                }

                // Now find our dso; with -l the foreign id is replaced by a
                // random local one of the same type
                int type = 0;
                if ("view_bitstream".equals(action))
                {
                    type = Constants.BITSTREAM;
                    if (useLocal)
                    {
                        id = "" + localBitstreams.get(rand.nextInt(localBitstreams.size()));
                    }
                }
                else if ("view_item".equals(action))
                {
                    type = Constants.ITEM;
                    if (useLocal)
                    {
                        id = "" + localItems.get(rand.nextInt(localItems.size()));
                    }
                }
                else if ("view_collection".equals(action))
                {
                    type = Constants.COLLECTION;
                    if (useLocal)
                    {
                        id = "" + localCollections.get(rand.nextInt(localCollections.size()));
                    }
                }
                else if ("view_community".equals(action))
                {
                    type = Constants.COMMUNITY;
                    if (useLocal)
                    {
                        id = "" + localCommunities.get(rand.nextInt(localCommunities.size()));
                    }
                }

                DSpaceObject dso = DSpaceObject.find(context, type, Integer.parseInt(id));
                if (dso == null)
                {
                    if (verbose)
                    {
                        System.err.println(" - DSO with ID '" + id + "' is no longer in the system");
                    }
                    continue;
                }

                // Get the eperson details
                EPerson eperson = EPerson.findByEmail(context, user);
                int epersonId = 0;
                if (eperson != null)
                {
                    // FIX: the original discarded the return value here, so
                    // epersonId stayed 0 and "epersonid" was never stored.
                    epersonId = eperson.getID();
                }

                // Save it in our server
                SolrInputDocument sid = new SolrInputDocument();
                sid.addField("ip", ip);
                sid.addField("type", dso.getType());
                sid.addField("id", dso.getID());
                sid.addField("time", DateFormatUtils.format(date, SolrLogger.DATE_FORMAT_8601));
                sid.addField("continent", continent);
                sid.addField("country", country);
                sid.addField("countryCode", countryCode);
                sid.addField("city", city);
                sid.addField("latitude", latitude);
                sid.addField("longitude", longitude);
                if (epersonId > 0)
                {
                    sid.addField("epersonid", epersonId);
                }
                if (dns != null)
                {
                    sid.addField("dns", dns.toLowerCase());
                }

                if (dso instanceof Item) {
                    Item item = (Item) dso;
                    // Store the configured metadata fields ("schema.element.qualifier")
                    for (Map.Entry<String, String> entry : metadataStorageInfo.entrySet()) {
                        String dcField = entry.getValue();

                        DCValue[] vals = item.getMetadata(dcField.split("\\.")[0],
                                dcField.split("\\.")[1], dcField.split("\\.")[2],
                                Item.ANY);
                        for (DCValue val1 : vals) {
                            String val = val1.value;
                            sid.addField(entry.getKey(), val);
                            sid.addField(entry.getKey() + "_search", val.toLowerCase());
                        }
                    }
                }

                SolrLogger.storeParents(sid, dso);
                solr.add(sid);
                errors--;
            }
        }
        catch (RuntimeException re)
        {
            throw re;
        }
        catch (Exception e)
        {
            System.err.println(e.getMessage());
            log.error(e.getMessage(), e);
        }
        finally
        {
            // FIX: the reader was never closed in the original (leak when
            // importing many files with -m)
            if (input != null)
            {
                try
                {
                    input.close();
                }
                catch (IOException ioe)
                {
                    log.warn("Unable to close input file", ioe);
                }
            }
        }

        DecimalFormat percentage = new DecimalFormat("##.###");
        int committed = counter - errors - searchengines;
        System.out.println("Processed " + counter + " log lines");
        if (counter > 0)
        {
            Double committedpercentage = 100d * committed / counter;
            System.out.println(" - " + committed + " entries added to solr: " + percentage.format(committedpercentage) + "%");
            Double errorpercentage = 100d * errors / counter;
            System.out.println(" - " + errors + " errors: " + percentage.format(errorpercentage) + "%");
            Double sepercentage = 100d * searchengines / counter;
            System.out.println(" - " + searchengines + " search engine activity skipped: " + percentage.format(sepercentage) + "%");
            System.out.print("About to commit data to solr...");

            // Commit at the end because it takes a while
            try
            {
                solr.commit();
            }
            catch (SolrServerException sse)
            {
                System.err.println("Error committing statistics to solr server!");
                sse.printStackTrace();
                System.exit(1);
            }
            catch (IOException ioe)
            {
                System.err.println("Error writing to solr server!");
                ioe.printStackTrace();
                System.exit(1);
            }
        }
        System.out.println(" done!");
    }

    /**
     * Print the help message
     *
     * @param options The command line options the user gave
     * @param exitCode the system exit code to use
     */
    private static void printHelp(Options options, int exitCode)
    {
        // print the help message
        HelpFormatter myhelp = new HelpFormatter();
        myhelp.printHelp("StatisticsImporter\n", options);
        System.exit(exitCode);
    }

    /**
     * Main method to run the statistics importer.
     *
     * @param args The command line arguments
     * @throws Exception If something goes wrong
     */
    public static void main(String[] args) throws Exception
    {
        CommandLineParser parser = new PosixParser();

        Options options = new Options();

        options.addOption("i", "in", true, "the input file ('-' or omit for standard input)");
        options.addOption("l", "local", false, "developers tool - map external log file to local handles");
        options.addOption("m", "multiple", false, "treat the input file as having a wildcard ending");
        options.addOption("s", "skipdns", false, "skip performing reverse DNS lookups on IP addresses");
        options.addOption("v", "verbose", false, "display verbose output (useful for debugging)");
        options.addOption("h", "help", false, "help");
        CommandLine line = parser.parse(options, args);

        // Did the user ask to see the help?
        if (line.hasOption('h'))
        {
            printHelp(options, 0);
        }

        if (line.hasOption('s'))
        {
            skipReverseDNS = true;
        }

        // Whether or not to convert handles to handles used in a local system
        // (useful if using someone else's log file for testing)
        boolean local = line.hasOption('l');

        // We got all our parameters now get the rest
        Context context = new Context();

        // Verbose option
        boolean verbose = line.hasOption('v');

        // Find our solr server
        String sserver = ConfigurationManager.getProperty("solr-statistics", "server");
        if (verbose)
        {
            System.out.println("Writing to solr server at: " + sserver);
        }
        solr = new CommonsHttpSolrServer(sserver);

        metadataStorageInfo = SolrLogger.getMetadataStorageInfo();
        String dbfile = ConfigurationManager.getProperty("usage-statistics", "dbfile");
        try
        {
            geoipLookup = new LookupService(dbfile, LookupService.GEOIP_STANDARD);
        }
        catch (FileNotFoundException fe)
        {
            log.error("The GeoLite Database file is missing (" + dbfile + ")! Solr Statistics cannot generate location based reports! Please see the DSpace installation instructions for instructions to install this file.", fe);
        }
        catch (IOException e)
        {
            log.error("Unable to load GeoLite Database file (" + dbfile + ")! You may need to reinstall it. See the DSpace installation instructions for more details.", e);
        }

        StatisticsImporter si = new StatisticsImporter(local);
        if (line.hasOption('m'))
        {
            // Convert all the files whose names start with the -i value
            final File sample = new File(line.getOptionValue('i'));
            File dir = sample.getParentFile();
            FilenameFilter filter = new FilenameFilter()
            {
                public boolean accept(File dir, String name)
                {
                    return name.startsWith(sample.getName());
                }
            };
            String[] children = dir.list(filter);
            for (String in : children)
            {
                System.out.println(in);
                si.load(dir.getAbsolutePath() + System.getProperty("file.separator") + in, context, verbose);
            }
        }
        else
        {
            // Just convert the one file
            si.load(line.getOptionValue('i'), context, verbose);
        }
    }

    /**
     * Inner class to hold a cache of reverse lookups of IP addresses.
     * LRU semantics via LinkedHashMap access order.
     * @param <K> key type (IP address string)
     * @param <V> value type (resolved host name)
     */
    static class DNSCache<K,V> extends LinkedHashMap<K,V>
    {
        private int maxCapacity;

        public DNSCache(int initialCapacity, float loadFactor, int maxCapacity)
        {
            // "true" = access order, which gives LRU eviction below
            super(initialCapacity, loadFactor, true);
            this.maxCapacity = maxCapacity;
        }

        @Override
        protected boolean removeEldestEntry(java.util.Map.Entry<K,V> eldest)
        {
            return size() >= this.maxCapacity;
        }
    }
}

View File

@@ -0,0 +1,450 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.util;
import com.maxmind.geoip.Location;
import com.maxmind.geoip.LookupService;
import org.apache.commons.cli.*;
import org.apache.commons.lang.time.DateFormatUtils;
import org.apache.log4j.Logger;
import org.dspace.content.Bitstream;
import org.dspace.content.Bundle;
import org.dspace.content.DSpaceObject;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.eperson.EPerson;
import org.dspace.statistics.ElasticSearchLogger;
import org.dspace.statistics.SolrLogger;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.action.bulk.BulkRequestBuilder;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.mapper.geo.GeoPoint;
import java.io.*;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Random;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
/**
* Created by IntelliJ IDEA.
* User: peterdietz
* Date: 8/15/12
* Time: 2:46 PM
* To change this template use File | Settings | File Templates.
*/
public class StatisticsImporterElasticSearch {
private static final Logger log = Logger.getLogger(StatisticsImporterElasticSearch.class);
/** Date format */
private static SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
//TODO ES Client
/** GEOIP lookup service */
private static LookupService geoipLookup;
/** Metadata storage information */
private static Map<String, String> metadataStorageInfo;
/** Whether to skip the DNS reverse lookup or not */
private static boolean skipReverseDNS = false;
private static ElasticSearchLogger elasticSearchLoggerInstance;
private static Client client;
private static BulkRequestBuilder bulkRequest;
/**
* Read lines from the statistics file and load their data into Elastic Search.
*
* @param filename The filename of the file to load
* @param context The DSpace Context
* @param verbose Whether to display verbose output
*/
private void load(String filename, Context context, boolean verbose)
{
// Item counter
int counter = 0;
int errors = 0;
int searchengines = 0;
try
{
BufferedReader input;
if (null == filename || "-".equals(filename))
{
input = new BufferedReader(new InputStreamReader(System.in));
filename = "standard input";
}
else
input = new BufferedReader(new FileReader(new File(filename)));
// Print out the filename for confirmation
System.out.println("Processing file: " + filename);
String line;
// String uuid;
String action;
String id;
Date date;
String user;
String ip;
String continent = "";
String country = "";
String countryCode = "";
float longitude = 0f;
float latitude = 0f;
String city = "";
String dns;
DNSCache dnsCache = new DNSCache(2500, 0.75f, 2500);
Object fromCache;
Random rand = new Random();
while ((line = input.readLine()) != null)
{
// Tokenise the line
counter++;
errors++;
if (verbose)
{
System.out.println("Line:" + line);
}
String[] parts = line.split(",");
// uuid = parts[0];
action = parts[1];
id = parts[2];
date = dateFormat.parse(parts[3]);
user = parts[4];
ip = parts[5];
// Resolve the dns (if applicable) to get rid of search engine bots early on in the processing chain
dns = "";
if (!skipReverseDNS)
{
// Is the IP address in the cache?
fromCache = dnsCache.get(ip);
if (fromCache != null)
{
dns = (String)fromCache;
}
else
{
try
{
dns = DnsLookup.reverseDns(ip);
dnsCache.put(ip, dns);
} catch (Exception e)
{
dns = "";
}
}
}
String data = "";
data += ("ip addr = " + ip);
data += (", dns name = " + dns);
if ((dns.endsWith(".googlebot.com.")) ||
(dns.endsWith(".crawl.yahoo.net.")) ||
(dns.endsWith(".search.msn.com.")))
{
if (verbose)
{
System.out.println(data + ", IGNORE (search engine)");
}
errors--;
searchengines++;
continue;
}
// Get the geo information for the user
Location location;
try {
location = geoipLookup.getLocation(ip);
city = location.city;
country = location.countryName;
countryCode = location.countryCode;
longitude = location.longitude;
latitude = location.latitude;
if(verbose) {
data += (", country = " + country);
data += (", city = " + city);
System.out.println(data);
}
try {
continent = LocationUtils.getContinentCode(countryCode);
} catch (Exception e) {
if (verbose)
{
System.out.println("Unknown country code: " + countryCode);
}
continue;
}
} catch (Exception e) {
// No problem - just can't look them up
}
// Now find our dso
int type = 0;
if ("view_bitstream".equals(action))
{
type = Constants.BITSTREAM;
}
else if ("view_item".equals(action))
{
type = Constants.ITEM;
}
else if ("view_collection".equals(action))
{
type = Constants.COLLECTION;
}
else if ("view_community".equals(action))
{
type = Constants.COMMUNITY;
}
DSpaceObject dso = DSpaceObject.find(context, type, Integer.parseInt(id));
if (dso == null)
{
if (verbose)
{
System.err.println(" - DSO with ID '" + id + "' is no longer in the system");
}
continue;
}
// Get the eperson details
EPerson eperson = EPerson.findByEmail(context, user);
int epersonId = 0;
if (eperson != null)
{
eperson.getID();
}
//TODO Is there any way to reuse ElasticSearchLogger.post() ?
// Save it in our server
XContentBuilder postBuilder = XContentFactory.jsonBuilder().startObject()
.field("id", dso.getID())
.field("typeIndex", dso.getType())
.field("type", dso.getTypeText())
.field("geo", new GeoPoint(latitude, longitude))
.field("continent", continent)
.field("countryCode", countryCode)
.field("country", country)
.field("city", city)
.field("ip", ip)
.field("time", DateFormatUtils.format(date, SolrLogger.DATE_FORMAT_8601));
// Unable to get UserAgent from logs. .field("userAgent")
if (dso instanceof Bitstream) {
Bitstream bit = (Bitstream) dso;
Bundle[] bundles = bit.getBundles();
postBuilder = postBuilder.field("bundleName").startArray();
for (Bundle bundle : bundles) {
postBuilder = postBuilder.value(bundle.getName());
}
postBuilder = postBuilder.endArray();
}
if (epersonId > 0)
{
postBuilder = postBuilder.field("epersonid", epersonId);
}
if (dns != null)
{
postBuilder = postBuilder.field("dns", dns.toLowerCase());
}
//Save for later: .field("isBot")
elasticSearchLoggerInstance.storeParents(postBuilder, elasticSearchLoggerInstance.getParents(dso));
bulkRequest.add(client.prepareIndex(elasticSearchLoggerInstance.getIndexName(), elasticSearchLoggerInstance.getIndexType())
.setSource(postBuilder.endObject()));
errors--;
}
if(bulkRequest.numberOfActions() > 0) {
BulkResponse bulkResponse = bulkRequest.execute().actionGet();
if(bulkResponse.hasFailures()) {
log.error("Bulk Request Failed due to: " + bulkResponse.buildFailureMessage());
}
}
}
catch (RuntimeException re)
{
throw re;
}
catch (Exception e)
{
System.err.println(e.getMessage());
log.error(e.getMessage(), e);
}
DecimalFormat percentage = new DecimalFormat("##.###");
int committed = counter - errors - searchengines;
System.out.println("Processed " + counter + " log lines");
if (counter > 0)
{
Double committedpercentage = 100d * committed / counter;
System.out.println(" - " + committed + " entries added to ElasticSearch: " + percentage.format(committedpercentage) + "%");
Double errorpercentage = 100d * errors / counter;
System.out.println(" - " + errors + " errors: " + percentage.format(errorpercentage) + "%");
Double sepercentage = 100d * searchengines / counter;
System.out.println(" - " + searchengines + " search engine activity skipped: " + percentage.format(sepercentage) + "%");
}
System.out.println(" done!");
}
/**
* Print the help message
*
* @param options The command line options the user gave
* @param exitCode the system exit code to use
*/
private static void printHelp(Options options, int exitCode)
{
// print the help message
HelpFormatter myhelp = new HelpFormatter();
myhelp.printHelp("StatisticsImporterElasticSearch\n", options);
System.exit(exitCode);
}
/**
* Main method to run the statistics importer.
*
* @param args The command line arguments
* @throws Exception If something goes wrong
*/
public static void main(String[] args) throws Exception
{
CommandLineParser parser = new PosixParser();
Options options = new Options();
options.addOption("i", "in", true, "the input file ('-' or omit for standard input)");
options.addOption("m", "multiple", false, "treat the input file as having a wildcard ending");
options.addOption("s", "skipdns", false, "skip performing reverse DNS lookups on IP addresses");
options.addOption("v", "verbose", false, "display verbose output (useful for debugging)");
options.addOption("h", "help", false, "help");
CommandLine line = parser.parse(options, args);
// Did the user ask to see the help?
if (line.hasOption('h'))
{
printHelp(options, 0);
}
if (line.hasOption('s'))
{
skipReverseDNS = true;
}
elasticSearchLoggerInstance = new ElasticSearchLogger();
log.info("Getting ElasticSearch Transport Client for StatisticsImporterElasticSearch...");
// This is only invoked via terminal, do not use _this_ node as that data storing node.
// Need to get a NodeClient or TransportClient, but definitely do not want to get a local data storing client.
client = elasticSearchLoggerInstance.getClient(ElasticSearchLogger.ClientType.TRANSPORT);
client.admin().indices().prepareRefresh(ElasticSearchLogger.getIndexName()).execute().actionGet();
bulkRequest = client.prepareBulk();
// We got all our parameters now get the rest
Context context = new Context();
// Verbose option
boolean verbose = line.hasOption('v');
String dbfile = ConfigurationManager.getProperty("usage-statistics", "dbfile");
try
{
geoipLookup = new LookupService(dbfile, LookupService.GEOIP_STANDARD);
}
catch (FileNotFoundException fe)
{
log.error("The GeoLite Database file is missing (" + dbfile + ")! Elastic Search Statistics cannot generate location based reports! Please see the DSpace installation instructions for instructions to install this file.", fe);
}
catch (IOException e)
{
log.error("Unable to load GeoLite Database file (" + dbfile + ")! You may need to reinstall it. See the DSpace installation instructions for more details.", e);
}
StatisticsImporterElasticSearch elasticSearchImporter = new StatisticsImporterElasticSearch();
if (line.hasOption('m'))
{
// Convert all the files
final File sample = new File(line.getOptionValue('i'));
File dir = sample.getParentFile();
FilenameFilter filter = new FilenameFilter()
{
public boolean accept(File dir, String name)
{
return name.startsWith(sample.getName());
}
};
String[] children = dir.list(filter);
for (String in : children)
{
System.out.println(in);
elasticSearchImporter.load(dir.getAbsolutePath() + System.getProperty("file.separator") + in, context, verbose);
}
}
else
{
// Just convert the one file
elasticSearchImporter.load(line.getOptionValue('i'), context, verbose);
}
}
/**
 * Inner class to hold a bounded, least-recently-used cache of reverse DNS
 * lookups of IP addresses.  Backed by an access-ordered {@link LinkedHashMap}
 * that evicts its eldest (least-recently-accessed) entry once the configured
 * capacity is exceeded.
 *
 * @param <K> key type (the IP address being looked up)
 * @param <V> value type (the resolved host name)
 */
static class DNSCache<K,V> extends LinkedHashMap<K,V>
{
    /** Maximum number of entries this cache may hold. */
    private final int maxCapacity;

    /**
     * @param initialCapacity initial capacity of the backing map
     * @param loadFactor load factor of the backing map
     * @param maxCapacity maximum number of entries retained before the
     *                    least-recently-used entry is evicted
     */
    public DNSCache(int initialCapacity, float loadFactor, int maxCapacity)
    {
        // 'true' selects access-order iteration, so the eldest entry is
        // always the least-recently-used one.
        super(initialCapacity, loadFactor, true);
        this.maxCapacity = maxCapacity;
    }

    /**
     * Evict the eldest entry once the map grows beyond maxCapacity.
     * NOTE(review): the original test was {@code size() >= maxCapacity},
     * which evicted one entry too early and capped the cache at
     * maxCapacity - 1 entries; {@code >} lets it hold exactly maxCapacity,
     * matching the canonical LinkedHashMap idiom.
     */
    @Override
    protected boolean removeEldestEntry(java.util.Map.Entry<K,V> eldest)
    {
        return size() > this.maxCapacity;
    }
}
}

View File

@@ -0,0 +1,15 @@
#
# The contents of this file are subject to the license and copyright
# detailed in the LICENSE and NOTICE files at the root of the source
# tree and available online at
#
# http://www.dspace.org/license/
#
NA = North America
SA = South America
AN = Antarctica
AF = Africa
EU = Europe
AS = Asia
OC = Oceania

View File

@@ -0,0 +1,254 @@
#
# The contents of this file are subject to the license and copyright
# detailed in the LICENSE and NOTICE files at the root of the source
# tree and available online at
#
# http://www.dspace.org/license/
#
AF = AS
AX = EU
AL = EU
DZ = AF
AS = OC
AD = EU
AO = AF
AI = NA
AQ = AN
AG = NA
AR = SA
AM = AS
AW = NA
AU = OC
AT = EU
AZ = AS
BS = NA
BH = AS
BD = AS
BB = NA
BY = EU
BE = EU
BZ = NA
BJ = AF
BM = NA
BT = AS
BO = SA
BA = EU
BW = AF
BV = AN
BR = SA
IO = AS
VG = NA
BN = AS
BG = EU
BF = AF
BI = AF
KH = AS
CM = AF
CA = NA
CV = AF
KY = NA
CF = AF
TD = AF
CL = SA
CN = AS
CX = AS
CC = AS
CO = SA
KM = AF
CD = AF
CG = AF
CK = OC
CR = NA
CI = AF
HR = EU
CU = NA
CY = AS
CZ = EU
DK = EU
DJ = AF
DM = NA
DO = NA
EC = SA
EG = AF
SV = NA
GQ = AF
ER = AF
EE = EU
ET = AF
FO = EU
FK = SA
FJ = OC
FI = EU
FR = EU
GF = SA
PF = OC
TF = AN
GA = AF
GM = AF
GE = AS
DE = EU
GH = AF
GI = EU
GR = EU
GL = NA
GD = NA
GP = NA
GU = OC
GT = NA
GG = EU
GN = AF
GW = AF
GY = SA
HT = NA
HM = AN
VA = EU
HN = NA
HK = AS
HU = EU
IS = EU
IN = AS
ID = AS
IR = AS
IQ = AS
IE = EU
IM = EU
IL = AS
IT = EU
JM = NA
JP = AS
JE = EU
JO = AS
KZ = AS
KE = AF
KI = OC
KP = AS
KR = AS
KW = AS
KG = AS
LA = AS
LV = EU
LB = AS
LS = AF
LR = AF
LY = AF
LI = EU
LT = EU
LU = EU
MO = AS
MK = EU
MG = AF
MW = AF
MY = AS
MV = AS
ML = AF
MT = EU
MH = OC
MQ = NA
MR = AF
MU = AF
YT = AF
MX = NA
FM = OC
MD = EU
MC = EU
MN = AS
ME = EU
MS = NA
MA = AF
MZ = AF
MM = AS
NA = AF
NR = OC
NP = AS
AN = NA
NL = EU
NC = OC
NZ = OC
NI = NA
NE = AF
NG = AF
NU = OC
NF = OC
MP = OC
NO = EU
OM = AS
PK = AS
PW = OC
PS = AS
PA = NA
PG = OC
PY = SA
PE = SA
PH = AS
PN = OC
PL = EU
PT = EU
PR = NA
QA = AS
RE = AF
RO = EU
RU = EU
RW = AF
BL = NA
SH = AF
KN = NA
LC = NA
MF = NA
PM = NA
VC = NA
WS = OC
SM = EU
ST = AF
SA = AS
SN = AF
RS = EU
SC = AF
SL = AF
SG = AS
SK = EU
SI = EU
SB = OC
SO = AF
ZA = AF
GS = AN
ES = EU
LK = AS
SD = AF
SR = SA
SJ = EU
SZ = AF
SE = EU
CH = EU
SY = AS
TW = AS
TJ = AS
TZ = AF
TH = AS
TL = AS
TG = AF
TK = OC
TO = OC
TT = NA
TN = AF
TR = AS
TM = AS
TC = NA
TV = OC
UG = AF
UA = EU
AE = AS
GB = EU
US = NA
UM = OC
VI = NA
UY = SA
UZ = AS
VU = OC
VE = SA
VN = AS
WF = OC
EH = AF
YE = AS
ZM = AF
ZW = AF

View File

@@ -0,0 +1,79 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.util;
import java.util.Locale;
import static org.junit.Assert.*;
import org.dspace.core.I18nUtil;
import org.junit.Test;
/**
* @author mwood
*/
public class TestLocationUtils
{
// Localized fallback message expected when a continent cannot be identified
// (asserted against LocationUtils.getContinentName for null/bad codes below).
private static final String UNKNOWN_CONTINENT = I18nUtil
.getMessage("org.dspace.statistics.util.LocationUtils.unknown-continent");
// Localized fallback message expected when a country cannot be identified
// (asserted against LocationUtils.getCountryName for a null code below).
private static final String UNKNOWN_COUNTRY = I18nUtil
.getMessage("org.dspace.statistics.util.LocationUtils.unknown-country");
/**
 * Test method for {@link org.dspace.statistics.util.LocationUtils#getContinentCode(java.lang.String)}.
 *
 * A known country code maps to its two-letter continent code; null or
 * unrecognized input yields a (longer) human-readable message instead.
 */
@Test
public void testGetContinentCode()
{
    // Fixed: JUnit's assertEquals takes (expected, actual) — the original
    // had the arguments reversed, unlike every other test in this class.
    assertEquals("NA", LocationUtils.getContinentCode("US"));

    // Unknown inputs return a message string, distinguishable from a
    // real continent code by being longer than two characters.
    assertTrue(LocationUtils.getContinentCode(null).length() > 2); // message
    assertTrue(LocationUtils.getContinentCode("xyz").length() > 2); // message
}
/**
 * Test method for {@link org.dspace.statistics.util.LocationUtils#getContinentName(java.lang.String)}.
 *
 * A valid continent code resolves to its display name; null or
 * unrecognized codes fall back to the localized "unknown continent"
 * message.
 */
@Test
public void testGetContinentNameString()
{
    // Known code resolves to its English display name.
    assertEquals("North America", LocationUtils.getContinentName("NA"));

    // Both null and an unrecognized code produce the fallback message.
    for (String badCode : new String[] { null, "XXXX" })
    {
        assertEquals(UNKNOWN_CONTINENT, LocationUtils.getContinentName(badCode));
    }
}
/**
 * Test method for {@link org.dspace.statistics.util.LocationUtils#getContinentName(java.lang.String, java.util.Locale)}.
 *
 * The locale-aware overload returns the continent's display name for the
 * requested locale.
 */
@Test
public void testGetContinentNameStringLocale()
{
    final String name = LocationUtils.getContinentName("NA", Locale.ENGLISH);
    assertEquals("North America", name);
}
/**
 * Test method for {@link org.dspace.statistics.util.LocationUtils#getCountryName(java.lang.String)}.
 *
 * A valid ISO country code resolves to its display name; a null code
 * yields the localized "unknown country" message, while an unrecognized
 * (but non-null) code is echoed back unchanged.
 */
@Test
public void testGetCountryNameString()
{
    // Known ISO code maps to its English display name.
    final String usName = LocationUtils.getCountryName("US");
    assertEquals("United States", usName);

    // Null input falls back to the localized message...
    assertEquals(UNKNOWN_COUNTRY, LocationUtils.getCountryName(null));

    // ...whereas an unrecognized code is returned as-is.
    assertEquals("XX", LocationUtils.getCountryName("XX"));
}
/**
 * Test method for {@link org.dspace.statistics.util.LocationUtils#getCountryName(java.lang.String, java.util.Locale)}.
 *
 * The locale-aware overload returns the country's display name for the
 * requested locale.
 */
@Test
public void testGetCountryNameStringLocale()
{
    final String name = LocationUtils.getCountryName("US", Locale.ENGLISH);
    assertEquals("United States", name);
}
}