67688: Add IRUS patch to DSpace 7

This commit is contained in:
Yana De Pauw
2020-01-09 13:19:43 +01:00
parent c8bbe99e37
commit 46a3407642
25 changed files with 1272 additions and 0 deletions

View File

@@ -0,0 +1,66 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.export;
import org.dspace.services.EventService;
import org.dspace.services.model.EventListener;
import org.springframework.beans.BeansException;
import org.springframework.beans.factory.config.BeanPostProcessor;
/**
* AbstractUsageEventListener is used as the base class for listening events running
* in the EventService.
*
* @author Mark Diggory (mdiggory at atmire.com)
* @version $Revision: $
*/
public abstract class AbstractUsageEventListener implements EventListener, BeanPostProcessor {
    /**
     * Creates the listener. Registration with the event service happens later,
     * through {@link #setEventService(EventService)}.
     */
    public AbstractUsageEventListener() {
    }

    /**
     * Post-initialization hook that leaves every bean untouched.
     *
     * @param bean     the bean instance being processed
     * @param beanName the name of that bean
     * @return the same {@code bean} instance
     */
    @Override
    public Object postProcessAfterInitialization(Object bean, String beanName) throws BeansException {
        return bean;
    }

    /**
     * Pre-initialization hook: when the bean named
     * {@code org.dspace.services.EventService} passes by, this listener registers
     * itself on it. All beans are returned unchanged.
     *
     * @param bean     the bean instance being processed
     * @param beanName the name of that bean
     * @return the same {@code bean} instance
     */
    @Override
    public Object postProcessBeforeInitialization(Object bean, String beanName) throws BeansException {
        boolean isEventService = beanName.equals("org.dspace.services.EventService");
        if (isEventService) {
            setEventService((EventService) bean);
        }
        return bean;
    }

    /**
     * Returns an empty array, which flags this listener as consuming events
     * regardless of their name prefix.
     *
     * @return an empty {@code String[]}
     */
    public String[] getEventNamePrefixes() {
        return new String[] {};
    }

    /**
     * Returns {@code null}: events generated for all resources are consumed.
     *
     * @return always {@code null}
     */
    public String getResourcePrefix() {
        return null;
    }

    /**
     * Registers this listener on the given event service.
     *
     * @param service the event service to register on
     * @throws IllegalStateException if {@code service} is {@code null}
     */
    public void setEventService(EventService service) {
        if (service == null) {
            throw new IllegalStateException("EventService handed to Listener cannot be null");
        }
        service.registerEventListener(this);
    }
}

View File

@@ -0,0 +1,417 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.export;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.UUID;
import javax.servlet.http.HttpServletRequest;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import org.dspace.app.util.Util;
import org.dspace.content.Bitstream;
import org.dspace.content.Bundle;
import org.dspace.content.DCDate;
import org.dspace.content.Item;
import org.dspace.content.MetadataField;
import org.dspace.content.MetadataValue;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.MetadataFieldService;
import org.dspace.core.Context;
import org.dspace.core.LogManager;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.dspace.services.model.Event;
import org.dspace.statistics.export.factory.OpenURLTrackerLoggerServiceFactory;
import org.dspace.statistics.export.service.OpenURLTrackerLoggerService;
import org.dspace.statistics.util.SpiderDetector;
import org.dspace.usage.AbstractUsageEventListener;
import org.dspace.usage.UsageEvent;
/**
* User: kevin (kevin at atmire.com)
* Date: 30-mrt-2010
* Time: 16:37:56
*/
public class ExportUsageEventListener extends AbstractUsageEventListener {
/* Log4j logger shared by all instances of this listener. */
private static Logger log = Logger.getLogger(ExportUsageEventListener.class);
/*
 * The metadata field whose values are checked to decide whether an item is
 * exported; resolved in init() from the "stats.tracker.type-field" property.
 */
private static MetadataField trackerType;
/*
 * Lower-cased values read from "stats.tracker.type-value". An item carrying one
 * of these values in the trackerType field is NOT exported (see shouldProcessItem).
 * Null when no values are configured.
 */
private static List<String> trackerValues;
/*
 * The base url of the tracker: "stats.tracker.produrl" when
 * "stats.tracker.environment" equals "production", else "stats.tracker.testurl".
 */
private static String baseUrl;
/* Value of "stats.tracker.urlversion"; sent as the url_ver query parameter. */
private static String trackerUrlVersion;
/* Value sent as rft_dat for an item view. */
private static final String ITEM_VIEW = "Investigation";
/* Value sent as rft_dat for a bitstream download. */
private static final String BITSTREAM_DOWNLOAD = "Request";
/* Configuration service, obtained lazily on the first init() call. */
private static ConfigurationService configurationService;
/**
 * Lazily loads the tracker configuration into the static fields of this class.
 * <p>
 * The configuration service is looked up once. The remaining settings are
 * (re)loaded for as long as {@code trackerType} is still {@code null}. Any
 * exception during loading is logged and resets all tracker settings to
 * {@code null}.
 *
 * @param context the DSpace context used to resolve the configured metadata field
 */
public void init(Context context) {
    try {
        if (configurationService == null) {
            configurationService = DSpaceServicesFactory.getInstance().getConfigurationService();
        }
        if (trackerType != null) {
            // Settings were resolved by an earlier call.
            return;
        }

        trackerType = resolveConfigPropertyToMetadataField(context, "tracker.type-field");

        String[] configuredValues = configurationService.getArrayProperty("stats.tracker.type-value");
        if (configuredValues.length == 0) {
            trackerValues = null;
        } else {
            trackerValues = new ArrayList<>();
            for (String configuredValue : configuredValues) {
                trackerValues.add(configuredValue.toLowerCase());
            }
        }

        String environment = configurationService.getProperty("stats.tracker.environment");
        String urlProperty = StringUtils.equals(environment, "production")
            ? "stats.tracker.produrl"
            : "stats.tracker.testurl";
        baseUrl = configurationService.getProperty(urlProperty);

        trackerUrlVersion = configurationService.getProperty("stats.tracker.urlversion");
    } catch (Exception e) {
        log.error("Unknown error resolving configuration for the export usage event.", e);
        trackerType = null;
        trackerValues = null;
        baseUrl = null;
        trackerUrlVersion = null;
    }
}
/**
 * Handles a usage event and forwards qualifying item views and bitstream
 * downloads to the tracker.
 * <p>
 * Only {@link UsageEvent}s are considered. An item view is exported when the
 * item is archived, the current user cannot edit it and
 * {@link #shouldProcessItem(Item)} accepts it. A bitstream download is exported
 * under the same item conditions, provided the request is not from a spider and
 * the bitstream's first bundle is named {@code ORIGINAL}.
 * <p>
 * Any exception is logged and swallowed, so a tracker problem never propagates
 * to the code that fired the event.
 *
 * @param event the event to inspect
 */
public void receiveEvent(Event event) {
    if (!(event instanceof UsageEvent)) {
        return;
    }
    UsageEvent ue = (UsageEvent) event;
    Context context = ue.getContext();
    try {
        //Check for item investigation
        if (ue.getObject() instanceof Item) {
            Item item = (Item) ue.getObject();
            if (item.isArchived() && !ContentServiceFactory.getInstance().getItemService()
                                                           .canEdit(context, item)) {
                init(context);
                if (shouldProcessItem(item)) {
                    processItem(context, item, null, ue.getRequest(), ITEM_VIEW);
                }
            }
        }
        //Check for bitstream download
        if (ue.getObject() instanceof Bitstream) {
            Bitstream bit = (Bitstream) ue.getObject();
            //Check for an item
            if (0 < bit.getBundles().size()) {
                if (!SpiderDetector.isSpider(ue.getRequest())) {
                    Bundle bundle = bit.getBundles().get(0);
                    if (bundle.getName() == null || !bundle.getName().equals("ORIGINAL")) {
                        return;
                    }
                    if (0 < bundle.getItems().size()) {
                        Item item = bundle.getItems().get(0);
                        if (item.isArchived() && !ContentServiceFactory.getInstance().getItemService()
                                                                       .canEdit(context, item)) {
                            //Check if we have a valid type of item !
                            init(context);
                            if (shouldProcessItem(item)) {
                                processItem(context, item, bit, ue.getRequest(), BITSTREAM_DOWNLOAD);
                            }
                        }
                    }
                } else {
                    log.info("Robot (" + ue.getRequest().getHeader("user-agent") + ") accessed " + bit
                        .getName() + "/" + bit.getSource());
                }
            }
        }
    } catch (Exception e) {
        // Collect the id and type defensively: the error handler itself must not
        // throw, e.g. when the event carries no object.
        UUID id = null;
        int type = -1;
        try {
            if (ue.getObject() != null) {
                id = ue.getObject().getID();
                type = ue.getObject().getType();
            }
        } catch (Exception ignored) {
            // Keep whatever was collected so far; the primary error is logged below.
        }
        // The logger already records the stack trace, so nothing is printed to stderr.
        log.error(LogManager.getHeader(context, "Error while processing export of use event",
                                       "Id: " + id + " type: " + type), e);
    }
}
/**
 * Decides whether usage of the given item should be sent to the tracker.
 * <p>
 * An item is excluded only when a tracker type field and a list of tracker
 * values are configured and one of the item's values in that field
 * (lower-cased) appears in the list.
 *
 * @param item the item to check
 * @return {@code false} when the item carries an excluded type, {@code true} otherwise
 */
private boolean shouldProcessItem(Item item) {
    if (trackerType == null || trackerValues == null) {
        // No types to be excluded
        return true;
    }

    List<MetadataValue> itemTypes = ContentServiceFactory.getInstance().getItemService()
                                                         .getMetadata(item,
                                                                      trackerType.getMetadataSchema().getName(),
                                                                      trackerType.getElement(),
                                                                      trackerType.getQualifier(), Item.ANY);
    for (MetadataValue itemType : itemTypes) {
        if (trackerValues.contains(itemType.getValue().toLowerCase())) {
            // The item carries an excluded type, so it must not be processed
            return false;
        }
    }
    // No excluded type found (or the item has no type at all)
    return true;
}
/**
 * Builds the tracker query string for one usage event and hands it to
 * {@link #processUrl(Context, String)}.
 * <p>
 * The query carries the url version, client IP, user agent, an OAI identifier
 * built from {@code dspace.hostname} and the item handle, the referer, the
 * hostname and a timestamp. For {@code BITSTREAM_DOWNLOAD} and {@code ITEM_VIEW}
 * events it also carries the link to the bitstream or item and the event type.
 *
 * @param context   the DSpace context
 * @param item      the item that was viewed or that owns the bitstream
 * @param bitstream the downloaded bitstream; only read for bitstream downloads
 * @param request   the HTTP request that triggered the event
 * @param eventType {@code ITEM_VIEW} or {@code BITSTREAM_DOWNLOAD}
 * @throws IOException  if URL encoding fails
 * @throws SQLException if recording a failed delivery fails
 */
private void processItem(Context context, Item item, Bitstream bitstream, HttpServletRequest request,
                         String eventType) throws IOException, SQLException {
    //We have a valid url collect the rest of the data
    String clientIP = request.getRemoteAddr();
    boolean behindProxy = configurationService.getBooleanProperty("useProxies", false);
    if (behindProxy && request.getHeader("X-Forwarded-For") != null) {
        /* This header is a comma delimited list */
        for (String xfip : request.getHeader("X-Forwarded-For").split(",")) {
            /* proxy itself will sometime populate this header with the same value in
               remote address. ordering in spec is vague, we'll just take the last
               not equal to the proxy
            */
            if (!request.getHeader("X-Forwarded-For").contains(clientIP)) {
                clientIP = xfip.trim();
            }
        }
    }
    String clientUA = StringUtils.defaultIfBlank(request.getHeader("USER-AGENT"), "");
    String referer = StringUtils.defaultIfBlank(request.getHeader("referer"), "");

    //Start adding our data
    StringBuilder data = new StringBuilder();
    appendQueryParam(data, "url_ver", trackerUrlVersion);
    appendQueryParam(data, "req_id", clientIP);
    appendQueryParam(data, "req_dat", clientUA);
    appendQueryParam(data, "rft.artnum",
                     "oai:" + configurationService.getProperty("dspace.hostname") + ":" + item.getHandle());
    appendQueryParam(data, "rfr_dat", referer);
    appendQueryParam(data, "rfr_id", configurationService.getProperty("dspace.hostname"));
    appendQueryParam(data, "url_tim", new DCDate(new Date()).toString());

    if (BITSTREAM_DOWNLOAD.equals(eventType)) {
        appendQueryParam(data, "svc_dat", getBitstreamInfo(item, bitstream));
        appendQueryParam(data, "rft_dat", BITSTREAM_DOWNLOAD);
    } else if (ITEM_VIEW.equals(eventType)) {
        appendQueryParam(data, "svc_dat", getItemInfo(item));
        appendQueryParam(data, "rft_dat", ITEM_VIEW);
    }

    processUrl(context, baseUrl + "?" + data.toString());
}

/**
 * Appends one UTF-8 URL-encoded {@code name=value} pair to the query, preceded
 * by {@code &} unless the query is still empty.
 */
private static void appendQueryParam(StringBuilder query, String name, String value)
    throws UnsupportedEncodingException {
    if (query.length() > 0) {
        query.append("&");
    }
    query.append(URLEncoder.encode(name, "UTF-8")).append("=").append(URLEncoder.encode(value, "UTF-8"));
}
/**
 * Builds the link to a bitstream, rooted at the {@code dspace.url} property.
 * The layout depends on the {@code stats.dspace.type} property.
 * <pre>
 * jspui:     {dspace.url}/bitstream/{handle}/{sequence}/{name}
 *            e.g. http://demo.dspace.org/jspui/bitstream/10673/2235/1/Captura.JPG
 * otherwise: {dspace.url}/bitstream/{identifier}/{name}?sequence={sequence}
 *            e.g. http://demo.dspace.org/xmlui/bitstream/handle/10673/2235/Captura.JPG?sequence=1
 * </pre>
 * In the second layout the identifier is {@code handle/<handle>} when the item
 * has a handle, {@code item/<item id>} when it has none, and
 * {@code id/<bitstream id>} when there is no item.
 *
 * @param item      the item owning the bitstream
 * @param bitstream the bitstream to link to
 * @return the bitstream link
 */
private String getBitstreamInfo(final Item item, final Bitstream bitstream) {
    final String uiType = configurationService.getProperty("stats.dspace.type");
    final StringBuilder link = new StringBuilder(configurationService.getProperty("dspace.url"));
    link.append("/bitstream/");

    if ("jspui".equals(uiType)) {
        link.append(item.getHandle()).append("/").append(bitstream.getSequenceID());
        // If we can, append the pretty name of the bitstream to the URL
        try {
            if (bitstream.getName() != null) {
                link.append("/").append(Util.encodeBitstreamName(bitstream.getName(), "UTF-8"));
            }
        } catch (UnsupportedEncodingException uee) {
            // just ignore it, we don't have to have a pretty
            // name at the end of the URL because the sequence id will
            // locate it. However it means that links in this file might
            // not work....
        }
        return link.toString();
    }

    //xmlui
    final String identifier;
    if (item == null) {
        identifier = "id/" + bitstream.getID();
    } else if (item.getHandle() != null) {
        identifier = "handle/" + item.getHandle();
    } else {
        identifier = "item/" + item.getID();
    }
    link.append(identifier).append("/");
    // If we can, append the pretty name of the bitstream to the URL
    try {
        if (bitstream.getName() != null) {
            link.append(Util.encodeBitstreamName(bitstream.getName(), "UTF-8"));
        }
    } catch (UnsupportedEncodingException uee) {
        // just ignore it, we don't have to have a pretty
        // name at the end of the URL because the sequence id will
        // locate it. However it means that links in this file might
        // not work....
    }
    link.append("?sequence=").append(bitstream.getSequenceID());
    return link.toString();
}
/**
 * Builds the link to an item: the {@code dspace.url} property followed by
 * {@code /handle/} and the item's handle.
 *
 * @param item the item to link to
 * @return the item link
 */
private String getItemInfo(final Item item) {
    return new StringBuilder(configurationService.getProperty("dspace.url"))
        .append("/handle/")
        .append(item.getHandle())
        .toString();
}
/**
 * Sends one tracker URL and records it as failed when delivery does not
 * succeed.
 * <p>
 * Delivery succeeds only on an HTTP 200 response. Any other response code, or
 * any exception while connecting, stores the URL through
 * {@link #logfailed(Context, String)} so that it can be retried later. The
 * failure is recorded exactly once, outside the delivery {@code try}, so an
 * error while recording is not mistaken for a delivery error and retried.
 *
 * @param c      the DSpace context used to record a failed delivery
 * @param urlStr the complete tracker URL, query string included
 * @throws IOException  declared for callers; not thrown by the delivery attempt itself
 * @throws SQLException if recording the failed delivery fails
 */
private static void processUrl(Context c, String urlStr) throws IOException, SQLException {
    log.debug("Prepared to send url to tracker URL: " + urlStr);
    boolean delivered = false;
    HttpURLConnection http = null;
    try {
        // Send data
        URL url = new URL(urlStr);
        URLConnection conn = url.openConnection();
        http = (HttpURLConnection) conn;
        delivered = http.getResponseCode() == HttpURLConnection.HTTP_OK;
        if (delivered && log.isDebugEnabled()) {
            log.debug("Successfully posted " + urlStr + " on " + new Date());
        }
    } catch (Exception e) {
        log.error("Failed to send url to tracker URL: " + urlStr, e);
    } finally {
        if (http != null) {
            // Release the underlying connection resources.
            http.disconnect();
        }
    }
    if (!delivered) {
        ExportUsageEventListener.logfailed(c, urlStr);
    }
}
/**
 * Retries one previously failed tracker URL.
 * <p>
 * When the tracker answers with HTTP 200 the entry is removed from the
 * database. Otherwise an error is logged and the entry is kept for a later
 * run.
 *
 * @param context the DSpace context used to remove a successfully sent entry
 * @param tracker the stored failed delivery to retry
 * @throws SQLException if removing the entry fails
 */
private static void tryReprocessFailed(Context context, OpenURLTracker tracker) throws SQLException {
    boolean success = false;
    Exception failure = null;
    HttpURLConnection http = null;
    try {
        URL url = new URL(tracker.getUrl());
        URLConnection conn = url.openConnection();
        http = (HttpURLConnection) conn;
        success = http.getResponseCode() == HttpURLConnection.HTTP_OK;
    } catch (Exception e) {
        failure = e;
    } finally {
        if (http != null) {
            // Release the underlying connection resources.
            http.disconnect();
        }
    }

    // Database work is done after the try/finally so that an exception from it is
    // never thrown out of a finally block.
    if (success) {
        // If the tracker was able to post successfully, we remove it from the database
        OpenURLTrackerLoggerServiceFactory.getInstance().getOpenUrlTrackerLoggerService()
                                          .remove(context, tracker);
        log.info("Successfully posted " + tracker.getUrl() + " from " + tracker.getUploadDate());
    } else {
        // Still no luck - write an error msg but keep the entry in the table for future executions
        log.error("Failed attempt from " + tracker.getUrl() + " originating from " + tracker.getUploadDate(),
                  failure);
    }
}
/**
 * Retries every failed tracker delivery stored in the database.
 * <p>
 * The stored entries are read through a separate, locally created context that
 * is always aborted afterwards, including when a retry throws. Successfully
 * sent entries are removed through the caller's {@code context}.
 * NOTE(review): entries are read in one context and removed in another;
 * confirm that this split is intentional.
 *
 * @param context the caller's DSpace context, used to remove delivered entries
 * @throws SQLException if reading or removing entries fails
 */
public static void reprocessFailedQueue(Context context) throws SQLException {
    OpenURLTrackerLoggerServiceFactory instance = OpenURLTrackerLoggerServiceFactory.getInstance();
    if (instance == null) {
        log.error("Error retrieving the \"OpenURLTrackerLoggerServiceFactory\" instance, aborting the processing");
        return;
    }
    OpenURLTrackerLoggerService openUrlTrackerLoggerService = instance.getOpenUrlTrackerLoggerService();
    if (openUrlTrackerLoggerService == null) {
        log.error("Error retrieving the \"openUrlTrackerLoggerService\" instance, aborting the processing");
        return;
    }

    // Create the read context only once it is certain to be used, and release it
    // on every exit path.
    Context c = new Context();
    try {
        List<OpenURLTracker> openURLTrackers = openUrlTrackerLoggerService.findAll(c);
        for (OpenURLTracker openURLTracker : openURLTrackers) {
            ExportUsageEventListener.tryReprocessFailed(context, openURLTracker);
        }
    } finally {
        try {
            c.abort();
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
    }
}
/**
 * Stores a tracker URL that could not be delivered, stamped with the current
 * date, so that it can be retried later.
 * <p>
 * A {@code null} or empty URL is ignored.
 *
 * @param context the DSpace context used to create the entry
 * @param url     the tracker URL that failed
 * @throws SQLException if creating the entry fails
 */
public static void logfailed(Context context, String url) throws SQLException {
    if (url == null || url.equals("")) {
        return;
    }
    Date now = new Date();
    OpenURLTrackerLoggerService service = OpenURLTrackerLoggerServiceFactory.getInstance()
                                                                            .getOpenUrlTrackerLoggerService();
    OpenURLTracker tracker = service.create(context);
    tracker.setUploadDate(now);
    tracker.setUrl(url);
    // TODO service tracker update
}
/**
 * Resolves the metadata field named by the configuration property
 * {@code stats.<fieldName>}.
 * <p>
 * The property value is expected in the form {@code schema.element} or
 * {@code schema.element.qualifier}.
 *
 * @param context   the DSpace context used for the lookup
 * @param fieldName the property name without its {@code stats.} prefix
 * @return the result of the metadata field lookup, or {@code null} when the
 *         property is unset or blank
 * @throws SQLException             if the lookup fails
 * @throws IllegalArgumentException if the configured value has no element part
 */
private static MetadataField resolveConfigPropertyToMetadataField(Context context, String fieldName)
    throws SQLException {
    String metadataField = configurationService.getProperty("stats." + fieldName);
    if (metadataField == null || metadataField.trim().isEmpty()) {
        return null;
    }

    // Split once instead of once per part.
    String[] parts = metadataField.trim().split("\\.");
    if (parts.length < 2) {
        throw new IllegalArgumentException(
            "Property stats." + fieldName + " must have the form schema.element[.qualifier] but was: "
                + metadataField);
    }
    String qualifier = parts.length == 2 ? null : parts[2];

    MetadataFieldService metadataFieldService = ContentServiceFactory.getInstance().getMetadataFieldService();
    return metadataFieldService.findByElement(context, parts[0], parts[1], qualifier);
}
}

View File

@@ -0,0 +1,95 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.export;
import java.util.Date;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.GeneratedValue;
import javax.persistence.GenerationType;
import javax.persistence.Id;
import javax.persistence.SequenceGenerator;
import javax.persistence.Table;
import javax.persistence.Temporal;
import javax.persistence.TemporalType;
import org.dspace.core.ReloadableEntity;
import org.hibernate.proxy.HibernateProxyHelper;
/**
* Created by jonas - jonas@atmire.com on 09/02/17.
*/
@Entity
@Table(name = "OpenUrlTracker")
public class OpenURLTracker implements ReloadableEntity<Integer> {

    /** Primary key, generated from the {@code openurltracker_seq} sequence. */
    @Id
    @Column(name = "tracker_id")
    @GeneratedValue(strategy = GenerationType.SEQUENCE, generator = "openurltracker_seq")
    @SequenceGenerator(name = "openurltracker_seq", sequenceName = "openurltracker_seq", allocationSize = 1)
    private Integer id;

    /** The tracker URL that could not be delivered (at most 1000 characters). */
    @Column(name = "tracker_url", length = 1000)
    private String url;

    /** The date on which delivery failed; stored with date precision only. */
    @Column(name = "uploaddate")
    @Temporal(TemporalType.DATE)
    private Date uploadDate;

    /** Protected constructor: instances are created by code in this package or by subclasses. */
    protected OpenURLTracker() {
    }

    /**
     * @return the database id, or {@code null} when no id has been assigned yet
     */
    @Override
    public Integer getID() {
        return id;
    }

    /**
     * @return the stored tracker URL
     */
    public String getUrl() {
        return url;
    }

    /**
     * @param url the tracker URL to store
     */
    public void setUrl(String url) {
        this.url = url;
    }

    /**
     * @return the date on which delivery failed
     */
    public Date getUploadDate() {
        return uploadDate;
    }

    /**
     * @param uploadDate the date on which delivery failed
     */
    public void setUploadDate(Date uploadDate) {
        this.uploadDate = uploadDate;
    }

    /**
     * Two trackers are equal when they are of the same class (Hibernate proxies
     * are unwrapped without being initialized) and have equal ids.
     *
     * @param o the object to compare with
     * @return {@code true} when {@code o} is the same tracker entry
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null) {
            return false;
        }
        Class<?> objClass = HibernateProxyHelper.getClassWithoutInitializingProxy(o);
        if (getClass() != objClass) {
            return false;
        }
        final OpenURLTracker that = (OpenURLTracker) o;
        // Compare the Integer ids by value: reference comparison is only reliable
        // for values inside the boxing cache.
        final Integer thisId = this.getID();
        final Integer thatId = that.getID();
        return thisId == null ? thatId == null : thisId.equals(thatId);
    }

    /**
     * Hash code derived from the id; a missing id contributes {@code 0}.
     *
     * @return the hash code
     */
    @Override
    public int hashCode() {
        final Integer currentId = this.getID();
        int hash = 8;
        hash = 74 * hash + (currentId == null ? 0 : currentId);
        return hash;
    }
}

View File

@@ -0,0 +1,41 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.export;
import java.sql.SQLException;
import java.util.List;
import org.dspace.core.Context;
import org.dspace.statistics.export.dao.OpenURLTrackerDAO;
import org.dspace.statistics.export.service.OpenURLTrackerLoggerService;
import org.springframework.beans.factory.annotation.Autowired;
/**
* Created by jonas - jonas@atmire.com on 09/02/17.
*/
public class OpenURLTrackerLoggerServiceImpl implements OpenURLTrackerLoggerService {

    /** DAO that all persistence operations are delegated to. */
    @Autowired(required = true)
    protected OpenURLTrackerDAO openURLTrackerDAO;

    /**
     * Deletes the given tracker entry through the DAO.
     *
     * @param context        the DSpace context
     * @param openURLTracker the entry to delete
     * @throws SQLException if the delete fails
     */
    @Override
    public void remove(Context context, OpenURLTracker openURLTracker) throws SQLException {
        openURLTrackerDAO.delete(context, openURLTracker);
    }

    /**
     * Fetches all tracker entries through the DAO.
     *
     * @param context the DSpace context
     * @return the list of stored tracker entries
     * @throws SQLException if the query fails
     */
    @Override
    public List<OpenURLTracker> findAll(Context context) throws SQLException {
        return openURLTrackerDAO.findAll(context, OpenURLTracker.class);
    }

    /**
     * Creates a new, empty tracker entry through the DAO.
     *
     * @param context the DSpace context
     * @return the entry returned by the DAO
     * @throws SQLException if the create fails
     */
    @Override
    public OpenURLTracker create(Context context) throws SQLException {
        return openURLTrackerDAO.create(context, new OpenURLTracker());
    }
}

View File

@@ -0,0 +1,74 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.export;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import org.dspace.core.Context;
import org.dspace.scripts.DSpaceRunnable;
/**
 * Script that retries failed tracker deliveries stored in the OpenUrlTracker
 * table, or, with option {@code -a}, adds a new failed entry for test purposes.
 */
public class RetryOpenUrlTracker extends DSpaceRunnable {
    private static final Logger log = Logger.getLogger(RetryOpenUrlTracker.class);

    /** Context created in {@link #setup()} and completed at the end of {@link #internalRun()}. */
    private Context context = null;
    /** URL given with {@code -a}; when set, it is stored as a failed entry instead of retrying. */
    private String lineToAdd = null;
    /** Whether {@code -h} was given. */
    private boolean help = false;

    /**
     * Runs the script: prints help, adds a test entry, or reprocesses the failed
     * queue, depending on the parsed options.
     * <p>
     * The authorisation system is switched off for the duration of the work and
     * is restored even when the work throws.
     *
     * @throws Exception if adding the entry or reprocessing the queue fails
     */
    public void internalRun() throws Exception {
        if (help) {
            printHelp();
            return;
        }
        context.turnOffAuthorisationSystem();
        try {
            if (StringUtils.isNotBlank(lineToAdd)) {
                ExportUsageEventListener.logfailed(context, lineToAdd);
                log.info("Created dummy entry in OpenUrlTracker with URL: " + lineToAdd);
            } else {
                ExportUsageEventListener.reprocessFailedQueue(context);
            }
        } finally {
            // Always pair turnOffAuthorisationSystem() with a restore.
            context.restoreAuthSystemState();
        }
        try {
            context.complete();
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
    }

    /**
     * Creates the context and reads the {@code -h} and {@code -a} options from
     * the parsed command line.
     *
     * @throws ParseException declared by the signature; not thrown by this code itself
     */
    public void setup() throws ParseException {
        context = new Context();
        if (commandLine.hasOption('h')) {
            help = true;
        }
        if (commandLine.hasOption('a')) {
            lineToAdd = commandLine.getOptionValue('a');
        }
    }

    /** Private constructor that installs the command line options of this script. */
    private RetryOpenUrlTracker() {
        this.options = constructOptions();
    }

    /**
     * Builds the options: {@code -a <url>} to add a failed entry, and
     * {@code -h}/{@code --help}.
     *
     * @return the command line options of this script
     */
    private Options constructOptions() {
        Options options = new Options();
        options.addOption("a", true, "Add a new \"failed\" row to the table with a url (test purposes only)");
        options.getOption("a").setType(String.class);
        options.addOption("h", "help", false, "print this help message");
        options.getOption("h").setType(boolean.class);
        return options;
    }
}

View File

@@ -0,0 +1,289 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.export;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.servlet.http.HttpServletRequest;
import org.apache.log4j.Logger;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.dspace.statistics.factory.StatisticsServiceFactory;
import org.dspace.statistics.util.IPTable;
import org.dspace.statistics.util.SpiderDetectorService;
/**
* SpiderDetector is used to find IP's that are spiders...
* In future someone may add UserAgents and Host Domains
* to the detection criteria here.
*
* @author kevinvandevelde at atmire.com
* @author ben at atmire.com
* @author Mark Diggory (mdiggory at atmire.com)
* @author Kevin Van Ransbeeck at atmire.com
*/
public class SpiderDetector {
private static final Logger log = Logger.getLogger(SpiderDetector.class);
// Spider detector service obtained from the StatisticsServiceFactory.
// NOTE(review): the original comment says it is instantiated by a spring bean defined in
// core-services.xml - confirm against the Spring configuration.
private static SpiderDetectorService spiderDetectorService = StatisticsServiceFactory.getInstance()
.getSpiderDetectorService();
// Static utility class: not meant to be instantiated.
private SpiderDetector() { }
/**
* Sparse hash table structure to hold IP address ranges.
* Stays null until loadSpiderIpAddresses() has run.
*/
private static IPTable table = null;
// User-agent patterns, filled by loadSpiderRegexFromFile(); wrapped in a synchronized set.
private static Set<Pattern> spidersRegex = Collections.synchronizedSet(new HashSet<Pattern>());
// User agents that already matched a pattern; created on first match by isSpiderRegex().
private static Set<String> spidersMatched = null;
/**
 * Reads the IP addresses listed in a spider file.
 * <p>
 * Lines starting with {@code #} are skipped. Every other line is trimmed and
 * kept only when it is non-empty and starts with a digit; lines starting with
 * anything else are taken to be hostnames, which are not supported yet.
 *
 * @param spiderIpFile the location of the spider file
 * @return the addresses found, or an empty set when the file does not exist or
 *         is not a regular file
 * @throws IOException if reading the file fails
 */
public static Set<String> readIpAddresses(File spiderIpFile) throws IOException {
    Set<String> addresses = new HashSet<>();
    if (!(spiderIpFile.exists() && spiderIpFile.isFile())) {
        return addresses;
    }

    try (BufferedReader reader = new BufferedReader(new FileReader(spiderIpFile))) {
        for (String rawLine = reader.readLine(); rawLine != null; rawLine = reader.readLine()) {
            if (rawLine.startsWith("#")) {
                // Comment line; user-agent entries may be parsed from these later.
                continue;
            }
            String entry = rawLine.trim();
            if (!entry.isEmpty() && Character.isDigit(entry.charAt(0))) {
                // Treated as a full v4 address (v6 is not handled).
                addresses.add(entry);
            }
            // Anything else is empty or a hostname; hostnames are not supported yet.
        }
    }
    return addresses;
}
/**
* Get a Set representing all the spider addresses held in the IP table.
* The table is loaded from the spider files first if that has not happened yet.
* NOTE(review): the original comment calls the returned Set immutable; that depends on
* IPTable.toSet(), which is not visible here - confirm.
*
* @return the set of spider IP addresses
*/
public static Set<String> getSpiderIpAddresses() {
loadSpiderIpAddresses();
return table.toSet();
}
/*
 * Private loader that populates the IP table once from every file in
 * <dspace.dir>/config/spiders. Later calls return immediately. Errors while
 * loading are logged and leave the table with whatever was added so far.
 */
private static synchronized void loadSpiderIpAddresses() {
    if (table != null) {
        return;
    }
    table = new IPTable();

    String dspaceDir = DSpaceServicesFactory.getInstance().getConfigurationService().getProperty("dspace.dir");
    try {
        File spidersDir = new File(dspaceDir, "config/spiders");
        if (!(spidersDir.exists() && spidersDir.isDirectory())) {
            log.info("No spider file loaded");
            return;
        }
        for (File spiderFile : spidersDir.listFiles()) {
            for (String address : readIpAddresses(spiderFile)) {
                table.add(address);
            }
            log.info("Loaded Spider IP file: " + spiderFile);
        }
    } catch (Exception e) {
        log.error("Error Loading Spiders:" + e.getMessage(), e);
    }
}
/**
 * Static service method for testing a request against the spider criteria.
 * <p>
 * Three checks run in order, each switched by a configuration property:
 * <ol>
 * <li>{@code stats.spider.ipmatch.enabled} (default {@code true}): the client
 * address matches a known spider IP. When proxies are in use and an
 * {@code X-Forwarded-For} header is present, every address in that
 * comma-delimited header is checked instead of the remote address.</li>
 * <li>{@code stats.spider.agentempty.enabled} (default {@code false}): the
 * {@code user-agent} header is missing or empty.</li>
 * <li>{@code stats.spider.agentregex.enabled} (default {@code true}): the
 * {@code user-agent} header matches one of the loaded spider patterns.</li>
 * </ol>
 *
 * @param request the HTTP request to test
 * @return true|false if the request was detected to be from a spider
 */
public static boolean isSpider(HttpServletRequest request) {
    /*
     * 1) If the IP address matches the spider IP addresses (this is the current implementation)
     */
    boolean checkSpidersIP = DSpaceServicesFactory.getInstance().getConfigurationService()
                                                  .getPropertyAsType("stats.spider.ipmatch.enabled", true, true);
    if (checkSpidersIP) {
        String forwardedFor = request.getHeader("X-Forwarded-For");
        if (StatisticsServiceFactory.getInstance().getSolrLoggerService().isUseProxies()
            && forwardedFor != null) {
            /* This header is a comma delimited list */
            for (String xfip : forwardedFor.split(",")) {
                // Entries are usually separated by ", ", so strip the surrounding
                // whitespace before the lookup.
                if (isSpider(xfip.trim())) {
                    log.debug("spider.ipmatch");
                    return true;
                }
            }
        } else if (isSpider(request.getRemoteAddr())) {
            log.debug("spider.ipmatch");
            return true;
        }
    }

    String userAgent = request.getHeader("user-agent");

    /*
     * 2) if the user-agent header is empty - DISABLED BY DEFAULT -
     */
    boolean checkSpidersEmptyAgent = DSpaceServicesFactory.getInstance().getConfigurationService()
                                                          .getPropertyAsType("stats.spider.agentempty.enabled",
                                                                             false, true);
    if (checkSpidersEmptyAgent) {
        if (userAgent == null || userAgent.length() == 0) {
            log.debug("spider.agentempty");
            return true;
        }
    }

    /*
     * 3) if the user-agent corresponds to one of the regexes at http://www.projectcounter
     * .org/r4/COUNTER_robot_txt_list_Jan_2011.txt
     */
    boolean checkSpidersTxt = DSpaceServicesFactory.getInstance().getConfigurationService()
                                                   .getPropertyAsType("stats.spider.agentregex.enabled", true,
                                                                      true);
    if (checkSpidersTxt) {
        if (userAgent != null && !userAgent.equals("")) {
            return isSpiderRegex(userAgent);
        }
    }
    return false;
}
/**
 * Check whether an individual IP is a known spider address.
 * <p>
 * The IP table of this class is loaded on first use. Any error during the
 * lookup is treated as "not a spider".
 *
 * @param ip the address to check
 * @return {@code true} if the IP table contains the address
 */
public static boolean isSpider(String ip) {
    if (table == null) {
        // Load this class's own table. Delegating the load to the spider detector
        // service leaves the static table of this class null, so the lookup below
        // could never succeed.
        loadSpiderIpAddresses();
    }
    try {
        if (table.contains(ip)) {
            return true;
        }
    } catch (Exception e) {
        return false;
    }
    return false;
}
/**
 * Checks a user-agent string against the set of known spider regexes.
 * <p>
 * A second set of already matched agents is kept for fast matching: an agent
 * that matched once is found there on later calls without looping over all
 * regexes again. That set is cleared once it holds 100 entries. The regexes
 * are loaded from file whenever the regex set is found empty.
 *
 * @param userAgent String
 * @return true if the user-agent matches a regex
 */
public static boolean isSpiderRegex(String userAgent) {
    // Fast path: this agent matched before.
    if (spidersMatched != null && spidersMatched.contains(userAgent)) {
        log.debug("spider.agentregex");
        return true;
    }

    synchronized (spidersRegex) {
        if (spidersRegex.isEmpty()) {
            loadSpiderRegexFromFile();
        }
    }
    if (spidersRegex == null) {
        return false;
    }

    // Iterate over a snapshot of the regex set.
    for (Object candidate : spidersRegex.toArray()) {
        Matcher matcher = ((Pattern) candidate).matcher(userAgent);
        if (!matcher.find()) {
            continue;
        }
        if (spidersMatched == null) {
            spidersMatched = new HashSet<>();
        }
        if (spidersMatched.size() >= 100) {
            spidersMatched.clear();
        }
        spidersMatched.add(userAgent);
        log.debug("spider.agentregex");
        return true;
    }
    return false;
}
/**
 * Populate static Set spidersRegex from a local txt file, one
 * case-insensitive regex per line.
 * Original file downloaded from http://www.projectcounter.org/r4/COUNTER_robot_txt_list_Jan_2011.txt during build
 * <p>
 * The file location is read from the
 * {@code stats.spider.agentregex.regexfile} property. Blank lines are skipped,
 * because an empty pattern would match every user agent. A line that is not a
 * valid regex is logged and skipped. A missing property, a missing file or a
 * read error is logged and leaves the set with whatever was loaded so far.
 */
public static void loadSpiderRegexFromFile() {
    String spidersTxt = DSpaceServicesFactory.getInstance().getConfigurationService()
                                             .getPropertyAsType("stats.spider.agentregex.regexfile", String.class);
    if (spidersTxt == null) {
        log.error("File with spiders regex not found @ " + spidersTxt);
        return;
    }
    try (DataInputStream in = new DataInputStream(new FileInputStream(spidersTxt));
         BufferedReader br = new BufferedReader(new InputStreamReader(in))) {
        String strLine;
        while ((strLine = br.readLine()) != null) {
            if (strLine.trim().isEmpty()) {
                // An empty pattern matches everything; never load it.
                continue;
            }
            try {
                spidersRegex.add(Pattern.compile(strLine, Pattern.CASE_INSENSITIVE));
            } catch (java.util.regex.PatternSyntaxException e) {
                log.error("Skipping invalid spider regex '" + strLine + "' in " + spidersTxt, e);
            }
        }
        log.info("Loaded Spider Regex file: " + spidersTxt);
    } catch (FileNotFoundException e) {
        log.error("File with spiders regex not found @ " + spidersTxt, e);
    } catch (IOException e) {
        log.error("Could not read from file " + spidersTxt, e);
    }
}
}

View File

@@ -0,0 +1,19 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.export.dao;
import org.dspace.core.GenericDAO;
import org.dspace.statistics.export.OpenURLTracker;
/**
* Data access interface for {@link OpenURLTracker} entities. It declares no
* methods of its own; all operations come from {@link GenericDAO}.
*
* Created by jonas - jonas@atmire.com on 09/02/17.
*/
public interface OpenURLTrackerDAO extends GenericDAO<OpenURLTracker> {
}

View File

@@ -0,0 +1,24 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.export.dao.impl;
import org.dspace.core.AbstractHibernateDAO;
import org.dspace.statistics.export.OpenURLTracker;
import org.dspace.statistics.export.dao.OpenURLTrackerDAO;
/**
 * Hibernate-backed implementation of {@link OpenURLTrackerDAO}. It adds no
 * behaviour of its own on top of {@link AbstractHibernateDAO}.
 *
 * Created by jonas - jonas@atmire.com on 09/02/17.
 */
public class OpenURLTrackerDAOImpl extends AbstractHibernateDAO<OpenURLTracker> implements OpenURLTrackerDAO {

    /** Protected constructor: not part of the public API. */
    protected OpenURLTrackerDAOImpl() {
    }
}

View File

@@ -0,0 +1,27 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.export.factory;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.dspace.statistics.export.service.OpenURLTrackerLoggerService;
/**
 * Abstract factory giving access to the {@link OpenURLTrackerLoggerService}.
 *
 * Created by jonas - jonas@atmire.com on 09/02/17.
 */
public abstract class OpenURLTrackerLoggerServiceFactory {

    /**
     * @return the tracker logger service exposed by this factory
     */
    public abstract OpenURLTrackerLoggerService getOpenUrlTrackerLoggerService();

    /**
     * Looks up the factory registered in the DSpace service manager under the
     * name {@code openURLTrackerLoggerServiceFactory}.
     *
     * @return whatever the service manager returns for that name and type
     */
    public static OpenURLTrackerLoggerServiceFactory getInstance() {
        final String serviceName = "openURLTrackerLoggerServiceFactory";
        return DSpaceServicesFactory.getInstance()
                                    .getServiceManager()
                                    .getServiceByName(serviceName, OpenURLTrackerLoggerServiceFactory.class);
    }
}

View File

@@ -0,0 +1,25 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.export.factory;
import org.dspace.statistics.export.service.OpenURLTrackerLoggerService;
import org.springframework.beans.factory.annotation.Autowired;
/**
* Factory implementation that hands out the autowired
* {@link OpenURLTrackerLoggerService}.
*
* Created by jonas - jonas@atmire.com on 09/02/17.
*/
public class OpenURLTrackerLoggerServiceFactoryImpl extends OpenURLTrackerLoggerServiceFactory {
// Injected by Spring; injection is mandatory (required = true).
@Autowired(required = true)
private OpenURLTrackerLoggerService openURLTrackerLoggerService;
/**
* @return the injected tracker logger service
*/
@Override
public OpenURLTrackerLoggerService getOpenUrlTrackerLoggerService() {
return openURLTrackerLoggerService;
}
}

View File

@@ -0,0 +1,26 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.export.service;
import java.sql.SQLException;
import java.util.List;
import org.dspace.core.Context;
import org.dspace.statistics.export.OpenURLTracker;
/**
 * Service interface for working with {@link OpenURLTracker} objects.
 * <p>
 * NOTE(review): the descriptions below are derived from the method signatures only;
 * confirm the exact semantics against the implementing class.
 *
 * Created by jonas - jonas@atmire.com on 09/02/17.
 */
public interface OpenURLTrackerLoggerService {

    /**
     * Produces an {@link OpenURLTracker} (presumably a newly created entry - confirm).
     *
     * @param context the DSpace context to operate in
     * @return the resulting tracker object
     * @throws SQLException declared for database failures
     */
    OpenURLTracker create(Context context) throws SQLException;

    /**
     * Returns a list of {@link OpenURLTracker} objects (presumably every stored entry - confirm).
     *
     * @param context the DSpace context to operate in
     * @return the list of tracker objects
     * @throws SQLException declared for database failures
     */
    List<OpenURLTracker> findAll(Context context) throws SQLException;

    /**
     * Removes the supplied {@link OpenURLTracker}.
     *
     * @param context        the DSpace context to operate in
     * @param openURLTracker the tracker object to remove
     * @throws SQLException declared for database failures
     */
    void remove(Context context, OpenURLTracker openURLTracker) throws SQLException;
}

View File

@@ -0,0 +1,29 @@
--
-- The contents of this file are subject to the license and copyright
-- detailed in the LICENSE and NOTICE files at the root of the source
-- tree and available online at
--
-- http://www.dspace.org/license/
--
-- ===============================================================
-- WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
--
-- DO NOT MANUALLY RUN THIS DATABASE MIGRATION. IT WILL BE EXECUTED
-- AUTOMATICALLY (IF NEEDED) BY "FLYWAY" WHEN YOU STARTUP DSPACE.
-- http://flywaydb.org/
-- ===============================================================
-------------------------------------------------------------
-- This will create the setup for the IRUS statistics harvester
-------------------------------------------------------------
-- Sequence created together with the table; presumably the generator for tracker_id (TODO confirm in the entity mapping).
CREATE SEQUENCE openurltracker_seq;
-- Table backing the OpenURLTracker entity: one row per stored tracker URL.
CREATE TABLE OpenUrlTracker
(
tracker_id INTEGER, -- primary key, see OpenUrlTracker_PK below
tracker_url VARCHAR(1000), -- URL text, at most 1000 characters
uploaddate DATE, -- date only, no time-of-day component in this column type
CONSTRAINT OpenUrlTracker_PK PRIMARY KEY (tracker_id)
);

View File

@@ -0,0 +1,29 @@
--
-- The contents of this file are subject to the license and copyright
-- detailed in the LICENSE and NOTICE files at the root of the source
-- tree and available online at
--
-- http://www.dspace.org/license/
--
-- ===============================================================
-- WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
--
-- DO NOT MANUALLY RUN THIS DATABASE MIGRATION. IT WILL BE EXECUTED
-- AUTOMATICALLY (IF NEEDED) BY "FLYWAY" WHEN YOU STARTUP DSPACE.
-- http://flywaydb.org/
-- ===============================================================
-------------------------------------------------------------
-- This will create the setup for the IRUS statistics harvester
-------------------------------------------------------------
-- Sequence created together with the table; presumably the generator for tracker_id (TODO confirm in the entity mapping).
CREATE SEQUENCE openurltracker_seq;
-- Table backing the OpenURLTracker entity: one row per stored tracker URL.
-- NOTE(review): NUMBER and VARCHAR2 are Oracle-style type names; confirm this script is the Oracle migration.
CREATE TABLE OpenUrlTracker
(
tracker_id NUMBER, -- primary key, see OpenUrlTracker_PK below
tracker_url VARCHAR2(1000), -- URL text, length limit 1000
uploaddate DATE,
CONSTRAINT OpenUrlTracker_PK PRIMARY KEY (tracker_id)
);

View File

@@ -0,0 +1,29 @@
--
-- The contents of this file are subject to the license and copyright
-- detailed in the LICENSE and NOTICE files at the root of the source
-- tree and available online at
--
-- http://www.dspace.org/license/
--
-- ===============================================================
-- WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
--
-- DO NOT MANUALLY RUN THIS DATABASE MIGRATION. IT WILL BE EXECUTED
-- AUTOMATICALLY (IF NEEDED) BY "FLYWAY" WHEN YOU STARTUP DSPACE.
-- http://flywaydb.org/
-- ===============================================================
-------------------------------------------------------------
-- This will create the setup for the IRUS statistics harvester
-------------------------------------------------------------
-- Sequence created together with the table; presumably the generator for tracker_id (TODO confirm in the entity mapping).
CREATE SEQUENCE openurltracker_seq;
-- Table backing the OpenURLTracker entity: one row per stored tracker URL.
-- NOTE(review): NUMBER and VARCHAR2 are Oracle-style type names. Confirm this script is only
-- run against a database that accepts them; PostgreSQL does not (it needs INTEGER / VARCHAR).
CREATE TABLE OpenUrlTracker
(
tracker_id NUMBER, -- primary key, see OpenUrlTracker_PK below
tracker_url VARCHAR2(1000), -- URL text, length limit 1000
uploaddate DATE,
CONSTRAINT OpenUrlTracker_PK PRIMARY KEY (tracker_id)
);

View File

@@ -2015,3 +2015,4 @@ include = ${module_dir}/translator.cfg
include = ${module_dir}/usage-statistics.cfg
include = ${module_dir}/versioning.cfg
include = ${module_dir}/workflow.cfg
include = ${module_dir}/stats.cfg

View File

@@ -82,5 +82,7 @@
<mapping class="org.dspace.xmlworkflow.storedcomponents.WorkflowItemRole"/>
<mapping class="org.dspace.xmlworkflow.storedcomponents.XmlWorkflowItem"/>
<mapping class="org.dspace.statistics.export.OpenURLTracker"/>
</session-factory>
</hibernate-configuration>

View File

@@ -208,6 +208,13 @@
<class>org.dspace.administer.RegistryLoader</class>
</step>
</command>
<command>
<name>retry-tracker</name>
<description>Retry all failed commits to the OpenURLTracker</description>
<step>
<class>org.dspace.statistics.export.RetryOpenUrlTracker</class>
</step>
</command>
<command>
<name>solr-export-statistics</name>
<description>Export usage statistics data from Solr for back-up purposes</description>

View File

@@ -71,6 +71,8 @@
<AppenderRef ref='A2'/>
</logger>
<logger name='org.dspace.statistics.export.ExportUsageEventListener' level='DEBUG' />
# Block services logging except on exceptions
<logger name='org.dspace.kernel'
level='ERROR'/>

View File

@@ -0,0 +1,35 @@
#-----------------------#
# Atmire stats exporter #
#-----------------------#
# OPTIONAL metadata field used for filtering.
# If items with specific values for the "dc.type" field should be excluded, "dc.type" should be placed here.
# This must follow the syntax schema.element.qualifier, or schema.element if there is no qualifier.
# stats.tracker.type-field = dc.type
# If "stats.tracker.type-field" is set, the list of values must be defined in "stats.tracker.type-value".
# This is a comma-separated list of values that will be excluded for the given field.
# stats.tracker.type-value = Article, Postprint
# Set the tracker environment to "test" or "production". Defaults to "test" if empty.
# The URL used by the test environment can be configured in property stats.tracker.testurl
# The URL used by the production environment can be configured in property stats.tracker.produrl
stats.tracker.environment = test
# The URL to which tracking info is submitted in the "test" environment.
stats.tracker.testurl = https://irus.jisc.ac.uk/counter/test/
# The base URL to which tracking info is submitted in the "production" environment.
stats.tracker.produrl = https://irus.jisc.ac.uk/counter/
# Identifies data as OpenURL 1.0
stats.tracker.urlversion = Z39.88-2004
# The deployed user interface should be provided to build correct links to files.
# The stats.dspace.type property can be set to either "xmlui" or "jspui".
stats.dspace.type = xmlui
# Spider options
stats.spider.ipmatch.enabled = true
stats.spider.agentempty.enabled = false
stats.spider.agentregex.enabled = true
# Default is downloaded during build: ${dspace.dir}/config/COUNTER_Robots_list.txt
stats.spider.agentregex.regexfile = ${dspace.dir}/config/COUNTER_Robots_list.txt

View File

@@ -62,6 +62,7 @@
<bean class="org.dspace.xmlworkflow.storedcomponents.dao.impl.PoolTaskDAOImpl"/>
<bean class="org.dspace.xmlworkflow.storedcomponents.dao.impl.WorkflowItemRoleDAOImpl"/>
<bean class="org.dspace.xmlworkflow.storedcomponents.dao.impl.XmlWorkflowItemDAOImpl"/>
<bean class="org.dspace.statistics.export.dao.impl.OpenURLTrackerDAOImpl"/>

View File

@@ -47,4 +47,6 @@
<!--Configurable workflow services -->
<bean id="workflowServiceFactory" class="org.dspace.xmlworkflow.factory.XmlWorkflowServiceFactoryImpl"/>
<bean id="openURLTrackerLoggerServiceFactory" class="org.dspace.statistics.export.factory.OpenURLTrackerLoggerServiceFactoryImpl"/>
</beans>

View File

@@ -117,6 +117,7 @@
<bean class="org.dspace.xmlworkflow.WorkflowRequirementsServiceImpl"/>
<bean class="org.dspace.xmlworkflow.XmlWorkflowFactoryImpl"/>
<bean class="org.dspace.statistics.export.OpenURLTrackerLoggerServiceImpl"/>
</beans>

View File

@@ -9,5 +9,9 @@
<property name="description" value="Update Discovery Solr Search Index"/>
</bean>
<bean id="retryOpenUrlTracker" class="org.dspace.statistics.export.RetryOpenUrlTracker" scope="prototype">
<property name="name" value="retry-tracker"/>
<property name="description" value="Retry all failed commits to the OpenURLTracker"/>
</bean>
</beans>

View File

@@ -21,4 +21,9 @@
<property name="eventService" ref="org.dspace.services.EventService"/>
</bean>
<!-- Irus statistics tracking -->
<bean class="org.dspace.statistics.export.ExportUsageEventListener">
<property name="eventService" ref="org.dspace.services.EventService"/>
</bean>
</beans>

View File

@@ -118,6 +118,7 @@ Common usage:
<echo message=" touching your data" />
<echo message="update_configs --> Update your configs directory with new configuration files"/>
<echo message="update_geolite --> Download and install GeoCity database into ${dspace.dir}/config" />
<echo message="update_spiders --> Download and install Spider Robots database into ${dspace.dir}/config" />
<echo message="update_code --> Update compiled code (bin, lib, and etc directories)" />
<echo message="update_webapps --> Update web applications" />
<echo message="" />
@@ -180,6 +181,7 @@ Common usage:
<!-- ============================================================= -->
<target name="update_configs" depends="overwrite_configs,overwrite_solr_configs" description="Updates the Configuration Directory">
<antcall target="init_geolite" />
<antcall target="init_spiders" />
</target>
<target name="overwrite_configs" description="Overwrites a configuration directory." if="${overwrite}" depends="copy_configs_keep">
@@ -834,6 +836,8 @@ Common usage:
<antcall target="init_geolite" />
<antcall target="init_spiders" />
<echo>
====================================================================
The DSpace code has been installed.
@@ -920,4 +924,22 @@ You may manually install this file by following these steps:
<antcall target="update_geolite" />
</target>
<!-- installs and/or updates the Project COUNTER robots list (COUNTER_Robots_list.txt) -->
<target name="update_spiders">
<!-- Downloads COUNTER_Robots_list.txt from the atmire/COUNTER-Robots GitHub repository
     into ${dspace.dir}/config. Requires network access at build time. -->
<echo>Downloading: https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/generated/COUNTER_Robots_list.txt</echo>
<get src="https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/generated/COUNTER_Robots_list.txt" dest="${dspace.dir}/config/COUNTER_Robots_list.txt" verbose="true" />
</target>
<target name="check_spiders">
<!-- Sets the "need.spiders" property only when the robots list file is not yet present
     in ${dspace.dir}/config; the property stays unset otherwise. -->
<condition property="need.spiders">
<not>
<available file="${dspace.dir}/config/COUNTER_Robots_list.txt" />
</not>
</condition>
</target>
<target name="init_spiders" depends="check_spiders" if="need.spiders">
<!-- Runs check_spiders first, then downloads the robots list only if "need.spiders" was set,
     i.e. only when the file is missing. -->
<antcall target="update_spiders" />
</target>
</project>