mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-07 01:54:22 +00:00
Update testing for new features; remove Spring-based configuration.
This commit is contained in:
@@ -1,28 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
The contents of this file are subject to the license and copyright
|
||||
detailed in the LICENSE and NOTICE files at the root of the source
|
||||
tree and available online at
|
||||
|
||||
http://www.dspace.org/license/
|
||||
-->
|
||||
<beans xmlns="http://www.springframework.org/schema/beans"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd">
|
||||
<description>
|
||||
User-Agents to be marked as "spiders" in statistics.
|
||||
</description>
|
||||
|
||||
<bean class="org.dspace.statistics.util.SpiderDetector">
|
||||
<property name="AgentPatterns">
|
||||
<description>
|
||||
java.util.regex regular expression patterns to match User-Agent
|
||||
headers of known spiders.
|
||||
</description>
|
||||
<list>
|
||||
<value>^msnbot</value>
|
||||
</list>
|
||||
</property>
|
||||
</bean>
|
||||
|
||||
</beans>
|
@@ -96,6 +96,18 @@
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>com.mycila.maven-license-plugin</groupId>
|
||||
<artifactId>maven-license-plugin</artifactId>
|
||||
<configuration>
|
||||
<excludes>
|
||||
<exclude>**/src/test/resources/**</exclude>
|
||||
<exclude>**/src/test/data/**</exclude>
|
||||
<exclude>**/.gitignore</exclude>
|
||||
<exclude>src/test/data/dspaceFolder/config/spiders/**</exclude>
|
||||
</excludes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<!-- This plugin allows us to run a Groovy script in our Maven POM
|
||||
(see: http://gmaven.codehaus.org/Executing+Groovy+Code )
|
||||
We are generating an OS-agnostic version (agnostic.build.dir) of
|
||||
|
@@ -1,31 +0,0 @@
|
||||
/**
|
||||
* The contents of this file are subject to the license and copyright
|
||||
* detailed in the LICENSE and NOTICE files at the root of the source
|
||||
* tree and available online at
|
||||
*
|
||||
* http://www.dspace.org/license/
|
||||
*/
|
||||
package org.dspace.statistics.util;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Holds a list of pattern strings.
|
||||
*
|
||||
* @author mwood
|
||||
*/
|
||||
public class AgentPatternList {
|
||||
private final List<String> patterns;
|
||||
|
||||
private AgentPatternList() { patterns = null; }
|
||||
|
||||
public AgentPatternList(List<String> patterns)
|
||||
{
|
||||
this.patterns = patterns;
|
||||
}
|
||||
|
||||
public List<String> getPatterns()
|
||||
{
|
||||
return patterns;
|
||||
}
|
||||
}
|
@@ -83,37 +83,6 @@ public class SpiderDetector {
|
||||
return patterns;
|
||||
}
|
||||
|
||||
/**
|
||||
* Unpack a list of lists of patterns and compile them to Patterns.
|
||||
* We have to do the list-of-lists to get Spring to accumulate them across
|
||||
* configuration files.
|
||||
*
|
||||
* @param agentPatternLists lists of pattern strings to be compiled and added to the agent patterns.
|
||||
* @throws PatternSyntaxException if a pattern string is not a valid regular expression.
|
||||
*/
|
||||
static public void setAgentPatterns(List<AgentPatternList> agentPatternLists)
|
||||
{
|
||||
clearAgentPatterns();
|
||||
|
||||
for (AgentPatternList agentPatterns : agentPatternLists)
|
||||
{
|
||||
for (String agentPattern : agentPatterns.getPatterns())
|
||||
{
|
||||
Pattern newPattern = Pattern.compile(agentPattern);
|
||||
agents.add(newPattern);
|
||||
}
|
||||
}
|
||||
log.info("Received " + String.valueOf(agents.size()) + " agent patterns.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Empty the agent pattern list.
|
||||
*/
|
||||
static void clearAgentPatterns()
|
||||
{
|
||||
agents.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an immutable Set representing all the Spider Addresses here
|
||||
*
|
||||
@@ -141,21 +110,24 @@ public class SpiderDetector {
|
||||
|
||||
if (spidersDir.exists() && spidersDir.isDirectory()) {
|
||||
for (File file : spidersDir.listFiles()) {
|
||||
for (String ip : readPatterns(file)) {
|
||||
log.debug("Loading {}", ip);
|
||||
if (!Character.isDigit(ip.charAt(0)))
|
||||
{
|
||||
try {
|
||||
ip = DnsLookup.forward(ip);
|
||||
log.debug("Resolved to {}", ip);
|
||||
} catch (IOException e) {
|
||||
log.warn("Not loading {}: {}", ip, e.getMessage());
|
||||
continue;
|
||||
if (file.isFile())
|
||||
{
|
||||
for (String ip : readPatterns(file)) {
|
||||
log.debug("Loading {}", ip);
|
||||
if (!Character.isDigit(ip.charAt(0)))
|
||||
{
|
||||
try {
|
||||
ip = DnsLookup.forward(ip);
|
||||
log.debug("Resolved to {}", ip);
|
||||
} catch (IOException e) {
|
||||
log.warn("Not loading {}: {}", ip, e.getMessage());
|
||||
continue;
|
||||
}
|
||||
}
|
||||
table.add(ip);
|
||||
}
|
||||
table.add(ip);
|
||||
log.info("Loaded Spider IP file: " + file);
|
||||
}
|
||||
log.info("Loaded Spider IP file: " + file);
|
||||
}
|
||||
} else {
|
||||
log.info("No spider file loaded");
|
||||
@@ -169,7 +141,15 @@ public class SpiderDetector {
|
||||
|
||||
}
|
||||
|
||||
/** Load agent name patterns from all files in a single subdirectory of config/spiders. */
|
||||
/**
|
||||
* Load agent name patterns from all files in a single subdirectory of config/spiders.
|
||||
*
|
||||
* @param directory simple directory name (e.g. "agents").
|
||||
* "${dspace.dir}/config/spiders" will be prepended to yield the path to
|
||||
* the directory of pattern files.
|
||||
* @param patternList patterns read from the files in {@code directory} will
|
||||
* be added to this List.
|
||||
*/
|
||||
private static void loadPatterns(String directory, List<Pattern> patternList)
|
||||
{
|
||||
String dspaceHome = ConfigurationManager.getProperty("dspace.dir");
|
||||
@@ -193,12 +173,15 @@ public class SpiderDetector {
|
||||
{
|
||||
patternList.add(Pattern.compile(pattern));
|
||||
}
|
||||
log.info("Loaded pattern file: {}", file.getPath());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
log.info("No patterns loaded from {}", patternsDir.getPath());
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO Load host name patterns from all files in config/spiders/dns. */
|
||||
|
||||
/**
|
||||
* Static Service Method for testing spiders against existing spider files.
|
||||
* <p>
|
||||
@@ -251,7 +234,7 @@ public class SpiderDetector {
|
||||
|
||||
for (Pattern candidate : domains)
|
||||
{
|
||||
if (candidate.matcher(hostname).find())
|
||||
if (candidate.matcher(hostname).find()) // XXX find() matches anywhere in the hostname; should this match be anchored?
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
@@ -0,0 +1 @@
|
||||
^msnbot
|
@@ -0,0 +1 @@
|
||||
^baiduspider-.*\.crawl\.baidu\.com
|
@@ -0,0 +1,2 @@
|
||||
# For testing
|
||||
192.168.2.1
|
@@ -1,61 +0,0 @@
|
||||
/**
|
||||
* The contents of this file are subject to the license and copyright
|
||||
* detailed in the LICENSE and NOTICE files at the root of the source
|
||||
* tree and available online at
|
||||
*
|
||||
* http://www.dspace.org/license/
|
||||
*/
|
||||
package org.dspace.statistics.util;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.junit.*;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author mwood
|
||||
*/
|
||||
public class AgentPatternListTest
|
||||
{
|
||||
|
||||
public AgentPatternListTest()
|
||||
{
|
||||
}
|
||||
|
||||
@BeforeClass
|
||||
public static void setUpClass()
|
||||
throws Exception
|
||||
{
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void tearDownClass()
|
||||
throws Exception
|
||||
{
|
||||
}
|
||||
|
||||
@Before
|
||||
public void setUp()
|
||||
{
|
||||
}
|
||||
|
||||
@After
|
||||
public void tearDown()
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* Test of getPatterns method, of class AgentPatternList.
|
||||
*/
|
||||
@Test
|
||||
public void testGetPatterns()
|
||||
{
|
||||
System.out.println("getPatterns");
|
||||
List<String> expResult = new ArrayList<String>();
|
||||
AgentPatternList instance = new AgentPatternList(expResult);
|
||||
List result = instance.getPatterns();
|
||||
assertEquals(expResult, result);
|
||||
}
|
||||
}
|
@@ -32,6 +32,8 @@ class DummyHttpServletRequest implements HttpServletRequest
|
||||
|
||||
private String address = null;
|
||||
|
||||
private String remoteHost = null;
|
||||
|
||||
public void setAgent(String agent)
|
||||
{
|
||||
this.agent = agent;
|
||||
@@ -42,6 +44,11 @@ class DummyHttpServletRequest implements HttpServletRequest
|
||||
this.address = address;
|
||||
}
|
||||
|
||||
public void setRemoteHost(String host)
|
||||
{
|
||||
this.remoteHost = host;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see javax.servlet.http.HttpServletRequest#getAuthType()
|
||||
*/
|
||||
@@ -463,8 +470,7 @@ class DummyHttpServletRequest implements HttpServletRequest
|
||||
@Override
|
||||
public String getRemoteHost()
|
||||
{
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
return remoteHost;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
|
@@ -24,19 +24,10 @@ public class SpiderDetectorTest
|
||||
{
|
||||
|
||||
/**
|
||||
* Test method for {@link org.dspace.statistics.util.SpiderDetector#readIpAddresses(java.io.File)}.
|
||||
* Test method for {@link org.dspace.statistics.util.SpiderDetector#readPatterns(java.io.File)}.
|
||||
*/
|
||||
@Test
|
||||
public void testReadIpAddresses()
|
||||
{
|
||||
// FIXME fail("Not yet implemented");
|
||||
}
|
||||
|
||||
/**
|
||||
* Test method for {@link org.dspace.statistics.util.SpiderDetector#setAgentPatterns(java.util.List)}.
|
||||
*/
|
||||
@Test
|
||||
public void testSetAgentPatterns()
|
||||
public void testReadPatterns()
|
||||
{
|
||||
// FIXME fail("Not yet implemented");
|
||||
}
|
||||
@@ -58,25 +49,38 @@ public class SpiderDetectorTest
|
||||
{
|
||||
Mockit.setUpMocks(MockSolrLogger.class); // Don't test SolrLogger here
|
||||
|
||||
final String NOT_A_BOT_ADDRESS = "192.168.0.1";
|
||||
|
||||
DummyHttpServletRequest req = new DummyHttpServletRequest();
|
||||
req.setAddress("192.168.0.1"); // avoid surprises
|
||||
req.setAddress(NOT_A_BOT_ADDRESS); // avoid surprises
|
||||
req.setRemoteHost("notabot.example.com"); // avoid surprises
|
||||
req.setAgent("Firefox"); // avoid surprises
|
||||
|
||||
// Some pattern strings
|
||||
List<String> testPatterns = new ArrayList<String>();
|
||||
testPatterns.add("^msnbot");
|
||||
// Wrap it in an AgentPatternList
|
||||
AgentPatternList patternList = new AgentPatternList(testPatterns);
|
||||
List<AgentPatternList> patternLists = new ArrayList<AgentPatternList>();
|
||||
patternLists.add(patternList);
|
||||
// Test!
|
||||
SpiderDetector.clearAgentPatterns(); // start fresh, in case Spring is active
|
||||
SpiderDetector.setAgentPatterns(patternLists);
|
||||
String candidate;
|
||||
|
||||
// Test agent patterns
|
||||
req.setAgent("msnbot is watching you");
|
||||
assertTrue("'msnbot' did not match any pattern", SpiderDetector.isSpider(req));
|
||||
|
||||
req.setAgent("Firefox");
|
||||
assertFalse("'Firefox' matched a pattern", SpiderDetector.isSpider(req));
|
||||
|
||||
// Test IP patterns
|
||||
candidate = "192.168.2.1";
|
||||
req.setAddress(candidate);
|
||||
assertTrue(candidate + " did not match IP patterns", SpiderDetector.isSpider(req));
|
||||
|
||||
req.setAddress(NOT_A_BOT_ADDRESS);
|
||||
assertFalse(NOT_A_BOT_ADDRESS + " matched IP patterns", SpiderDetector.isSpider(req));
|
||||
|
||||
// Test DNS patterns
|
||||
candidate = "baiduspider-dspace-test.crawl.baidu.com";
|
||||
req.setRemoteHost(candidate);
|
||||
assertTrue(candidate + " did not match DNS patterns", SpiderDetector.isSpider(req));
|
||||
|
||||
candidate = "wiki.dspace.org";
|
||||
req.setRemoteHost(candidate);
|
||||
assertFalse(candidate + " matched DNS patterns", SpiderDetector.isSpider(req));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -13,7 +13,7 @@
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
|
||||
<!--
|
||||
Package DSpace's common testing environment (configuration, etc.)
|
||||
Package DSpace's common testing environment (configuration, etc.)
|
||||
-->
|
||||
<id>testEnvironment</id>
|
||||
<formats>
|
||||
@@ -49,9 +49,6 @@
|
||||
<fileSet> <!-- test data -->
|
||||
<directory>src/test/data/dspaceFolder</directory>
|
||||
<outputDirectory />
|
||||
<includes>
|
||||
<include>assetstore/**/*</include>
|
||||
</includes>
|
||||
</fileSet>
|
||||
</fileSets>
|
||||
</sources>
|
||||
|
Reference in New Issue
Block a user