Update testing for new features; remove Spring-based configuration.

This commit is contained in:
Mark H. Wood
2013-06-21 15:31:55 -04:00
parent b0fbceed01
commit 1bb3cbc4e5
11 changed files with 81 additions and 195 deletions

View File

@@ -1,28 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
The contents of this file are subject to the license and copyright
detailed in the LICENSE and NOTICE files at the root of the source
tree and available online at
http://www.dspace.org/license/
-->
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd">
<description>
User-Agents to be marked as "spiders" in statistics.
</description>
<bean class="org.dspace.statistics.util.SpiderDetector">
<property name="AgentPatterns">
<description>
java.util.regex regular expression patterns to match User-Agent
headers of known spiders.
</description>
<list>
<value>^msnbot</value>
</list>
</property>
</bean>
</beans>

View File

@@ -96,6 +96,18 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>com.mycila.maven-license-plugin</groupId>
<artifactId>maven-license-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/src/test/resources/**</exclude>
<exclude>**/src/test/data/**</exclude>
<exclude>**/.gitignore</exclude>
<exclude>src/test/data/dspaceFolder/config/spiders/**</exclude>
</excludes>
</configuration>
</plugin>
<!-- This plugin allows us to run a Groovy script in our Maven POM
(see: http://gmaven.codehaus.org/Executing+Groovy+Code )
We are generating a OS-agnostic version (agnostic.build.dir) of

View File

@@ -1,31 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.util;
import java.util.List;
/**
 * Simple holder that wraps one list of pattern strings.
 * <p>
 * The list handed to the constructor is kept by reference (no copy is made)
 * and is returned unchanged by {@link #getPatterns()}.
 *
 * @author mwood
 */
public class AgentPatternList {
    /** The wrapped pattern strings, exactly as supplied to the constructor. */
    private final List<String> patterns;

    /** Inaccessible no-argument constructor; leaves the holder without a list. */
    private AgentPatternList() {
        this.patterns = null;
    }

    /**
     * Wrap the given list of pattern strings.
     *
     * @param patterns the pattern strings to hold.
     */
    public AgentPatternList(List<String> patterns) {
        this.patterns = patterns;
    }

    /**
     * Get the wrapped pattern strings.
     *
     * @return the same list instance that was passed to the constructor.
     */
    public List<String> getPatterns() {
        return this.patterns;
    }
}

View File

@@ -83,37 +83,6 @@ public class SpiderDetector {
return patterns;
}
/**
 * Unpack a list of lists of patterns and compile them to Patterns,
 * replacing whatever agent patterns were previously held.
 * We have to do the list-of-lists to get Spring to accumulate them across
 * configuration files.
 *
 * @param agentPatternLists holders whose pattern strings are compiled, in
 *          order, and added to the agent pattern collection.
 * @throws java.util.regex.PatternSyntaxException if a pattern string is not
 *          a valid regular expression.
 */
static public void setAgentPatterns(List<AgentPatternList> agentPatternLists)
{
    // Start from an empty collection so that this call replaces, not appends.
    clearAgentPatterns();

    for (AgentPatternList agentPatterns : agentPatternLists)
    {
        for (String agentPattern : agentPatterns.getPatterns())
        {
            agents.add(Pattern.compile(agentPattern));
        }
    }

    log.info("Received {} agent patterns.", agents.size());
}
/**
 * Empty the agent pattern list.
 * <p>
 * Package-private (no access modifier).  Within this class it is called at
 * the start of {@code setAgentPatterns} so that newly supplied patterns
 * replace the old ones.
 */
static void clearAgentPatterns()
{
agents.clear();
}
/**
* Get an immutable Set representing all the Spider Addresses here
*
@@ -141,21 +110,24 @@ public class SpiderDetector {
if (spidersDir.exists() && spidersDir.isDirectory()) {
for (File file : spidersDir.listFiles()) {
for (String ip : readPatterns(file)) {
log.debug("Loading {}", ip);
if (!Character.isDigit(ip.charAt(0)))
{
try {
ip = DnsLookup.forward(ip);
log.debug("Resolved to {}", ip);
} catch (IOException e) {
log.warn("Not loading {}: {}", ip, e.getMessage());
continue;
if (file.isFile())
{
for (String ip : readPatterns(file)) {
log.debug("Loading {}", ip);
if (!Character.isDigit(ip.charAt(0)))
{
try {
ip = DnsLookup.forward(ip);
log.debug("Resolved to {}", ip);
} catch (IOException e) {
log.warn("Not loading {}: {}", ip, e.getMessage());
continue;
}
}
table.add(ip);
}
table.add(ip);
log.info("Loaded Spider IP file: " + file);
}
log.info("Loaded Spider IP file: " + file);
}
} else {
log.info("No spider file loaded");
@@ -169,7 +141,15 @@ public class SpiderDetector {
}
/** Load agent name patterns from all files in a single subdirectory of config/spiders. */
/**
* Load agent name patterns from all files in a single subdirectory of config/spiders.
*
* @param directory simple directory name (e.g. "agents").
* "${dspace.dir}/config/spiders" will be prepended to yield the path to
* the directory of pattern files.
* @param patternList patterns read from the files in {@code directory} will
* be added to this List.
*/
private static void loadPatterns(String directory, List<Pattern> patternList)
{
String dspaceHome = ConfigurationManager.getProperty("dspace.dir");
@@ -193,12 +173,15 @@ public class SpiderDetector {
{
patternList.add(Pattern.compile(pattern));
}
log.info("Loaded pattern file: {}", file.getPath());
}
}
else
{
log.info("No patterns loaded from {}", patternsDir.getPath());
}
}
/* TODO Load host name patterns from all files in config/spiders/dns. */
/**
* Static Service Method for testing spiders against existing spider files.
* <p>
@@ -251,7 +234,7 @@ public class SpiderDetector {
for (Pattern candidate : domains)
{
if (candidate.matcher(hostname).find())
if (candidate.matcher(hostname).find()) // XXX anchored?
{
return true;
}

View File

@@ -0,0 +1 @@
^baiduspider-.*\.crawl\.baidu\.com

View File

@@ -0,0 +1,2 @@
# For testing
192.168.2.1

View File

@@ -1,61 +0,0 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.statistics.util;
import java.util.ArrayList;
import java.util.List;
import org.junit.*;
import static org.junit.Assert.assertEquals;
/**
 * Unit test for {@link AgentPatternList}.
 *
 * @author mwood
 */
public class AgentPatternListTest
{
    public AgentPatternListTest()
    {
    }

    /** Class-level fixture hook; nothing to prepare. */
    @BeforeClass
    public static void setUpClass()
            throws Exception
    {
    }

    /** Class-level fixture hook; nothing to release. */
    @AfterClass
    public static void tearDownClass()
            throws Exception
    {
    }

    /** Per-test fixture hook; nothing to prepare. */
    @Before
    public void setUp()
    {
    }

    /** Per-test fixture hook; nothing to release. */
    @After
    public void tearDown()
    {
    }

    /**
     * Test of getPatterns method, of class AgentPatternList.
     * The list passed to the constructor must come back from the getter.
     */
    @Test
    public void testGetPatterns()
    {
        System.out.println("getPatterns");
        List<String> expResult = new ArrayList<String>();
        // Use a non-empty list: two distinct empty lists compare equal, so an
        // empty fixture could not tell the right list from a wrong one.
        expResult.add("^msnbot");
        AgentPatternList instance = new AgentPatternList(expResult);
        List<String> result = instance.getPatterns();
        assertEquals(expResult, result);
    }
}

View File

@@ -32,6 +32,8 @@ class DummyHttpServletRequest implements HttpServletRequest
private String address = null;
private String remoteHost = null;
public void setAgent(String agent)
{
this.agent = agent;
@@ -42,6 +44,11 @@ class DummyHttpServletRequest implements HttpServletRequest
this.address = address;
}
/**
 * Set the remote host name held by this dummy request.
 *
 * @param host value to store in the {@code remoteHost} field.
 */
public void setRemoteHost(String host)
{
this.remoteHost = host;
}
/* (non-Javadoc)
* @see javax.servlet.http.HttpServletRequest#getAuthType()
*/
@@ -463,8 +470,7 @@ class DummyHttpServletRequest implements HttpServletRequest
@Override
public String getRemoteHost()
{
// TODO Auto-generated method stub
return null;
return remoteHost;
}
/* (non-Javadoc)

View File

@@ -24,19 +24,10 @@ public class SpiderDetectorTest
{
/**
* Test method for {@link org.dspace.statistics.util.SpiderDetector#readIpAddresses(java.io.File)}.
* Test method for {@link org.dspace.statistics.util.SpiderDetector#readPatterns(java.io.File)}.
*/
@Test
public void testReadIpAddresses()
{
// FIXME fail("Not yet implemented");
}
/**
* Test method for {@link org.dspace.statistics.util.SpiderDetector#setAgentPatterns(java.util.List)}.
*/
@Test
public void testSetAgentPatterns()
public void testReadPatterns()
{
// FIXME fail("Not yet implemented");
}
@@ -58,25 +49,38 @@ public class SpiderDetectorTest
{
Mockit.setUpMocks(MockSolrLogger.class); // Don't test SolrLogger here
final String NOT_A_BOT_ADDRESS = "192.168.0.1";
DummyHttpServletRequest req = new DummyHttpServletRequest();
req.setAddress("192.168.0.1"); // avoid surprises
req.setAddress(NOT_A_BOT_ADDRESS); // avoid surprises
req.setRemoteHost("notabot.example.com"); // avoid surprises
req.setAgent("Firefox"); // avoid surprises
// Some pattern strings
List<String> testPatterns = new ArrayList<String>();
testPatterns.add("^msnbot");
// Wrap it in an AgentPatternList
AgentPatternList patternList = new AgentPatternList(testPatterns);
List<AgentPatternList> patternLists = new ArrayList<AgentPatternList>();
patternLists.add(patternList);
// Test!
SpiderDetector.clearAgentPatterns(); // start fresh, in case Spring is active
SpiderDetector.setAgentPatterns(patternLists);
String candidate;
// Test agent patterns
req.setAgent("msnbot is watching you");
assertTrue("'msnbot' did not match any pattern", SpiderDetector.isSpider(req));
req.setAgent("Firefox");
assertFalse("'Firefox' matched a pattern", SpiderDetector.isSpider(req));
// Test IP patterns
candidate = "192.168.2.1";
req.setAddress(candidate);
assertTrue(candidate + " did not match IP patterns", SpiderDetector.isSpider(req));
req.setAddress(NOT_A_BOT_ADDRESS);
assertFalse(NOT_A_BOT_ADDRESS + " matched IP patterns", SpiderDetector.isSpider(req));
// Test DNS patterns
candidate = "baiduspider-dspace-test.crawl.baidu.com";
req.setRemoteHost(candidate);
assertTrue(candidate + " did not match DNS patterns", SpiderDetector.isSpider(req));
candidate = "wiki.dspace.org";
req.setRemoteHost(candidate);
assertFalse(candidate + " matched DNS patterns", SpiderDetector.isSpider(req));
}
/**

View File

@@ -13,7 +13,7 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
<!--
Package DSpace's common testing environment (configuration, etc.)
Package DSpace's common testing environment (configuration, etc.)
-->
<id>testEnvironment</id>
<formats>
@@ -49,9 +49,6 @@
<fileSet> <!-- test data -->
<directory>src/test/data/dspaceFolder</directory>
<outputDirectory />
<includes>
<include>assetstore/**/*</include>
</includes>
</fileSet>
</fileSets>
</sources>