mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-17 15:03:18 +00:00
Update testing for new features; remove Spring-based configuration.
This commit is contained in:
@@ -1,28 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!--
|
|
||||||
The contents of this file are subject to the license and copyright
|
|
||||||
detailed in the LICENSE and NOTICE files at the root of the source
|
|
||||||
tree and available online at
|
|
||||||
|
|
||||||
http://www.dspace.org/license/
|
|
||||||
-->
|
|
||||||
<beans xmlns="http://www.springframework.org/schema/beans"
|
|
||||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
|
||||||
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd">
|
|
||||||
<description>
|
|
||||||
User-Agents to be marked as "spiders" in statistics.
|
|
||||||
</description>
|
|
||||||
|
|
||||||
<bean class="org.dspace.statistics.util.SpiderDetector">
|
|
||||||
<property name="AgentPatterns">
|
|
||||||
<description>
|
|
||||||
java.util.regex regular expression patterns to match User-Agent
|
|
||||||
headers of known spiders.
|
|
||||||
</description>
|
|
||||||
<list>
|
|
||||||
<value>^msnbot</value>
|
|
||||||
</list>
|
|
||||||
</property>
|
|
||||||
</bean>
|
|
||||||
|
|
||||||
</beans>
|
|
@@ -96,6 +96,18 @@
|
|||||||
</execution>
|
</execution>
|
||||||
</executions>
|
</executions>
|
||||||
</plugin>
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>com.mycila.maven-license-plugin</groupId>
|
||||||
|
<artifactId>maven-license-plugin</artifactId>
|
||||||
|
<configuration>
|
||||||
|
<excludes>
|
||||||
|
<exclude>**/src/test/resources/**</exclude>
|
||||||
|
<exclude>**/src/test/data/**</exclude>
|
||||||
|
<exclude>**/.gitignore</exclude>
|
||||||
|
<exclude>src/test/data/dspaceFolder/config/spiders/**</exclude>
|
||||||
|
</excludes>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
<!-- This plugin allows us to run a Groovy script in our Maven POM
|
<!-- This plugin allows us to run a Groovy script in our Maven POM
|
||||||
(see: http://gmaven.codehaus.org/Executing+Groovy+Code )
|
(see: http://gmaven.codehaus.org/Executing+Groovy+Code )
|
||||||
We are generating an OS-agnostic version (agnostic.build.dir) of
|
We are generating an OS-agnostic version (agnostic.build.dir) of
|
||||||
|
@@ -1,31 +0,0 @@
|
|||||||
/**
|
|
||||||
* The contents of this file are subject to the license and copyright
|
|
||||||
* detailed in the LICENSE and NOTICE files at the root of the source
|
|
||||||
* tree and available online at
|
|
||||||
*
|
|
||||||
* http://www.dspace.org/license/
|
|
||||||
*/
|
|
||||||
package org.dspace.statistics.util;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds a list of pattern strings.
|
|
||||||
*
|
|
||||||
* @author mwood
|
|
||||||
*/
|
|
||||||
public class AgentPatternList {
|
|
||||||
private final List<String> patterns;
|
|
||||||
|
|
||||||
private AgentPatternList() { patterns = null; }
|
|
||||||
|
|
||||||
public AgentPatternList(List<String> patterns)
|
|
||||||
{
|
|
||||||
this.patterns = patterns;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> getPatterns()
|
|
||||||
{
|
|
||||||
return patterns;
|
|
||||||
}
|
|
||||||
}
|
|
@@ -83,37 +83,6 @@ public class SpiderDetector {
|
|||||||
return patterns;
|
return patterns;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Unpack a list of lists of patterns and compile them to Patterns.
|
|
||||||
* We have to do the list-of-lists to get Spring to accumulate them across
|
|
||||||
* configuration files.
|
|
||||||
*
|
|
||||||
* @param agentPatternLists
|
|
||||||
* @throws PatternSyntaxException
|
|
||||||
*/
|
|
||||||
static public void setAgentPatterns(List<AgentPatternList> agentPatternLists)
|
|
||||||
{
|
|
||||||
clearAgentPatterns();
|
|
||||||
|
|
||||||
for (AgentPatternList agentPatterns : agentPatternLists)
|
|
||||||
{
|
|
||||||
for (String agentPattern : agentPatterns.getPatterns())
|
|
||||||
{
|
|
||||||
Pattern newPattern = Pattern.compile(agentPattern);
|
|
||||||
agents.add(newPattern);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
log.info("Received " + String.valueOf(agents.size()) + " agent patterns.");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Empty the agent pattern list.
|
|
||||||
*/
|
|
||||||
static void clearAgentPatterns()
|
|
||||||
{
|
|
||||||
agents.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get an immutable Set representing all the Spider Addresses here
|
* Get an immutable Set representing all the Spider Addresses here
|
||||||
*
|
*
|
||||||
@@ -141,21 +110,24 @@ public class SpiderDetector {
|
|||||||
|
|
||||||
if (spidersDir.exists() && spidersDir.isDirectory()) {
|
if (spidersDir.exists() && spidersDir.isDirectory()) {
|
||||||
for (File file : spidersDir.listFiles()) {
|
for (File file : spidersDir.listFiles()) {
|
||||||
for (String ip : readPatterns(file)) {
|
if (file.isFile())
|
||||||
log.debug("Loading {}", ip);
|
{
|
||||||
if (!Character.isDigit(ip.charAt(0)))
|
for (String ip : readPatterns(file)) {
|
||||||
{
|
log.debug("Loading {}", ip);
|
||||||
try {
|
if (!Character.isDigit(ip.charAt(0)))
|
||||||
ip = DnsLookup.forward(ip);
|
{
|
||||||
log.debug("Resolved to {}", ip);
|
try {
|
||||||
} catch (IOException e) {
|
ip = DnsLookup.forward(ip);
|
||||||
log.warn("Not loading {}: {}", ip, e.getMessage());
|
log.debug("Resolved to {}", ip);
|
||||||
continue;
|
} catch (IOException e) {
|
||||||
|
log.warn("Not loading {}: {}", ip, e.getMessage());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
table.add(ip);
|
||||||
}
|
}
|
||||||
table.add(ip);
|
log.info("Loaded Spider IP file: " + file);
|
||||||
}
|
}
|
||||||
log.info("Loaded Spider IP file: " + file);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
log.info("No spider file loaded");
|
log.info("No spider file loaded");
|
||||||
@@ -169,7 +141,15 @@ public class SpiderDetector {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Load agent name patterns from all files in a single subdirectory of config/spiders. */
|
/**
|
||||||
|
* Load agent name patterns from all files in a single subdirectory of config/spiders.
|
||||||
|
*
|
||||||
|
* @param directory simple directory name (e.g. "agents").
|
||||||
|
* "${dspace.dir}/config/spiders" will be prepended to yield the path to
|
||||||
|
* the directory of pattern files.
|
||||||
|
* @param patternList patterns read from the files in {@code directory} will
|
||||||
|
* be added to this List.
|
||||||
|
*/
|
||||||
private static void loadPatterns(String directory, List<Pattern> patternList)
|
private static void loadPatterns(String directory, List<Pattern> patternList)
|
||||||
{
|
{
|
||||||
String dspaceHome = ConfigurationManager.getProperty("dspace.dir");
|
String dspaceHome = ConfigurationManager.getProperty("dspace.dir");
|
||||||
@@ -193,12 +173,15 @@ public class SpiderDetector {
|
|||||||
{
|
{
|
||||||
patternList.add(Pattern.compile(pattern));
|
patternList.add(Pattern.compile(pattern));
|
||||||
}
|
}
|
||||||
|
log.info("Loaded pattern file: {}", file.getPath());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log.info("No patterns loaded from {}", patternsDir.getPath());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO Load host name patterns from all files in config/spiders/dns. */
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Static Service Method for testing spiders against existing spider files.
|
* Static Service Method for testing spiders against existing spider files.
|
||||||
* <p>
|
* <p>
|
||||||
@@ -251,7 +234,7 @@ public class SpiderDetector {
|
|||||||
|
|
||||||
for (Pattern candidate : domains)
|
for (Pattern candidate : domains)
|
||||||
{
|
{
|
||||||
if (candidate.matcher(hostname).find())
|
if (candidate.matcher(hostname).find()) // XXX anchored?
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@@ -0,0 +1 @@
|
|||||||
|
^msnbot
|
@@ -0,0 +1 @@
|
|||||||
|
^baiduspider-.*\.crawl\.baidu\.com
|
@@ -0,0 +1,2 @@
|
|||||||
|
# For testing
|
||||||
|
192.168.2.1
|
@@ -1,61 +0,0 @@
|
|||||||
/**
|
|
||||||
* The contents of this file are subject to the license and copyright
|
|
||||||
* detailed in the LICENSE and NOTICE files at the root of the source
|
|
||||||
* tree and available online at
|
|
||||||
*
|
|
||||||
* http://www.dspace.org/license/
|
|
||||||
*/
|
|
||||||
package org.dspace.statistics.util;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import org.junit.*;
|
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @author mwood
|
|
||||||
*/
|
|
||||||
public class AgentPatternListTest
|
|
||||||
{
|
|
||||||
|
|
||||||
public AgentPatternListTest()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
@BeforeClass
|
|
||||||
public static void setUpClass()
|
|
||||||
throws Exception
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
@AfterClass
|
|
||||||
public static void tearDownClass()
|
|
||||||
throws Exception
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
@Before
|
|
||||||
public void setUp()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
@After
|
|
||||||
public void tearDown()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test of getPatterns method, of class AgentPatternList.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testGetPatterns()
|
|
||||||
{
|
|
||||||
System.out.println("getPatterns");
|
|
||||||
List<String> expResult = new ArrayList<String>();
|
|
||||||
AgentPatternList instance = new AgentPatternList(expResult);
|
|
||||||
List result = instance.getPatterns();
|
|
||||||
assertEquals(expResult, result);
|
|
||||||
}
|
|
||||||
}
|
|
@@ -32,6 +32,8 @@ class DummyHttpServletRequest implements HttpServletRequest
|
|||||||
|
|
||||||
private String address = null;
|
private String address = null;
|
||||||
|
|
||||||
|
private String remoteHost = null;
|
||||||
|
|
||||||
public void setAgent(String agent)
|
public void setAgent(String agent)
|
||||||
{
|
{
|
||||||
this.agent = agent;
|
this.agent = agent;
|
||||||
@@ -42,6 +44,11 @@ class DummyHttpServletRequest implements HttpServletRequest
|
|||||||
this.address = address;
|
this.address = address;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setRemoteHost(String host)
|
||||||
|
{
|
||||||
|
this.remoteHost = host;
|
||||||
|
}
|
||||||
|
|
||||||
/* (non-Javadoc)
|
/* (non-Javadoc)
|
||||||
* @see javax.servlet.http.HttpServletRequest#getAuthType()
|
* @see javax.servlet.http.HttpServletRequest#getAuthType()
|
||||||
*/
|
*/
|
||||||
@@ -463,8 +470,7 @@ class DummyHttpServletRequest implements HttpServletRequest
|
|||||||
@Override
|
@Override
|
||||||
public String getRemoteHost()
|
public String getRemoteHost()
|
||||||
{
|
{
|
||||||
// TODO Auto-generated method stub
|
return remoteHost;
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* (non-Javadoc)
|
/* (non-Javadoc)
|
||||||
|
@@ -24,19 +24,10 @@ public class SpiderDetectorTest
|
|||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test method for {@link org.dspace.statistics.util.SpiderDetector#readIpAddresses(java.io.File)}.
|
* Test method for {@link org.dspace.statistics.util.SpiderDetector#readPatterns(java.io.File)}.
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testReadIpAddresses()
|
public void testReadPatterns()
|
||||||
{
|
|
||||||
// FIXME fail("Not yet implemented");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test method for {@link org.dspace.statistics.util.SpiderDetector#setAgentPatterns(java.util.List)}.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testSetAgentPatterns()
|
|
||||||
{
|
{
|
||||||
// FIXME fail("Not yet implemented");
|
// FIXME fail("Not yet implemented");
|
||||||
}
|
}
|
||||||
@@ -58,25 +49,38 @@ public class SpiderDetectorTest
|
|||||||
{
|
{
|
||||||
Mockit.setUpMocks(MockSolrLogger.class); // Don't test SolrLogger here
|
Mockit.setUpMocks(MockSolrLogger.class); // Don't test SolrLogger here
|
||||||
|
|
||||||
|
final String NOT_A_BOT_ADDRESS = "192.168.0.1";
|
||||||
|
|
||||||
DummyHttpServletRequest req = new DummyHttpServletRequest();
|
DummyHttpServletRequest req = new DummyHttpServletRequest();
|
||||||
req.setAddress("192.168.0.1"); // avoid surprises
|
req.setAddress(NOT_A_BOT_ADDRESS); // avoid surprises
|
||||||
|
req.setRemoteHost("notabot.example.com"); // avoid surprises
|
||||||
|
req.setAgent("Firefox"); // avoid surprises
|
||||||
|
|
||||||
// Some pattern strings
|
String candidate;
|
||||||
List<String> testPatterns = new ArrayList<String>();
|
|
||||||
testPatterns.add("^msnbot");
|
|
||||||
// Wrap it in an AgentPatternList
|
|
||||||
AgentPatternList patternList = new AgentPatternList(testPatterns);
|
|
||||||
List<AgentPatternList> patternLists = new ArrayList<AgentPatternList>();
|
|
||||||
patternLists.add(patternList);
|
|
||||||
// Test!
|
|
||||||
SpiderDetector.clearAgentPatterns(); // start fresh, in case Spring is active
|
|
||||||
SpiderDetector.setAgentPatterns(patternLists);
|
|
||||||
|
|
||||||
|
// Test agent patterns
|
||||||
req.setAgent("msnbot is watching you");
|
req.setAgent("msnbot is watching you");
|
||||||
assertTrue("'msnbot' did not match any pattern", SpiderDetector.isSpider(req));
|
assertTrue("'msnbot' did not match any pattern", SpiderDetector.isSpider(req));
|
||||||
|
|
||||||
req.setAgent("Firefox");
|
req.setAgent("Firefox");
|
||||||
assertFalse("'Firefox' matched a pattern", SpiderDetector.isSpider(req));
|
assertFalse("'Firefox' matched a pattern", SpiderDetector.isSpider(req));
|
||||||
|
|
||||||
|
// Test IP patterns
|
||||||
|
candidate = "192.168.2.1";
|
||||||
|
req.setAddress(candidate);
|
||||||
|
assertTrue(candidate + " did not match IP patterns", SpiderDetector.isSpider(req));
|
||||||
|
|
||||||
|
req.setAddress(NOT_A_BOT_ADDRESS);
|
||||||
|
assertFalse(NOT_A_BOT_ADDRESS + " matched IP patterns", SpiderDetector.isSpider(req));
|
||||||
|
|
||||||
|
// Test DNS patterns
|
||||||
|
candidate = "baiduspider-dspace-test.crawl.baidu.com";
|
||||||
|
req.setRemoteHost(candidate);
|
||||||
|
assertTrue(candidate + " did not match DNS patterns", SpiderDetector.isSpider(req));
|
||||||
|
|
||||||
|
candidate = "wiki.dspace.org";
|
||||||
|
req.setRemoteHost(candidate);
|
||||||
|
assertFalse(candidate + " matched DNS patterns", SpiderDetector.isSpider(req));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -13,7 +13,7 @@
|
|||||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
|
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
|
||||||
<!--
|
<!--
|
||||||
Package DSpace's common testing environment (configuration, etc.)
|
Package DSpace's common testing environment (configuration, etc.)
|
||||||
-->
|
-->
|
||||||
<id>testEnvironment</id>
|
<id>testEnvironment</id>
|
||||||
<formats>
|
<formats>
|
||||||
@@ -49,9 +49,6 @@
|
|||||||
<fileSet> <!-- test data -->
|
<fileSet> <!-- test data -->
|
||||||
<directory>src/test/data/dspaceFolder</directory>
|
<directory>src/test/data/dspaceFolder</directory>
|
||||||
<outputDirectory />
|
<outputDirectory />
|
||||||
<includes>
|
|
||||||
<include>assetstore/**/*</include>
|
|
||||||
</includes>
|
|
||||||
</fileSet>
|
</fileSet>
|
||||||
</fileSets>
|
</fileSets>
|
||||||
</sources>
|
</sources>
|
||||||
|
Reference in New Issue
Block a user