[DS-440] Add Launcher entry and adjust command names to be uniform for stats tools. Consolidate DownloadSpiderIPs into StatisticsClient.

git-svn-id: http://scm.dspace.org/svn/repo/dspace/trunk@4754 9c30dcfa-912a-0410-8fc2-9e0234be79fd
This commit is contained in:
Mark Diggory
2010-02-09 07:36:43 +00:00
parent 0cc539d981
commit ce47431200
3 changed files with 86 additions and 142 deletions

View File

@@ -1,114 +0,0 @@
/*
* DownloadSpiderIPs.java
*
* Version: $Revision:$
*
* Date: $Date:$
*
* Copyright (c) 2002-2010, The DSpace Foundation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the DSpace Foundation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
package org.dspace.statistics.util;
import org.apache.tools.ant.taskdefs.Get;
import org.dspace.core.ConfigurationManager;
import java.io.*;
import java.net.InetAddress;
import java.net.URL;
import java.net.URLEncoder;
import java.util.Vector;
/**
 * Command-line tool that downloads lists of search engine spider IP addresses.
 * <p>
 * The comma-separated list of source URLs is read from the
 * <code>solr.spiderips.urls</code> configuration property. Each list is stored
 * in the <code>config/spiders</code> directory below the configured
 * <code>dspace.dir</code>, in a file named after the URL's host followed by its
 * path with every "/" replaced by "-".
 *
 * @author Stuart Lewis
 * @author Mark Diggory (mdiggory at atmire.com)
 */
public class DownloadSpiderIPs
{
    /**
     * Main method to run the script.
     * <p>
     * Exits with status 0 (after printing a message to stderr) when
     * <code>solr.spiderips.urls</code> is not set or is blank, and with status 1
     * when any exception is raised while preparing the directory or downloading.
     *
     * @param args The command line arguments (not used)
     */
    public static void main(String[] args)
    {
        try
        {
            System.out.println("Downloading latest spider IP addresses:");

            // Get the list URLs to download from. A whitespace-only value is
            // treated the same as a missing one.
            String urls = ConfigurationManager.getProperty("solr.spiderips.urls");
            if ((urls == null) || (urls.trim().length() == 0))
            {
                System.err.println(" - Missing setting from dspace.cfg: solr.spiderips.urls");
                System.exit(0);
            }

            // Get the location of spiders directory, creating it when needed.
            // mkdirs() returns false both on failure and when the directory
            // already exists, hence the follow-up isDirectory() check.
            File spiders = new File(ConfigurationManager.getProperty("dspace.dir"), "config/spiders");
            if (!spiders.mkdirs() && !spiders.isDirectory())
            {
                throw new IOException("Unable to create spiders directory: " + spiders.getAbsolutePath());
            }

            for (String value : urls.split(","))
            {
                value = value.trim();
                if (value.length() == 0)
                {
                    // Tolerate stray commas (e.g. a trailing one) instead of
                    // aborting every download with a MalformedURLException.
                    continue;
                }

                System.out.println(" Downloading: " + value);
                URL url = new URL(value);

                // Delegate the transfer to Ant's Get task; with timestamp
                // checking enabled Ant documents that the file is only fetched
                // when the remote copy is newer than the local one.
                Get get = new Get();
                get.setDest(new File(spiders, url.getHost() + url.getPath().replace("/", "-")));
                get.setSrc(url);
                get.setUseTimestamp(true);
                get.execute();
            }
        }
        catch (Exception e)
        {
            System.err.println(" - Error: " + e.getMessage());
            e.printStackTrace();
            System.exit(1);
        }
    }
}

View File

@@ -45,6 +45,7 @@ import org.apache.commons.lang.time.DateFormatUtils;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.apache.tools.ant.taskdefs.Get;
import org.dspace.content.*; import org.dspace.content.*;
import org.dspace.content.Collection; import org.dspace.content.Collection;
import org.dspace.core.ConfigurationManager; import org.dspace.core.ConfigurationManager;
@@ -54,6 +55,7 @@ import org.dspace.eperson.EPerson;
import org.dspace.statistics.SolrLogger; import org.dspace.statistics.SolrLogger;
import java.io.*; import java.io.*;
import java.net.URL;
import java.text.DecimalFormat; import java.text.DecimalFormat;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.*; import java.util.*;
@@ -91,9 +93,13 @@ public class StatisticsClient
Options options = new Options(); Options options = new Options();
options.addOption("m", "mark-bots", false, "Update Marked Robots By IP in Statistics"); options.addOption("u", "update-spider-files", false,
options.addOption("f", "delete-bots-by-flag", false, "Delete Robots in Statistics By IsBot Field"); "Update Spider IP Files from internet into " +
options.addOption("i", "delete-bots-by-ip", false, "Delete Robots in Statistics By spider ip's"); ConfigurationManager.getProperty("dspace.dir") + "/config/spiders");
options.addOption("m", "mark-spiders", false, "Update isBog Flag in Solr");
options.addOption("f", "delete-spiders-by-flag", false, "Delete Spiders in Solr By isBot Flag");
options.addOption("i", "delete-spiders-by-ip", false, "Delete Spiders in Solr By IP Address");
options.addOption("h", "help", false, "help"); options.addOption("h", "help", false, "help");
CommandLine line = parser.parse(options, args); CommandLine line = parser.parse(options, args);
@@ -104,7 +110,11 @@ public class StatisticsClient
printHelp(options, 0); printHelp(options, 0);
} }
if (line.hasOption('m')) if(line.hasOption("u"))
{
StatisticsClient.updateSpiderFiles();
}
else if (line.hasOption('m'))
{ {
SolrLogger.markRobotsByIP(); SolrLogger.markRobotsByIP();
} }
@@ -122,5 +132,53 @@ public class StatisticsClient
} }
} }
/**
 * Method to update Spiders in config directory.
 * <p>
 * Reads the comma-separated URL list from the
 * <code>solr.spiderips.urls</code> configuration property and downloads each
 * entry into the <code>config/spiders</code> directory below the configured
 * <code>dspace.dir</code>. Each file is named after the URL's host followed by
 * its path with every "/" replaced by "-".
 * <p>
 * Exits the JVM with status 0 (after printing a message to stderr) when the
 * property is not set or is blank, and with status 1 when any exception is
 * raised while preparing the directory or downloading.
 */
private static void updateSpiderFiles()
{
    try
    {
        System.out.println("Downloading latest spider IP addresses:");

        // Get the list URLs to download from. A whitespace-only value is
        // treated the same as a missing one.
        String urls = ConfigurationManager.getProperty("solr.spiderips.urls");
        if ((urls == null) || (urls.trim().length() == 0))
        {
            System.err.println(" - Missing setting from dspace.cfg: solr.spiderips.urls");
            System.exit(0);
        }

        // Get the location of spiders directory, creating it when needed.
        // mkdirs() returns false both on failure and when the directory
        // already exists, hence the follow-up isDirectory() check.
        File spiders = new File(ConfigurationManager.getProperty("dspace.dir"), "config/spiders");
        if (!spiders.mkdirs() && !spiders.isDirectory())
        {
            throw new IOException("Unable to create spiders directory: " + spiders.getAbsolutePath());
        }

        for (String value : urls.split(","))
        {
            value = value.trim();
            if (value.length() == 0)
            {
                // Tolerate stray commas (e.g. a trailing one) instead of
                // aborting every download with a MalformedURLException.
                continue;
            }

            System.out.println(" Downloading: " + value);
            URL url = new URL(value);

            // Delegate the transfer to Ant's Get task; with timestamp
            // checking enabled Ant documents that the file is only fetched
            // when the remote copy is newer than the local one.
            Get get = new Get();
            get.setDest(new File(spiders, url.getHost() + url.getPath().replace("/", "-")));
            get.setSrc(url);
            get.setUseTimestamp(true);
            get.execute();
        }
    }
    catch (Exception e)
    {
        System.err.println(" - Error: " + e.getMessage());
        e.printStackTrace();
        System.exit(1);
    }
}
} }

View File

@@ -145,22 +145,6 @@
</step> </step>
</command> </command>
<command>
<name>log-converter</name>
<description>Convert dspace.log files ready for import into solr statistics</description>
<step>
<class>org.dspace.statistics.util.ClassicDSpaceLogConverter</class>
</step>
</command>
<command>
<name>log-importer</name>
<description>Import previously converted log files into solr statistics</description>
<step>
<class>org.dspace.statistics.util.StatisticsImporter</class>
</step>
</command>
<command> <command>
<name>make-handle-config</name> <name>make-handle-config</name>
<description>Run the handle server simple setup command</description> <description>Run the handle server simple setup command</description>
@@ -261,6 +245,30 @@
</step> </step>
</command> </command>
<command>
<name>stats-log-converter</name>
<description>Convert dspace.log files ready for import into solr statistics</description>
<step>
<class>org.dspace.statistics.util.ClassicDSpaceLogConverter</class>
</step>
</command>
<command>
<name>stats-log-importer</name>
<description>Import previously converted log files into solr statistics</description>
<step>
<class>org.dspace.statistics.util.StatisticsImporter</class>
</step>
</command>
<command>
<name>stats-util</name>
<description>Statistics Client for Maintenance of Solr Statistics Indexes</description>
<step>
<class>org.dspace.statistics.util.StatisticsClient</class>
</step>
</command>
<command> <command>
<name>structure-builder</name> <name>structure-builder</name>
<description>Build DSpace community and collection structure</description> <description>Build DSpace community and collection structure</description>
@@ -301,12 +309,4 @@
</step> </step>
</command> </command>
<command>
<name>update-spider-ips</name>
<description>Update the list of known search engine IP addresses</description>
<step>
<class>org.dspace.statistics.util.DownloadSpiderIPs</class>
</step>
</command>
</commands> </commands>