mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-07 10:04:21 +00:00
[DS-440] Add Launcher entry and adjust command names to be uniform for stats tools. Consolidate DownloadSpiderIPs into StatisticsClient.
git-svn-id: http://scm.dspace.org/svn/repo/dspace/trunk@4754 9c30dcfa-912a-0410-8fc2-9e0234be79fd
This commit is contained in:
@@ -1,114 +0,0 @@
|
||||
/*
|
||||
* DownloadSpiderIPs.java
|
||||
*
|
||||
* Version: $Revision:$
|
||||
*
|
||||
* Date: $Date:$
|
||||
*
|
||||
* Copyright (c) 2002-2010, The DSpace Foundation. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* - Neither the name of the DSpace Foundation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
||||
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
||||
* DAMAGE.
|
||||
*/
|
||||
|
||||
package org.dspace.statistics.util;
|
||||
|
||||
import org.apache.tools.ant.taskdefs.Get;
|
||||
import org.dspace.core.ConfigurationManager;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.InetAddress;
|
||||
import java.net.URL;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.Vector;
|
||||
|
||||
/**
|
||||
* Class to download and process lists of search engine spider IP addresses
|
||||
*
|
||||
* @author Stuart Lewis
|
||||
* @author Mark Diggory (mdiggory at atmire.com)
|
||||
*/
|
||||
public class DownloadSpiderIPs
|
||||
{
|
||||
/** Vector of entries */
|
||||
private static Vector<String> ips;
|
||||
|
||||
/**
|
||||
* Main method to run the script
|
||||
*
|
||||
* @param args The command line arguments
|
||||
*/
|
||||
public static void main(String[] args)
|
||||
{
|
||||
try
|
||||
{
|
||||
System.out.println("Downloading latest spider IP addresses:");
|
||||
ips = new Vector<String>();
|
||||
|
||||
// Get the list URLs to download from
|
||||
String urls = ConfigurationManager.getProperty("solr.spiderips.urls");
|
||||
if ((urls == null) || ("".equals(urls)))
|
||||
{
|
||||
System.err.println(" - Missing setting from dspace.cfg: solr.spiderips.urls");
|
||||
System.exit(0);
|
||||
}
|
||||
|
||||
// Get the location of spiders directory
|
||||
File spiders = new File(ConfigurationManager.getProperty("dspace.dir"),"config/spiders");
|
||||
|
||||
if(!spiders.exists())
|
||||
spiders.mkdirs();
|
||||
|
||||
String[] values = urls.split(",");
|
||||
for (String value : values)
|
||||
{
|
||||
value = value.trim();
|
||||
System.out.println(" Downloading: " + value);
|
||||
|
||||
URL url = new URL(value);
|
||||
|
||||
Get get = new Get();
|
||||
get.setDest(new File(spiders, url.getHost() + url.getPath().replace("/","-")));
|
||||
get.setSrc(url);
|
||||
get.setUseTimestamp(true);
|
||||
get.execute();
|
||||
|
||||
}
|
||||
|
||||
|
||||
} catch (Exception e)
|
||||
{
|
||||
System.err.println(" - Error: " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
System.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
@@ -45,6 +45,7 @@ import org.apache.commons.lang.time.DateFormatUtils;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.tools.ant.taskdefs.Get;
|
||||
import org.dspace.content.*;
|
||||
import org.dspace.content.Collection;
|
||||
import org.dspace.core.ConfigurationManager;
|
||||
@@ -54,6 +55,7 @@ import org.dspace.eperson.EPerson;
|
||||
import org.dspace.statistics.SolrLogger;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.URL;
|
||||
import java.text.DecimalFormat;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.*;
|
||||
@@ -91,9 +93,13 @@ public class StatisticsClient
|
||||
|
||||
Options options = new Options();
|
||||
|
||||
options.addOption("m", "mark-bots", false, "Update Marked Robots By IP in Statistics");
|
||||
options.addOption("f", "delete-bots-by-flag", false, "Delete Robots in Statistics By IsBot Field");
|
||||
options.addOption("i", "delete-bots-by-ip", false, "Delete Robots in Statistics By spider ip's");
|
||||
options.addOption("u", "update-spider-files", false,
|
||||
"Update Spider IP Files from internet into " +
|
||||
ConfigurationManager.getProperty("dspace.dir") + "/config/spiders");
|
||||
|
||||
options.addOption("m", "mark-spiders", false, "Update isBog Flag in Solr");
|
||||
options.addOption("f", "delete-spiders-by-flag", false, "Delete Spiders in Solr By isBot Flag");
|
||||
options.addOption("i", "delete-spiders-by-ip", false, "Delete Spiders in Solr By IP Address");
|
||||
options.addOption("h", "help", false, "help");
|
||||
|
||||
CommandLine line = parser.parse(options, args);
|
||||
@@ -104,7 +110,11 @@ public class StatisticsClient
|
||||
printHelp(options, 0);
|
||||
}
|
||||
|
||||
if (line.hasOption('m'))
|
||||
if(line.hasOption("u"))
|
||||
{
|
||||
StatisticsClient.updateSpiderFiles();
|
||||
}
|
||||
else if (line.hasOption('m'))
|
||||
{
|
||||
SolrLogger.markRobotsByIP();
|
||||
}
|
||||
@@ -122,5 +132,53 @@ public class StatisticsClient
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Method to update Spiders in config directory.
|
||||
*/
|
||||
private static void updateSpiderFiles()
|
||||
{
|
||||
try
|
||||
{
|
||||
System.out.println("Downloading latest spider IP addresses:");
|
||||
|
||||
// Get the list URLs to download from
|
||||
String urls = ConfigurationManager.getProperty("solr.spiderips.urls");
|
||||
if ((urls == null) || ("".equals(urls)))
|
||||
{
|
||||
System.err.println(" - Missing setting from dspace.cfg: solr.spiderips.urls");
|
||||
System.exit(0);
|
||||
}
|
||||
|
||||
// Get the location of spiders directory
|
||||
File spiders = new File(ConfigurationManager.getProperty("dspace.dir"),"config/spiders");
|
||||
|
||||
if(!spiders.exists())
|
||||
spiders.mkdirs();
|
||||
|
||||
String[] values = urls.split(",");
|
||||
for (String value : values)
|
||||
{
|
||||
value = value.trim();
|
||||
System.out.println(" Downloading: " + value);
|
||||
|
||||
URL url = new URL(value);
|
||||
|
||||
Get get = new Get();
|
||||
get.setDest(new File(spiders, url.getHost() + url.getPath().replace("/","-")));
|
||||
get.setSrc(url);
|
||||
get.setUseTimestamp(true);
|
||||
get.execute();
|
||||
|
||||
}
|
||||
|
||||
|
||||
} catch (Exception e)
|
||||
{
|
||||
System.err.println(" - Error: " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
System.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
@@ -145,22 +145,6 @@
|
||||
</step>
|
||||
</command>
|
||||
|
||||
<command>
|
||||
<name>log-converter</name>
|
||||
<description>Convert dspace.log files ready for import into solr statistics</description>
|
||||
<step>
|
||||
<class>org.dspace.statistics.util.ClassicDSpaceLogConverter</class>
|
||||
</step>
|
||||
</command>
|
||||
|
||||
<command>
|
||||
<name>log-importer</name>
|
||||
<description>Import previously converted log files into solr statistics</description>
|
||||
<step>
|
||||
<class>org.dspace.statistics.util.StatisticsImporter</class>
|
||||
</step>
|
||||
</command>
|
||||
|
||||
<command>
|
||||
<name>make-handle-config</name>
|
||||
<description>Run the handle server simple setup command</description>
|
||||
@@ -261,6 +245,30 @@
|
||||
</step>
|
||||
</command>
|
||||
|
||||
<command>
|
||||
<name>stats-log-converter</name>
|
||||
<description>Convert dspace.log files ready for import into solr statistics</description>
|
||||
<step>
|
||||
<class>org.dspace.statistics.util.ClassicDSpaceLogConverter</class>
|
||||
</step>
|
||||
</command>
|
||||
|
||||
<command>
|
||||
<name>stats-log-importer</name>
|
||||
<description>Import previously converted log files into solr statistics</description>
|
||||
<step>
|
||||
<class>org.dspace.statistics.util.StatisticsImporter</class>
|
||||
</step>
|
||||
</command>
|
||||
|
||||
<command>
|
||||
<name>stats-util</name>
|
||||
<description>Statistics Client for Maintenance of Solr Statistics Indexes</description>
|
||||
<step>
|
||||
<class>org.dspace.statistics.util.StatisticsClient</class>
|
||||
</step>
|
||||
</command>
|
||||
|
||||
<command>
|
||||
<name>structure-builder</name>
|
||||
<description>Build DSpace community and collection structure</description>
|
||||
@@ -301,12 +309,4 @@
|
||||
</step>
|
||||
</command>
|
||||
|
||||
<command>
|
||||
<name>update-spider-ips</name>
|
||||
<description>Update the list of known search engine IP addresses</description>
|
||||
<step>
|
||||
<class>org.dspace.statistics.util.DownloadSpiderIPs</class>
|
||||
</step>
|
||||
</command>
|
||||
|
||||
</commands>
|
Reference in New Issue
Block a user