mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-07 18:14:26 +00:00
[DS-440] Add Launcher entry and adjust command names to be uniform for stats tools. Consolidate DownloadSpiderIPs into StatisticsClient.
git-svn-id: http://scm.dspace.org/svn/repo/dspace/trunk@4754 9c30dcfa-912a-0410-8fc2-9e0234be79fd
This commit is contained in:
@@ -1,114 +0,0 @@
|
|||||||
/*
|
|
||||||
* DownloadSpiderIPs.java
|
|
||||||
*
|
|
||||||
* Version: $Revision:$
|
|
||||||
*
|
|
||||||
* Date: $Date:$
|
|
||||||
*
|
|
||||||
* Copyright (c) 2002-2010, The DSpace Foundation. All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are
|
|
||||||
* met:
|
|
||||||
*
|
|
||||||
* - Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
*
|
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* - Neither the name of the DSpace Foundation nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived from
|
|
||||||
* this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
|
||||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
||||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
|
||||||
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|
||||||
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
|
||||||
* DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.dspace.statistics.util;
|
|
||||||
|
|
||||||
import org.apache.tools.ant.taskdefs.Get;
|
|
||||||
import org.dspace.core.ConfigurationManager;
|
|
||||||
|
|
||||||
import java.io.*;
|
|
||||||
import java.net.InetAddress;
|
|
||||||
import java.net.URL;
|
|
||||||
import java.net.URLEncoder;
|
|
||||||
import java.util.Vector;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Class to download and process lists of search engine spider IP addresses
|
|
||||||
*
|
|
||||||
* @author Stuart Lewis
|
|
||||||
* @author Mark Diggory (mdiggory at atmire.com)
|
|
||||||
*/
|
|
||||||
public class DownloadSpiderIPs
|
|
||||||
{
|
|
||||||
/** Vector of entries */
|
|
||||||
private static Vector<String> ips;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Main method to run the script
|
|
||||||
*
|
|
||||||
* @param args The command line arguments
|
|
||||||
*/
|
|
||||||
public static void main(String[] args)
|
|
||||||
{
|
|
||||||
try
|
|
||||||
{
|
|
||||||
System.out.println("Downloading latest spider IP addresses:");
|
|
||||||
ips = new Vector<String>();
|
|
||||||
|
|
||||||
// Get the list URLs to download from
|
|
||||||
String urls = ConfigurationManager.getProperty("solr.spiderips.urls");
|
|
||||||
if ((urls == null) || ("".equals(urls)))
|
|
||||||
{
|
|
||||||
System.err.println(" - Missing setting from dspace.cfg: solr.spiderips.urls");
|
|
||||||
System.exit(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the location of spiders directory
|
|
||||||
File spiders = new File(ConfigurationManager.getProperty("dspace.dir"),"config/spiders");
|
|
||||||
|
|
||||||
if(!spiders.exists())
|
|
||||||
spiders.mkdirs();
|
|
||||||
|
|
||||||
String[] values = urls.split(",");
|
|
||||||
for (String value : values)
|
|
||||||
{
|
|
||||||
value = value.trim();
|
|
||||||
System.out.println(" Downloading: " + value);
|
|
||||||
|
|
||||||
URL url = new URL(value);
|
|
||||||
|
|
||||||
Get get = new Get();
|
|
||||||
get.setDest(new File(spiders, url.getHost() + url.getPath().replace("/","-")));
|
|
||||||
get.setSrc(url);
|
|
||||||
get.setUseTimestamp(true);
|
|
||||||
get.execute();
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
} catch (Exception e)
|
|
||||||
{
|
|
||||||
System.err.println(" - Error: " + e.getMessage());
|
|
||||||
e.printStackTrace();
|
|
||||||
System.exit(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
@@ -45,6 +45,7 @@ import org.apache.commons.lang.time.DateFormatUtils;
|
|||||||
import org.apache.solr.client.solrj.SolrServerException;
|
import org.apache.solr.client.solrj.SolrServerException;
|
||||||
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
|
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
import org.apache.tools.ant.taskdefs.Get;
|
||||||
import org.dspace.content.*;
|
import org.dspace.content.*;
|
||||||
import org.dspace.content.Collection;
|
import org.dspace.content.Collection;
|
||||||
import org.dspace.core.ConfigurationManager;
|
import org.dspace.core.ConfigurationManager;
|
||||||
@@ -54,6 +55,7 @@ import org.dspace.eperson.EPerson;
|
|||||||
import org.dspace.statistics.SolrLogger;
|
import org.dspace.statistics.SolrLogger;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
|
import java.net.URL;
|
||||||
import java.text.DecimalFormat;
|
import java.text.DecimalFormat;
|
||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
@@ -91,9 +93,13 @@ public class StatisticsClient
|
|||||||
|
|
||||||
Options options = new Options();
|
Options options = new Options();
|
||||||
|
|
||||||
options.addOption("m", "mark-bots", false, "Update Marked Robots By IP in Statistics");
|
options.addOption("u", "update-spider-files", false,
|
||||||
options.addOption("f", "delete-bots-by-flag", false, "Delete Robots in Statistics By IsBot Field");
|
"Update Spider IP Files from internet into " +
|
||||||
options.addOption("i", "delete-bots-by-ip", false, "Delete Robots in Statistics By spider ip's");
|
ConfigurationManager.getProperty("dspace.dir") + "/config/spiders");
|
||||||
|
|
||||||
|
options.addOption("m", "mark-spiders", false, "Update isBot Flag in Solr");
|
||||||
|
options.addOption("f", "delete-spiders-by-flag", false, "Delete Spiders in Solr By isBot Flag");
|
||||||
|
options.addOption("i", "delete-spiders-by-ip", false, "Delete Spiders in Solr By IP Address");
|
||||||
options.addOption("h", "help", false, "help");
|
options.addOption("h", "help", false, "help");
|
||||||
|
|
||||||
CommandLine line = parser.parse(options, args);
|
CommandLine line = parser.parse(options, args);
|
||||||
@@ -104,7 +110,11 @@ public class StatisticsClient
|
|||||||
printHelp(options, 0);
|
printHelp(options, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (line.hasOption('m'))
|
if(line.hasOption("u"))
|
||||||
|
{
|
||||||
|
StatisticsClient.updateSpiderFiles();
|
||||||
|
}
|
||||||
|
else if (line.hasOption('m'))
|
||||||
{
|
{
|
||||||
SolrLogger.markRobotsByIP();
|
SolrLogger.markRobotsByIP();
|
||||||
}
|
}
|
||||||
@@ -122,5 +132,53 @@ public class StatisticsClient
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Method to update Spiders in config directory.
|
||||||
|
*/
|
||||||
|
private static void updateSpiderFiles()
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
System.out.println("Downloading latest spider IP addresses:");
|
||||||
|
|
||||||
|
// Get the list URLs to download from
|
||||||
|
String urls = ConfigurationManager.getProperty("solr.spiderips.urls");
|
||||||
|
if ((urls == null) || ("".equals(urls)))
|
||||||
|
{
|
||||||
|
System.err.println(" - Missing setting from dspace.cfg: solr.spiderips.urls");
|
||||||
|
System.exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the location of spiders directory
|
||||||
|
File spiders = new File(ConfigurationManager.getProperty("dspace.dir"),"config/spiders");
|
||||||
|
|
||||||
|
if(!spiders.exists())
|
||||||
|
spiders.mkdirs();
|
||||||
|
|
||||||
|
String[] values = urls.split(",");
|
||||||
|
for (String value : values)
|
||||||
|
{
|
||||||
|
value = value.trim();
|
||||||
|
System.out.println(" Downloading: " + value);
|
||||||
|
|
||||||
|
URL url = new URL(value);
|
||||||
|
|
||||||
|
Get get = new Get();
|
||||||
|
get.setDest(new File(spiders, url.getHost() + url.getPath().replace("/","-")));
|
||||||
|
get.setSrc(url);
|
||||||
|
get.setUseTimestamp(true);
|
||||||
|
get.execute();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} catch (Exception e)
|
||||||
|
{
|
||||||
|
System.err.println(" - Error: " + e.getMessage());
|
||||||
|
e.printStackTrace();
|
||||||
|
System.exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
@@ -145,22 +145,6 @@
|
|||||||
</step>
|
</step>
|
||||||
</command>
|
</command>
|
||||||
|
|
||||||
<command>
|
|
||||||
<name>log-converter</name>
|
|
||||||
<description>Convert dspace.log files ready for import into solr statistics</description>
|
|
||||||
<step>
|
|
||||||
<class>org.dspace.statistics.util.ClassicDSpaceLogConverter</class>
|
|
||||||
</step>
|
|
||||||
</command>
|
|
||||||
|
|
||||||
<command>
|
|
||||||
<name>log-importer</name>
|
|
||||||
<description>Import previously converted log files into solr statistics</description>
|
|
||||||
<step>
|
|
||||||
<class>org.dspace.statistics.util.StatisticsImporter</class>
|
|
||||||
</step>
|
|
||||||
</command>
|
|
||||||
|
|
||||||
<command>
|
<command>
|
||||||
<name>make-handle-config</name>
|
<name>make-handle-config</name>
|
||||||
<description>Run the handle server simple setup command</description>
|
<description>Run the handle server simple setup command</description>
|
||||||
@@ -261,6 +245,30 @@
|
|||||||
</step>
|
</step>
|
||||||
</command>
|
</command>
|
||||||
|
|
||||||
|
<command>
|
||||||
|
<name>stats-log-converter</name>
|
||||||
|
<description>Convert dspace.log files ready for import into solr statistics</description>
|
||||||
|
<step>
|
||||||
|
<class>org.dspace.statistics.util.ClassicDSpaceLogConverter</class>
|
||||||
|
</step>
|
||||||
|
</command>
|
||||||
|
|
||||||
|
<command>
|
||||||
|
<name>stats-log-importer</name>
|
||||||
|
<description>Import previously converted log files into solr statistics</description>
|
||||||
|
<step>
|
||||||
|
<class>org.dspace.statistics.util.StatisticsImporter</class>
|
||||||
|
</step>
|
||||||
|
</command>
|
||||||
|
|
||||||
|
<command>
|
||||||
|
<name>stats-util</name>
|
||||||
|
<description>Statistics Client for Maintenance of Solr Statistics Indexes</description>
|
||||||
|
<step>
|
||||||
|
<class>org.dspace.statistics.util.StatisticsClient</class>
|
||||||
|
</step>
|
||||||
|
</command>
|
||||||
|
|
||||||
<command>
|
<command>
|
||||||
<name>structure-builder</name>
|
<name>structure-builder</name>
|
||||||
<description>Build DSpace community and collection structure</description>
|
<description>Build DSpace community and collection structure</description>
|
||||||
@@ -301,12 +309,4 @@
|
|||||||
</step>
|
</step>
|
||||||
</command>
|
</command>
|
||||||
|
|
||||||
<command>
|
|
||||||
<name>update-spider-ips</name>
|
|
||||||
<description>Update the list of known search engine IP addresses</description>
|
|
||||||
<step>
|
|
||||||
<class>org.dspace.statistics.util.DownloadSpiderIPs</class>
|
|
||||||
</step>
|
|
||||||
</command>
|
|
||||||
|
|
||||||
</commands>
|
</commands>
|
Reference in New Issue
Block a user