mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-18 15:33:09 +00:00
SF patch [2560974] for SF feature request [2560839] Make sitemap directory configurable
git-svn-id: http://scm.dspace.org/svn/repo/branches/dspace-1_5_x@3471 9c30dcfa-912a-0410-8fc2-9e0234be79fd
This commit is contained in:
@@ -1,384 +1,383 @@
|
|||||||
/*
|
/*
|
||||||
* GenerateSitemaps.java
|
* GenerateSitemaps.java
|
||||||
*
|
*
|
||||||
* Version: $Revision: 1.1 $
|
* Version: $Revision: 1.1 $
|
||||||
*
|
*
|
||||||
* Date: $Date: 2006/03/17 00:04:38 $
|
* Date: $Date: 2006/03/17 00:04:38 $
|
||||||
*
|
*
|
||||||
* Copyright (c) 2002-2006, Hewlett-Packard Company and Massachusetts
|
* Copyright (c) 2002-2006, Hewlett-Packard Company and Massachusetts
|
||||||
* Institute of Technology. All rights reserved.
|
* Institute of Technology. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are
|
* modification, are permitted provided that the following conditions are
|
||||||
* met:
|
* met:
|
||||||
*
|
*
|
||||||
* - Redistributions of source code must retain the above copyright
|
* - Redistributions of source code must retain the above copyright
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* notice, this list of conditions and the following disclaimer.
|
||||||
*
|
*
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
* - Redistributions in binary form must reproduce the above copyright
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
* documentation and/or other materials provided with the distribution.
|
* documentation and/or other materials provided with the distribution.
|
||||||
*
|
*
|
||||||
* - Neither the name of the Hewlett-Packard Company nor the name of the
|
* - Neither the name of the Hewlett-Packard Company nor the name of the
|
||||||
* Massachusetts Institute of Technology nor the names of their
|
* Massachusetts Institute of Technology nor the names of their
|
||||||
* contributors may be used to endorse or promote products derived from
|
* contributors may be used to endorse or promote products derived from
|
||||||
* this software without specific prior written permission.
|
* this software without specific prior written permission.
|
||||||
*
|
*
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
||||||
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
||||||
* DAMAGE.
|
* DAMAGE.
|
||||||
*/
|
*/
|
||||||
package org.dspace.app.sitemap;
|
package org.dspace.app.sitemap;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.net.HttpURLConnection;
|
import java.net.HttpURLConnection;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.net.URLEncoder;
|
import java.net.URLEncoder;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
|
||||||
import org.apache.commons.cli.CommandLine;
|
import org.apache.commons.cli.CommandLine;
|
||||||
import org.apache.commons.cli.CommandLineParser;
|
import org.apache.commons.cli.CommandLineParser;
|
||||||
import org.apache.commons.cli.HelpFormatter;
|
import org.apache.commons.cli.HelpFormatter;
|
||||||
import org.apache.commons.cli.Options;
|
import org.apache.commons.cli.Options;
|
||||||
import org.apache.commons.cli.ParseException;
|
import org.apache.commons.cli.ParseException;
|
||||||
import org.apache.commons.cli.PosixParser;
|
import org.apache.commons.cli.PosixParser;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.dspace.content.Collection;
|
import org.dspace.content.Collection;
|
||||||
import org.dspace.content.Community;
|
import org.dspace.content.Community;
|
||||||
import org.dspace.content.Item;
|
import org.dspace.content.Item;
|
||||||
import org.dspace.content.ItemIterator;
|
import org.dspace.content.ItemIterator;
|
||||||
import org.dspace.core.ConfigurationManager;
|
import org.dspace.core.ConfigurationManager;
|
||||||
import org.dspace.core.Context;
|
import org.dspace.core.Context;
|
||||||
import org.dspace.core.LogManager;
|
import org.dspace.core.LogManager;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Command-line utility for generating HTML and Sitemaps.org protocol Sitemaps.
|
* Command-line utility for generating HTML and Sitemaps.org protocol Sitemaps.
|
||||||
*
|
*
|
||||||
* @author Robert Tansley
|
* @author Robert Tansley
|
||||||
* @author Stuart Lewis
|
* @author Stuart Lewis
|
||||||
*/
|
*/
|
||||||
public class GenerateSitemaps
|
public class GenerateSitemaps
|
||||||
{
|
{
|
||||||
/** Logger */
|
/** Logger */
|
||||||
private static Logger log = Logger.getLogger(GenerateSitemaps.class);
|
private static Logger log = Logger.getLogger(GenerateSitemaps.class);
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception
|
public static void main(String[] args) throws Exception
|
||||||
{
|
{
|
||||||
final String usage = GenerateSitemaps.class.getCanonicalName();
|
final String usage = GenerateSitemaps.class.getCanonicalName();
|
||||||
|
|
||||||
CommandLineParser parser = new PosixParser();
|
CommandLineParser parser = new PosixParser();
|
||||||
HelpFormatter hf = new HelpFormatter();
|
HelpFormatter hf = new HelpFormatter();
|
||||||
|
|
||||||
Options options = new Options();
|
Options options = new Options();
|
||||||
|
|
||||||
options.addOption("h", "help", false, "help");
|
options.addOption("h", "help", false, "help");
|
||||||
options.addOption("s", "no_sitemaps", false,
|
options.addOption("s", "no_sitemaps", false,
|
||||||
"do not generate sitemaps.org protocol sitemap");
|
"do not generate sitemaps.org protocol sitemap");
|
||||||
options.addOption("b", "no_htmlmap", false,
|
options.addOption("b", "no_htmlmap", false,
|
||||||
"do not generate a basic HTML sitemap");
|
"do not generate a basic HTML sitemap");
|
||||||
options.addOption("a", "ping_all", false,
|
options.addOption("a", "ping_all", false,
|
||||||
"ping configured search engines");
|
"ping configured search engines");
|
||||||
options
|
options
|
||||||
.addOption("p", "ping", true,
|
.addOption("p", "ping", true,
|
||||||
"ping specified search engine URL");
|
"ping specified search engine URL");
|
||||||
|
|
||||||
CommandLine line = null;
|
CommandLine line = null;
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
line = parser.parse(options, args);
|
line = parser.parse(options, args);
|
||||||
}
|
}
|
||||||
catch (ParseException pe)
|
catch (ParseException pe)
|
||||||
{
|
{
|
||||||
hf.printHelp(usage, options);
|
hf.printHelp(usage, options);
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (line.hasOption('h'))
|
if (line.hasOption('h'))
|
||||||
{
|
{
|
||||||
hf.printHelp(usage, options);
|
hf.printHelp(usage, options);
|
||||||
System.exit(0);
|
System.exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (line.getArgs().length != 0)
|
if (line.getArgs().length != 0)
|
||||||
{
|
{
|
||||||
hf.printHelp(usage, options);
|
hf.printHelp(usage, options);
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Sanity check -- if no sitemap generation or pinging to do, print
|
* Sanity check -- if no sitemap generation or pinging to do, print
|
||||||
* usage
|
* usage
|
||||||
*/
|
*/
|
||||||
if (line.getArgs().length != 0 || line.hasOption('b')
|
if (line.getArgs().length != 0 || line.hasOption('b')
|
||||||
&& line.hasOption('s') && !line.hasOption('g')
|
&& line.hasOption('s') && !line.hasOption('g')
|
||||||
&& !line.hasOption('m') && !line.hasOption('y')
|
&& !line.hasOption('m') && !line.hasOption('y')
|
||||||
&& !line.hasOption('p'))
|
&& !line.hasOption('p'))
|
||||||
{
|
{
|
||||||
System.err
|
System.err
|
||||||
.println("Nothing to do (no sitemap to generate, no search engines to ping)");
|
.println("Nothing to do (no sitemap to generate, no search engines to ping)");
|
||||||
hf.printHelp(usage, options);
|
hf.printHelp(usage, options);
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note the negation (CLI options indicate NOT to generate a sitemap)
|
// Note the negation (CLI options indicate NOT to generate a sitemap)
|
||||||
if (!line.hasOption('b') || !line.hasOption('s'))
|
if (!line.hasOption('b') || !line.hasOption('s'))
|
||||||
{
|
{
|
||||||
generateSitemaps(!line.hasOption('b'), !line.hasOption('s'));
|
generateSitemaps(!line.hasOption('b'), !line.hasOption('s'));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (line.hasOption('a'))
|
if (line.hasOption('a'))
|
||||||
{
|
{
|
||||||
pingConfiguredSearchEngines();
|
pingConfiguredSearchEngines();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (line.hasOption('p'))
|
if (line.hasOption('p'))
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
pingSearchEngine(line.getOptionValue('p'));
|
pingSearchEngine(line.getOptionValue('p'));
|
||||||
}
|
}
|
||||||
catch (MalformedURLException me)
|
catch (MalformedURLException me)
|
||||||
{
|
{
|
||||||
System.err
|
System.err
|
||||||
.println("Bad search engine URL (include all except sitemap URL)");
|
.println("Bad search engine URL (include all except sitemap URL)");
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
System.exit(0);
|
System.exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate sitemap.org protocol and/or basic HTML sitemaps.
|
* Generate sitemap.org protocol and/or basic HTML sitemaps.
|
||||||
*
|
*
|
||||||
* @param makeHTMLMap
|
* @param makeHTMLMap
|
||||||
* if {@code true}, generate an HTML sitemap.
|
* if {@code true}, generate an HTML sitemap.
|
||||||
* @param makeSitemapOrg
|
* @param makeSitemapOrg
|
||||||
* if {@code true}, generate an sitemap.org sitemap.
|
* if {@code true}, generate an sitemap.org sitemap.
|
||||||
* @throws SQLException
|
* @throws SQLException
|
||||||
* if a database error occurs.
|
* if a database error occurs.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* if IO error occurs.
|
* if IO error occurs.
|
||||||
*/
|
*/
|
||||||
public static void generateSitemaps(boolean makeHTMLMap,
|
public static void generateSitemaps(boolean makeHTMLMap,
|
||||||
boolean makeSitemapOrg) throws SQLException, IOException
|
boolean makeSitemapOrg) throws SQLException, IOException
|
||||||
{
|
{
|
||||||
String sitemapStem = ConfigurationManager.getProperty("dspace.url")
|
String sitemapStem = ConfigurationManager.getProperty("dspace.url")
|
||||||
+ "/sitemap";
|
+ "/sitemap";
|
||||||
String htmlMapStem = ConfigurationManager.getProperty("dspace.url")
|
String htmlMapStem = ConfigurationManager.getProperty("dspace.url")
|
||||||
+ "/htmlmap";
|
+ "/htmlmap";
|
||||||
String handleURLStem = ConfigurationManager.getProperty("dspace.url")
|
String handleURLStem = ConfigurationManager.getProperty("dspace.url")
|
||||||
+ "/handle/";
|
+ "/handle/";
|
||||||
|
|
||||||
File outputDir = new File(ConfigurationManager
|
File outputDir = new File(ConfigurationManager.getProperty("sitemap.dir"));
|
||||||
.getProperty("dspace.dir"), "sitemaps");
|
if (!outputDir.exists()) {
|
||||||
if (!outputDir.exists()) {
|
outputDir.mkdir();
|
||||||
outputDir.mkdir();
|
}
|
||||||
}
|
|
||||||
|
AbstractGenerator html = null;
|
||||||
AbstractGenerator html = null;
|
AbstractGenerator sitemapsOrg = null;
|
||||||
AbstractGenerator sitemapsOrg = null;
|
|
||||||
|
if (makeHTMLMap)
|
||||||
if (makeHTMLMap)
|
{
|
||||||
{
|
html = new HTMLSitemapGenerator(outputDir, htmlMapStem + "?map=",
|
||||||
html = new HTMLSitemapGenerator(outputDir, htmlMapStem + "?map=",
|
null);
|
||||||
null);
|
}
|
||||||
}
|
|
||||||
|
if (makeSitemapOrg)
|
||||||
if (makeSitemapOrg)
|
{
|
||||||
{
|
sitemapsOrg = new SitemapsOrgGenerator(outputDir, sitemapStem
|
||||||
sitemapsOrg = new SitemapsOrgGenerator(outputDir, sitemapStem
|
+ "?map=", null);
|
||||||
+ "?map=", null);
|
}
|
||||||
}
|
|
||||||
|
Context c = new Context();
|
||||||
Context c = new Context();
|
|
||||||
|
Community[] comms = Community.findAll(c);
|
||||||
Community[] comms = Community.findAll(c);
|
|
||||||
|
for (int i = 0; i < comms.length; i++)
|
||||||
for (int i = 0; i < comms.length; i++)
|
{
|
||||||
{
|
String url = handleURLStem + comms[i].getHandle();
|
||||||
String url = handleURLStem + comms[i].getHandle();
|
|
||||||
|
if (makeHTMLMap)
|
||||||
if (makeHTMLMap)
|
html.addURL(url, null);
|
||||||
html.addURL(url, null);
|
if (makeSitemapOrg)
|
||||||
if (makeSitemapOrg)
|
sitemapsOrg.addURL(url, null);
|
||||||
sitemapsOrg.addURL(url, null);
|
}
|
||||||
}
|
|
||||||
|
Collection[] colls = Collection.findAll(c);
|
||||||
Collection[] colls = Collection.findAll(c);
|
|
||||||
|
for (int i = 0; i < colls.length; i++)
|
||||||
for (int i = 0; i < colls.length; i++)
|
{
|
||||||
{
|
String url = handleURLStem + colls[i].getHandle();
|
||||||
String url = handleURLStem + colls[i].getHandle();
|
|
||||||
|
if (makeHTMLMap)
|
||||||
if (makeHTMLMap)
|
html.addURL(url, null);
|
||||||
html.addURL(url, null);
|
if (makeSitemapOrg)
|
||||||
if (makeSitemapOrg)
|
sitemapsOrg.addURL(url, null);
|
||||||
sitemapsOrg.addURL(url, null);
|
}
|
||||||
}
|
|
||||||
|
ItemIterator allItems = Item.findAll(c);
|
||||||
ItemIterator allItems = Item.findAll(c);
|
try
|
||||||
try
|
{
|
||||||
{
|
int itemCount = 0;
|
||||||
int itemCount = 0;
|
|
||||||
|
while (allItems.hasNext())
|
||||||
while (allItems.hasNext())
|
{
|
||||||
{
|
Item i = allItems.next();
|
||||||
Item i = allItems.next();
|
String url = handleURLStem + i.getHandle();
|
||||||
String url = handleURLStem + i.getHandle();
|
Date lastMod = i.getLastModified();
|
||||||
Date lastMod = i.getLastModified();
|
|
||||||
|
if (makeHTMLMap)
|
||||||
if (makeHTMLMap)
|
html.addURL(url, lastMod);
|
||||||
html.addURL(url, lastMod);
|
if (makeSitemapOrg)
|
||||||
if (makeSitemapOrg)
|
sitemapsOrg.addURL(url, lastMod);
|
||||||
sitemapsOrg.addURL(url, lastMod);
|
i.decache();
|
||||||
i.decache();
|
|
||||||
|
itemCount++;
|
||||||
itemCount++;
|
}
|
||||||
}
|
|
||||||
|
if (makeHTMLMap)
|
||||||
if (makeHTMLMap)
|
{
|
||||||
{
|
int files = html.finish();
|
||||||
int files = html.finish();
|
log.info(LogManager.getHeader(c, "write_sitemap",
|
||||||
log.info(LogManager.getHeader(c, "write_sitemap",
|
"type=html,num_files=" + files + ",communities="
|
||||||
"type=html,num_files=" + files + ",communities="
|
+ comms.length + ",collections=" + colls.length
|
||||||
+ comms.length + ",collections=" + colls.length
|
+ ",items=" + itemCount));
|
||||||
+ ",items=" + itemCount));
|
}
|
||||||
}
|
|
||||||
|
if (makeSitemapOrg)
|
||||||
if (makeSitemapOrg)
|
{
|
||||||
{
|
int files = sitemapsOrg.finish();
|
||||||
int files = sitemapsOrg.finish();
|
log.info(LogManager.getHeader(c, "write_sitemap",
|
||||||
log.info(LogManager.getHeader(c, "write_sitemap",
|
"type=html,num_files=" + files + ",communities="
|
||||||
"type=html,num_files=" + files + ",communities="
|
+ comms.length + ",collections=" + colls.length
|
||||||
+ comms.length + ",collections=" + colls.length
|
+ ",items=" + itemCount));
|
||||||
+ ",items=" + itemCount));
|
}
|
||||||
}
|
}
|
||||||
}
|
finally
|
||||||
finally
|
{
|
||||||
{
|
if (allItems != null)
|
||||||
if (allItems != null)
|
allItems.close();
|
||||||
allItems.close();
|
}
|
||||||
}
|
|
||||||
|
c.abort();
|
||||||
c.abort();
|
}
|
||||||
}
|
|
||||||
|
/**
|
||||||
/**
|
* Ping all search engines configured in {@code dspace.cfg}.
|
||||||
* Ping all search engines configured in {@code dspace.cfg}.
|
*
|
||||||
*
|
* @throws UnsupportedEncodingException
|
||||||
* @throws UnsupportedEncodingException
|
* theoretically should never happen
|
||||||
* theoretically should never happen
|
*/
|
||||||
*/
|
public static void pingConfiguredSearchEngines()
|
||||||
public static void pingConfiguredSearchEngines()
|
throws UnsupportedEncodingException
|
||||||
throws UnsupportedEncodingException
|
{
|
||||||
{
|
String engineURLProp = ConfigurationManager
|
||||||
String engineURLProp = ConfigurationManager
|
.getProperty("sitemap.engineurls");
|
||||||
.getProperty("sitemap.engineurls");
|
String engineURLs[] = null;
|
||||||
String engineURLs[] = null;
|
|
||||||
|
if (engineURLProp != null)
|
||||||
if (engineURLProp != null)
|
{
|
||||||
{
|
engineURLs = engineURLProp.trim().split("\\s*,\\s*");
|
||||||
engineURLs = engineURLProp.trim().split("\\s*,\\s*");
|
}
|
||||||
}
|
|
||||||
|
if (engineURLProp == null || engineURLs == null
|
||||||
if (engineURLProp == null || engineURLs == null
|
|| engineURLs.length == 0 || engineURLs[0].trim().equals(""))
|
||||||
|| engineURLs.length == 0 || engineURLs[0].trim().equals(""))
|
{
|
||||||
{
|
log.warn("No search engine URLs configured to ping");
|
||||||
log.warn("No search engine URLs configured to ping");
|
return;
|
||||||
return;
|
}
|
||||||
}
|
|
||||||
|
for (int i = 0; i < engineURLs.length; i++)
|
||||||
for (int i = 0; i < engineURLs.length; i++)
|
{
|
||||||
{
|
try
|
||||||
try
|
{
|
||||||
{
|
pingSearchEngine(engineURLs[i]);
|
||||||
pingSearchEngine(engineURLs[i]);
|
}
|
||||||
}
|
catch (MalformedURLException me)
|
||||||
catch (MalformedURLException me)
|
{
|
||||||
{
|
log.warn("Bad search engine URL in configuration: "
|
||||||
log.warn("Bad search engine URL in configuration: "
|
+ engineURLs[i]);
|
||||||
+ engineURLs[i]);
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
/**
|
||||||
/**
|
* Ping the given search engine.
|
||||||
* Ping the given search engine.
|
*
|
||||||
*
|
* @param engineURL
|
||||||
* @param engineURL
|
* Search engine URL minus protocol etc, e.g.
|
||||||
* Search engine URL minus protocol etc, e.g.
|
* {@code www.google.com}
|
||||||
* {@code www.google.com}
|
* @return {@code true} if the operation was successful
|
||||||
* @return {@code true} if the operation was successful
|
* @throws MalformedURLException
|
||||||
* @throws MalformedURLException
|
* if the passed in URL is malformed
|
||||||
* if the passed in URL is malformed
|
* @throws UnsupportedEncodingException
|
||||||
* @throws UnsupportedEncodingException
|
* theoretically should never happen
|
||||||
* theoretically should never happen
|
*/
|
||||||
*/
|
public static void pingSearchEngine(String engineURL)
|
||||||
public static void pingSearchEngine(String engineURL)
|
throws MalformedURLException, UnsupportedEncodingException
|
||||||
throws MalformedURLException, UnsupportedEncodingException
|
{
|
||||||
{
|
// Set up HTTP proxy
|
||||||
// Set up HTTP proxy
|
if ((ConfigurationManager.getProperty("http.proxy.host") != null)
|
||||||
if ((ConfigurationManager.getProperty("http.proxy.host") != null)
|
&& (ConfigurationManager.getProperty("http.proxy.port") != null))
|
||||||
&& (ConfigurationManager.getProperty("http.proxy.port") != null))
|
{
|
||||||
{
|
System.setProperty("proxySet", "true");
|
||||||
System.setProperty("proxySet", "true");
|
System.setProperty("proxyHost", ConfigurationManager
|
||||||
System.setProperty("proxyHost", ConfigurationManager
|
.getProperty("http.proxy.host"));
|
||||||
.getProperty("http.proxy.host"));
|
System.getProperty("proxyPort", ConfigurationManager
|
||||||
System.getProperty("proxyPort", ConfigurationManager
|
.getProperty("http.proxy.port"));
|
||||||
.getProperty("http.proxy.port"));
|
}
|
||||||
}
|
|
||||||
|
String sitemapURL = ConfigurationManager.getProperty("dspace.url")
|
||||||
String sitemapURL = ConfigurationManager.getProperty("dspace.url")
|
+ "/sitemap";
|
||||||
+ "/sitemap";
|
|
||||||
|
URL url = new URL(engineURL + URLEncoder.encode(sitemapURL, "UTF-8"));
|
||||||
URL url = new URL(engineURL + URLEncoder.encode(sitemapURL, "UTF-8"));
|
|
||||||
|
try
|
||||||
try
|
{
|
||||||
{
|
HttpURLConnection connection = (HttpURLConnection) url
|
||||||
HttpURLConnection connection = (HttpURLConnection) url
|
.openConnection();
|
||||||
.openConnection();
|
|
||||||
|
BufferedReader in = new BufferedReader(new InputStreamReader(
|
||||||
BufferedReader in = new BufferedReader(new InputStreamReader(
|
connection.getInputStream()));
|
||||||
connection.getInputStream()));
|
|
||||||
|
String inputLine;
|
||||||
String inputLine;
|
StringBuffer resp = new StringBuffer();
|
||||||
StringBuffer resp = new StringBuffer();
|
while ((inputLine = in.readLine()) != null)
|
||||||
while ((inputLine = in.readLine()) != null)
|
{
|
||||||
{
|
resp.append(inputLine).append("\n");
|
||||||
resp.append(inputLine).append("\n");
|
}
|
||||||
}
|
in.close();
|
||||||
in.close();
|
|
||||||
|
if (connection.getResponseCode() == 200)
|
||||||
if (connection.getResponseCode() == 200)
|
{
|
||||||
{
|
log.info("Pinged " + url.toString() + " successfully");
|
||||||
log.info("Pinged " + url.toString() + " successfully");
|
}
|
||||||
}
|
else
|
||||||
else
|
{
|
||||||
{
|
log.warn("Error response pinging " + url.toString() + ":\n"
|
||||||
log.warn("Error response pinging " + url.toString() + ":\n"
|
+ resp);
|
||||||
+ resp);
|
}
|
||||||
}
|
}
|
||||||
}
|
catch (IOException e)
|
||||||
catch (IOException e)
|
{
|
||||||
{
|
log.warn("Error pinging " + url.toString(), e);
|
||||||
log.warn("Error pinging " + url.toString(), e);
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
@@ -1,156 +1,155 @@
|
|||||||
/*
|
/*
|
||||||
* SitemapServlet.java
|
* SitemapServlet.java
|
||||||
*
|
*
|
||||||
* Version: $Revision$
|
* Version: $Revision$
|
||||||
*
|
*
|
||||||
* Date: $Date$
|
* Date: $Date$
|
||||||
*
|
*
|
||||||
* Copyright (c) 2006, Hewlett-Packard Company and Massachusetts
|
* Copyright (c) 2006, Hewlett-Packard Company and Massachusetts
|
||||||
* Institute of Technology. All rights reserved.
|
* Institute of Technology. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are
|
* modification, are permitted provided that the following conditions are
|
||||||
* met:
|
* met:
|
||||||
*
|
*
|
||||||
* - Redistributions of source code must retain the above copyright
|
* - Redistributions of source code must retain the above copyright
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* notice, this list of conditions and the following disclaimer.
|
||||||
*
|
*
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
* - Redistributions in binary form must reproduce the above copyright
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
* documentation and/or other materials provided with the distribution.
|
* documentation and/or other materials provided with the distribution.
|
||||||
*
|
*
|
||||||
* - Neither the name of the Hewlett-Packard Company nor the name of the
|
* - Neither the name of the Hewlett-Packard Company nor the name of the
|
||||||
* Massachusetts Institute of Technology nor the names of their
|
* Massachusetts Institute of Technology nor the names of their
|
||||||
* contributors may be used to endorse or promote products derived from
|
* contributors may be used to endorse or promote products derived from
|
||||||
* this software without specific prior written permission.
|
* this software without specific prior written permission.
|
||||||
*
|
*
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
||||||
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
||||||
* DAMAGE.
|
* DAMAGE.
|
||||||
*/
|
*/
|
||||||
package org.dspace.app.webui.servlet;
|
package org.dspace.app.webui.servlet;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
|
|
||||||
import javax.servlet.ServletException;
|
import javax.servlet.ServletException;
|
||||||
import javax.servlet.http.HttpServletRequest;
|
import javax.servlet.http.HttpServletRequest;
|
||||||
import javax.servlet.http.HttpServletResponse;
|
import javax.servlet.http.HttpServletResponse;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.dspace.app.webui.util.JSPManager;
|
import org.dspace.app.webui.util.JSPManager;
|
||||||
import org.dspace.authorize.AuthorizeException;
|
import org.dspace.authorize.AuthorizeException;
|
||||||
import org.dspace.core.ConfigurationManager;
|
import org.dspace.core.ConfigurationManager;
|
||||||
import org.dspace.core.Context;
|
import org.dspace.core.Context;
|
||||||
import org.dspace.core.Utils;
|
import org.dspace.core.Utils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Servlet for retrieving sitemaps.
|
* Servlet for retrieving sitemaps.
|
||||||
* <P>
|
* <P>
|
||||||
* The servlet is configured via the "type" config parameter to serve either
|
* The servlet is configured via the "type" config parameter to serve either
|
||||||
* sitemaps.org or basic HTML sitemaps.
|
* sitemaps.org or basic HTML sitemaps.
|
||||||
* <P>
|
* <P>
|
||||||
* The "map" parameter specifies the index of a sitemap to serve. If no "map"
|
* The "map" parameter specifies the index of a sitemap to serve. If no "map"
|
||||||
* parameter is specified, the sitemap index is served.
|
* parameter is specified, the sitemap index is served.
|
||||||
*
|
*
|
||||||
* @author Stuart Lewis
|
* @author Stuart Lewis
|
||||||
* @author Robert Tansley
|
* @author Robert Tansley
|
||||||
* @version $Revision$
|
* @version $Revision$
|
||||||
*/
|
*/
|
||||||
public class SitemapServlet extends DSpaceServlet
|
public class SitemapServlet extends DSpaceServlet
|
||||||
{
|
{
|
||||||
/** log4j category */
|
/** log4j category */
|
||||||
private static Logger log = Logger.getLogger(SitemapServlet.class);
|
private static Logger log = Logger.getLogger(SitemapServlet.class);
|
||||||
|
|
||||||
/** true if this servlet serves sitemaps.org sitemaps, false if it serves basic HTML sitemaps */
|
/** true if this servlet serves sitemaps.org sitemaps, false if it serves basic HTML sitemaps */
|
||||||
private boolean forSitemapsOrg;
|
private boolean forSitemapsOrg;
|
||||||
|
|
||||||
public void init()
|
public void init()
|
||||||
{
|
{
|
||||||
forSitemapsOrg = false;
|
forSitemapsOrg = false;
|
||||||
|
|
||||||
String initParam = getInitParameter("type");
|
String initParam = getInitParameter("type");
|
||||||
|
|
||||||
if (initParam != null && initParam.equalsIgnoreCase("sitemaps.org"))
|
if (initParam != null && initParam.equalsIgnoreCase("sitemaps.org"))
|
||||||
{
|
{
|
||||||
forSitemapsOrg = true;
|
forSitemapsOrg = true;
|
||||||
}
|
}
|
||||||
else if (initParam == null || !initParam.equalsIgnoreCase("html"))
|
else if (initParam == null || !initParam.equalsIgnoreCase("html"))
|
||||||
{
|
{
|
||||||
log.warn("Invalid initialization parameter for servlet "
|
log.warn("Invalid initialization parameter for servlet "
|
||||||
+ getServletName() + ": assuming basic HTML");
|
+ getServletName() + ": assuming basic HTML");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void doDSGet(Context context, HttpServletRequest request,
|
protected void doDSGet(Context context, HttpServletRequest request,
|
||||||
HttpServletResponse response) throws ServletException, IOException,
|
HttpServletResponse response) throws ServletException, IOException,
|
||||||
SQLException, AuthorizeException
|
SQLException, AuthorizeException
|
||||||
{
|
{
|
||||||
String param = request.getParameter("map");
|
String param = request.getParameter("map");
|
||||||
|
|
||||||
String ext = (forSitemapsOrg ? ".xml.gz" : ".html");
|
String ext = (forSitemapsOrg ? ".xml.gz" : ".html");
|
||||||
String mimeType = (forSitemapsOrg ? "text/xml" : "text/html");
|
String mimeType = (forSitemapsOrg ? "text/xml" : "text/html");
|
||||||
String fileStem = (param == null ? "sitemap_index" : "sitemap" + param);
|
String fileStem = (param == null ? "sitemap_index" : "sitemap" + param);
|
||||||
|
|
||||||
sendFile(request, response, fileStem + ext, mimeType, forSitemapsOrg);
|
sendFile(request, response, fileStem + ext, mimeType, forSitemapsOrg);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void sendFile(HttpServletRequest request,
|
private void sendFile(HttpServletRequest request,
|
||||||
HttpServletResponse response, String file, String mimeType,
|
HttpServletResponse response, String file, String mimeType,
|
||||||
boolean compressed) throws ServletException, IOException
|
boolean compressed) throws ServletException, IOException
|
||||||
{
|
{
|
||||||
File f = new File(ConfigurationManager.getProperty("dspace.dir")
|
File f = new File(ConfigurationManager.getProperty("sitemap.dir"), file);
|
||||||
+ File.separator + "sitemaps", file);
|
|
||||||
|
if (!f.exists())
|
||||||
if (!f.exists())
|
{
|
||||||
{
|
response.setStatus(HttpServletResponse.SC_NOT_FOUND);
|
||||||
response.setStatus(HttpServletResponse.SC_NOT_FOUND);
|
JSPManager.showJSP(request, response, "/error/404.jsp");
|
||||||
JSPManager.showJSP(request, response, "/error/404.jsp");
|
return;
|
||||||
return;
|
}
|
||||||
}
|
|
||||||
|
long lastMod = f.lastModified();
|
||||||
long lastMod = f.lastModified();
|
response.setDateHeader("Last-Modified", lastMod);
|
||||||
response.setDateHeader("Last-Modified", lastMod);
|
|
||||||
|
// Check for if-modified-since header
|
||||||
// Check for if-modified-since header
|
long modSince = request.getDateHeader("If-Modified-Since");
|
||||||
long modSince = request.getDateHeader("If-Modified-Since");
|
|
||||||
|
if (modSince != -1 && lastMod < modSince)
|
||||||
if (modSince != -1 && lastMod < modSince)
|
{
|
||||||
{
|
// Sitemap file has not been modified since requested date,
|
||||||
// Sitemap file has not been modified since requested date,
|
// so there is nothing new to send; return 304
|
||||||
// so there is nothing new to send; return 304
|
response.setStatus(HttpServletResponse.SC_NOT_MODIFIED);
|
||||||
response.setStatus(HttpServletResponse.SC_NOT_MODIFIED);
|
return;
|
||||||
return;
|
}
|
||||||
}
|
|
||||||
|
if (compressed)
|
||||||
if (compressed)
|
{
|
||||||
{
|
response.setHeader("Content-Encoding", "gzip");
|
||||||
response.setHeader("Content-Encoding", "gzip");
|
}
|
||||||
}
|
|
||||||
|
// Pipe the bits
|
||||||
// Pipe the bits
|
InputStream is = new FileInputStream(f);
|
||||||
InputStream is = new FileInputStream(f);
|
|
||||||
|
// Set the response MIME type
|
||||||
// Set the response MIME type
|
response.setContentType(mimeType);
|
||||||
response.setContentType(mimeType);
|
|
||||||
|
// Response length
|
||||||
// Response length
|
response.setHeader("Content-Length", String.valueOf(f.length()));
|
||||||
response.setHeader("Content-Length", String.valueOf(f.length()));
|
|
||||||
|
Utils.bufferedCopy(is, response.getOutputStream());
|
||||||
Utils.bufferedCopy(is, response.getOutputStream());
|
is.close();
|
||||||
is.close();
|
response.getOutputStream().flush();
|
||||||
response.getOutputStream().flush();
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
@@ -1,223 +1,222 @@
|
|||||||
/*
|
/*
|
||||||
* SitemapReader.java
|
* SitemapReader.java
|
||||||
*
|
*
|
||||||
* Version: $Revision: 1.2 $
|
* Version: $Revision: 1.2 $
|
||||||
*
|
*
|
||||||
* Date: $Date: 2006/04/25 15:24:23 $
|
* Date: $Date: 2006/04/25 15:24:23 $
|
||||||
*
|
*
|
||||||
* Copyright (c) 2002, Hewlett-Packard Company and Massachusetts
|
* Copyright (c) 2002, Hewlett-Packard Company and Massachusetts
|
||||||
* Institute of Technology. All rights reserved.
|
* Institute of Technology. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are
|
* modification, are permitted provided that the following conditions are
|
||||||
* met:
|
* met:
|
||||||
*
|
*
|
||||||
* - Redistributions of source code must retain the above copyright
|
* - Redistributions of source code must retain the above copyright
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* notice, this list of conditions and the following disclaimer.
|
||||||
*
|
*
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
* - Redistributions in binary form must reproduce the above copyright
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
* documentation and/or other materials provided with the distribution.
|
* documentation and/or other materials provided with the distribution.
|
||||||
*
|
*
|
||||||
* - Neither the name of the Hewlett-Packard Company nor the name of the
|
* - Neither the name of the Hewlett-Packard Company nor the name of the
|
||||||
* Massachusetts Institute of Technology nor the names of their
|
* Massachusetts Institute of Technology nor the names of their
|
||||||
* contributors may be used to endorse or promote products derived from
|
* contributors may be used to endorse or promote products derived from
|
||||||
* this software without specific prior written permission.
|
* this software without specific prior written permission.
|
||||||
*
|
*
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
||||||
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
||||||
* DAMAGE.
|
* DAMAGE.
|
||||||
*/
|
*/
|
||||||
package org.dspace.app.xmlui.cocoon;
|
package org.dspace.app.xmlui.cocoon;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import javax.servlet.ServletException;
|
import javax.servlet.ServletException;
|
||||||
import javax.servlet.http.HttpServletResponse;
|
import javax.servlet.http.HttpServletResponse;
|
||||||
|
|
||||||
import org.apache.avalon.excalibur.pool.Recyclable;
|
import org.apache.avalon.excalibur.pool.Recyclable;
|
||||||
import org.apache.avalon.framework.parameters.Parameters;
|
import org.apache.avalon.framework.parameters.Parameters;
|
||||||
import org.apache.cocoon.ProcessingException;
|
import org.apache.cocoon.ProcessingException;
|
||||||
import org.apache.cocoon.ResourceNotFoundException;
|
import org.apache.cocoon.ResourceNotFoundException;
|
||||||
import org.apache.cocoon.environment.ObjectModelHelper;
|
import org.apache.cocoon.environment.ObjectModelHelper;
|
||||||
import org.apache.cocoon.environment.Request;
|
import org.apache.cocoon.environment.Request;
|
||||||
import org.apache.cocoon.environment.Response;
|
import org.apache.cocoon.environment.Response;
|
||||||
import org.apache.cocoon.environment.SourceResolver;
|
import org.apache.cocoon.environment.SourceResolver;
|
||||||
import org.apache.cocoon.environment.http.HttpEnvironment;
|
import org.apache.cocoon.environment.http.HttpEnvironment;
|
||||||
import org.apache.cocoon.reading.AbstractReader;
|
import org.apache.cocoon.reading.AbstractReader;
|
||||||
import org.dspace.core.ConfigurationManager;
|
import org.dspace.core.ConfigurationManager;
|
||||||
import org.xml.sax.SAXException;
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.dspace.core.Utils;
|
import org.dspace.core.Utils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class will read a generated Sitemap (www.sitemaps.org or HTML sitemap)
|
* Class will read a generated Sitemap (www.sitemaps.org or HTML sitemap)
|
||||||
* from [dspace]/sitemaps/ and serve it up to the requesting Search Engine.
|
* from [dspace]/sitemaps/ and serve it up to the requesting Search Engine.
|
||||||
*
|
*
|
||||||
* Sitemaps are generated by running the [dspace]/bin/generate-sitemaps script.
|
* Sitemaps are generated by running the [dspace]/bin/generate-sitemaps script.
|
||||||
*
|
*
|
||||||
* There are essentially two types of Sitemaps:
|
* There are essentially two types of Sitemaps:
|
||||||
*
|
*
|
||||||
* (1) Basic HTML Sitemaps
|
* (1) Basic HTML Sitemaps
|
||||||
*
|
*
|
||||||
* path = "/htmlmap"
|
* path = "/htmlmap"
|
||||||
*
|
*
|
||||||
* <map:read type="SitemapReader">
|
* <map:read type="SitemapReader">
|
||||||
* <map:parameter name="type" value="html"/>
|
* <map:parameter name="type" value="html"/>
|
||||||
* </map:read>
|
* </map:read>
|
||||||
*
|
*
|
||||||
* (2) Sitemaps.org XML Sitemaps
|
* (2) Sitemaps.org XML Sitemaps
|
||||||
*
|
*
|
||||||
* path = "/sitemap"
|
* path = "/sitemap"
|
||||||
*
|
*
|
||||||
* <map:read type="SitemapReader">
|
* <map:read type="SitemapReader">
|
||||||
* <map:parameter name="type" value="sitemaps.org"/>
|
* <map:parameter name="type" value="sitemaps.org"/>
|
||||||
* </map:read>
|
* </map:read>
|
||||||
*
|
*
|
||||||
* @author Tim Donohue
|
* @author Tim Donohue
|
||||||
*/
|
*/
|
||||||
public class SitemapReader extends AbstractReader implements Recyclable
|
public class SitemapReader extends AbstractReader implements Recyclable
|
||||||
{
|
{
|
||||||
private static Logger log = Logger.getLogger(SitemapReader.class);
|
private static Logger log = Logger.getLogger(SitemapReader.class);
|
||||||
|
|
||||||
/** The Cocoon response */
|
/** The Cocoon response */
|
||||||
protected Response response;
|
protected Response response;
|
||||||
|
|
||||||
/** The Cocoon request */
|
/** The Cocoon request */
|
||||||
protected Request request;
|
protected Request request;
|
||||||
|
|
||||||
/** The sitemap's mime-type */
|
/** The sitemap's mime-type */
|
||||||
protected String sitemapMimeType;
|
protected String sitemapMimeType;
|
||||||
|
|
||||||
/** true if we are serving sitemaps.org sitemaps, false otherwise */
|
/** true if we are serving sitemaps.org sitemaps, false otherwise */
|
||||||
private boolean forSitemapsOrg = false;
|
private boolean forSitemapsOrg = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set up the sitemap reader.
|
* Set up the sitemap reader.
|
||||||
*
|
*
|
||||||
* See the class description for information on configuration options.
|
* See the class description for information on configuration options.
|
||||||
*/
|
*/
|
||||||
public void setup(SourceResolver resolver, Map objectModel, String src,
|
public void setup(SourceResolver resolver, Map objectModel, String src,
|
||||||
Parameters par) throws ProcessingException, SAXException,
|
Parameters par) throws ProcessingException, SAXException,
|
||||||
IOException
|
IOException
|
||||||
{
|
{
|
||||||
super.setup(resolver, objectModel, src, par);
|
super.setup(resolver, objectModel, src, par);
|
||||||
|
|
||||||
this.request = ObjectModelHelper.getRequest(objectModel);
|
this.request = ObjectModelHelper.getRequest(objectModel);
|
||||||
this.response = ObjectModelHelper.getResponse(objectModel);
|
this.response = ObjectModelHelper.getResponse(objectModel);
|
||||||
this.forSitemapsOrg = false;
|
this.forSitemapsOrg = false;
|
||||||
|
|
||||||
// Get our parameter that identifies type of sitemap (default to HTML sitemap)
|
// Get our parameter that identifies type of sitemap (default to HTML sitemap)
|
||||||
String type = par.getParameter("type", "html");
|
String type = par.getParameter("type", "html");
|
||||||
|
|
||||||
if (type != null && type.equalsIgnoreCase("sitemaps.org"))
|
if (type != null && type.equalsIgnoreCase("sitemaps.org"))
|
||||||
{
|
{
|
||||||
this.forSitemapsOrg = true;
|
this.forSitemapsOrg = true;
|
||||||
}
|
}
|
||||||
else if (type == null || !type.equalsIgnoreCase("html"))
|
else if (type == null || !type.equalsIgnoreCase("html"))
|
||||||
{
|
{
|
||||||
log.warn("Invalid initialization parameter for sitemapReader: assuming basic HTML");
|
log.warn("Invalid initialization parameter for sitemapReader: assuming basic HTML");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate the output. Determine which type of sitemap is being
|
* Generate the output. Determine which type of sitemap is being
|
||||||
* requested and setup the main request parameters
|
* requested and setup the main request parameters
|
||||||
*/
|
*/
|
||||||
public void generate() throws IOException, ProcessingException
|
public void generate() throws IOException, ProcessingException
|
||||||
{
|
{
|
||||||
String param = this.request.getParameter("map");
|
String param = this.request.getParameter("map");
|
||||||
|
|
||||||
String ext = (this.forSitemapsOrg ? ".xml.gz" : ".html");
|
String ext = (this.forSitemapsOrg ? ".xml.gz" : ".html");
|
||||||
this.sitemapMimeType = (this.forSitemapsOrg ? "text/xml" : "text/html");
|
this.sitemapMimeType = (this.forSitemapsOrg ? "text/xml" : "text/html");
|
||||||
String fileStem = (param == null ? "sitemap_index" : "sitemap" + param);
|
String fileStem = (param == null ? "sitemap_index" : "sitemap" + param);
|
||||||
|
|
||||||
sendFile(fileStem + ext, this.forSitemapsOrg);
|
sendFile(fileStem + ext, this.forSitemapsOrg);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Write the actual pre-generated Sitemap data out to the response.
|
* Write the actual pre-generated Sitemap data out to the response.
|
||||||
*
|
*
|
||||||
* @param file the actual file to send
|
* @param file the actual file to send
|
||||||
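* @param compressed true if the file is already gzip-compressed, in which case a "Content-Encoding: gzip" header is sent
|
* @param compressed true if the file is already gzip-compressed, in which case a "Content-Encoding: gzip" header is sent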
|
||||||
*/
|
*/
|
||||||
private void sendFile(String file,
|
private void sendFile(String file,
|
||||||
boolean compressed) throws IOException, ResourceNotFoundException
|
boolean compressed) throws IOException, ResourceNotFoundException
|
||||||
{
|
{
|
||||||
File f = new File(ConfigurationManager.getProperty("dspace.dir")
|
File f = new File(ConfigurationManager.getProperty("sitemap.dir"), file);
|
||||||
+ File.separator + "sitemaps", file);
|
|
||||||
|
HttpServletResponse httpResponse = (HttpServletResponse) objectModel.get(HttpEnvironment.HTTP_RESPONSE_OBJECT);
|
||||||
HttpServletResponse httpResponse = (HttpServletResponse) objectModel.get(HttpEnvironment.HTTP_RESPONSE_OBJECT);
|
|
||||||
|
if (!f.exists())
|
||||||
if (!f.exists())
|
{
|
||||||
{
|
httpResponse.setStatus(HttpServletResponse.SC_NOT_FOUND);
|
||||||
httpResponse.setStatus(HttpServletResponse.SC_NOT_FOUND);
|
throw new ResourceNotFoundException("Unable to locate sitemap");
|
||||||
throw new ResourceNotFoundException("Unable to locate sitemap");
|
}
|
||||||
}
|
|
||||||
|
long lastMod = f.lastModified();
|
||||||
long lastMod = f.lastModified();
|
this.response.setDateHeader("Last-Modified", lastMod);
|
||||||
this.response.setDateHeader("Last-Modified", lastMod);
|
|
||||||
|
// Check for if-modified-since header
|
||||||
// Check for if-modified-since header
|
long modSince = this.request.getDateHeader("If-Modified-Since");
|
||||||
long modSince = this.request.getDateHeader("If-Modified-Since");
|
|
||||||
|
if (modSince != -1 && lastMod < modSince)
|
||||||
if (modSince != -1 && lastMod < modSince)
|
{
|
||||||
{
|
// Sitemap file has not been modified since requested date,
|
||||||
// Sitemap file has not been modified since requested date,
|
// so there is nothing new to send; return 304
|
||||||
// so there is nothing new to send; return 304
|
httpResponse.setStatus(HttpServletResponse.SC_NOT_MODIFIED);
|
||||||
httpResponse.setStatus(HttpServletResponse.SC_NOT_MODIFIED);
|
return;
|
||||||
return;
|
}
|
||||||
}
|
|
||||||
|
if (compressed)
|
||||||
if (compressed)
|
{
|
||||||
{
|
this.response.setHeader("Content-Encoding", "gzip");
|
||||||
this.response.setHeader("Content-Encoding", "gzip");
|
}
|
||||||
}
|
|
||||||
|
// Pipe the bits
|
||||||
// Pipe the bits
|
InputStream is = new FileInputStream(f);
|
||||||
InputStream is = new FileInputStream(f);
|
|
||||||
|
// Set the response MIME type
|
||||||
// Set the response MIME type
|
response.setHeader("Content-Type", this.sitemapMimeType);
|
||||||
response.setHeader("Content-Type", this.sitemapMimeType);
|
|
||||||
|
// Response length
|
||||||
// Response length
|
this.response.setHeader("Content-Length", String.valueOf(f.length()));
|
||||||
this.response.setHeader("Content-Length", String.valueOf(f.length()));
|
|
||||||
|
Utils.bufferedCopy(is, this.out);
|
||||||
Utils.bufferedCopy(is, this.out);
|
is.close();
|
||||||
is.close();
|
this.out.flush();
|
||||||
this.out.flush();
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
/**
|
||||||
/**
|
* Returns the mime-type of the sitemap
|
||||||
* Returns the mime-type of the sitemap
|
*/
|
||||||
*/
|
public String getMimeType()
|
||||||
public String getMimeType()
|
{
|
||||||
{
|
return this.sitemapMimeType;
|
||||||
return this.sitemapMimeType;
|
}
|
||||||
}
|
|
||||||
|
/**
|
||||||
/**
|
* Recycle
|
||||||
* Recycle
|
*/
|
||||||
*/
|
public void recycle() {
|
||||||
public void recycle() {
|
this.response = null;
|
||||||
this.response = null;
|
this.request = null;
|
||||||
this.request = null;
|
this.sitemapMimeType = null;
|
||||||
this.sitemapMimeType = null;
|
this.forSitemapsOrg = false;
|
||||||
this.forSitemapsOrg = false;
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
@@ -17,6 +17,7 @@
|
|||||||
- [2513300] Fix for bug [1774958] Nested folders do not export correctly
|
- [2513300] Fix for bug [1774958] Nested folders do not export correctly
|
||||||
- Fix for SF bug [2086481] xmlui Administrative log in as another eperson
|
- Fix for SF bug [2086481] xmlui Administrative log in as another eperson
|
||||||
- Fix for SF bug [2155479] Submission verify page handles dc.identifier.* incorrectly
|
- Fix for SF bug [2155479] Submission verify page handles dc.identifier.* incorrectly
|
||||||
|
- SF patch [2560974] for SF feature request [2560839] Make sitemap directory configurable
|
||||||
|
|
||||||
(Stuart Lewis / Chris Yates / Flavio Botelho / Alex Barbieri / Reuben Pasquini / Paulo Matos)
|
(Stuart Lewis / Chris Yates / Flavio Botelho / Alex Barbieri / Reuben Pasquini / Paulo Matos)
|
||||||
- [2057378] Hierarchical LDAP support
|
- [2057378] Hierarchical LDAP support
|
||||||
|
@@ -1044,6 +1044,9 @@ xmlui.content_disposition_threshold = 8388608
|
|||||||
|
|
||||||
|
|
||||||
#### Sitemap settings #####
|
#### Sitemap settings #####
|
||||||
|
# the directory where the generated sitemaps are stored
|
||||||
|
sitemap.dir = ${dspace.dir}/sitemaps
|
||||||
|
|
||||||
#
|
#
|
||||||
# Comma-separated list of search engine URLs to 'ping' when a new Sitemap has
|
# Comma-separated list of search engine URLs to 'ping' when a new Sitemap has
|
||||||
# been created. Include everything except the Sitemap URL itself (which will
|
# been created. Include everything except the Sitemap URL itself (which will
|
||||||
|
Reference in New Issue
Block a user