SF patch [2560974] for SF feature request [2560839] Make sitemap directory configurable

git-svn-id: http://scm.dspace.org/svn/repo/branches/dspace-1_5_x@3471 9c30dcfa-912a-0410-8fc2-9e0234be79fd
This commit is contained in:
Claudia Juergen
2009-02-11 09:54:23 +00:00
parent c76ddaf003
commit 277383c20d
5 changed files with 764 additions and 763 deletions

View File

@@ -1,384 +1,383 @@
/* /*
* GenerateSitemaps.java * GenerateSitemaps.java
* *
* Version: $Revision: 1.1 $ * Version: $Revision: 1.1 $
* *
* Date: $Date: 2006/03/17 00:04:38 $ * Date: $Date: 2006/03/17 00:04:38 $
* *
* Copyright (c) 2002-2006, Hewlett-Packard Company and Massachusetts * Copyright (c) 2002-2006, Hewlett-Packard Company and Massachusetts
* Institute of Technology. All rights reserved. * Institute of Technology. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are * modification, are permitted provided that the following conditions are
* met: * met:
* *
* - Redistributions of source code must retain the above copyright * - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer. * notice, this list of conditions and the following disclaimer.
* *
* - Redistributions in binary form must reproduce the above copyright * - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution. * documentation and/or other materials provided with the distribution.
* *
* - Neither the name of the Hewlett-Packard Company nor the name of the * - Neither the name of the Hewlett-Packard Company nor the name of the
* Massachusetts Institute of Technology nor the names of their * Massachusetts Institute of Technology nor the names of their
* contributors may be used to endorse or promote products derived from * contributors may be used to endorse or promote products derived from
* this software without specific prior written permission. * this software without specific prior written permission.
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE. * DAMAGE.
*/ */
package org.dspace.app.sitemap; package org.dspace.app.sitemap;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection; import java.net.HttpURLConnection;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.sql.SQLException; import java.sql.SQLException;
import java.util.Date; import java.util.Date;
import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options; import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException; import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser; import org.apache.commons.cli.PosixParser;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.dspace.content.Collection; import org.dspace.content.Collection;
import org.dspace.content.Community; import org.dspace.content.Community;
import org.dspace.content.Item; import org.dspace.content.Item;
import org.dspace.content.ItemIterator; import org.dspace.content.ItemIterator;
import org.dspace.core.ConfigurationManager; import org.dspace.core.ConfigurationManager;
import org.dspace.core.Context; import org.dspace.core.Context;
import org.dspace.core.LogManager; import org.dspace.core.LogManager;
/** /**
* Command-line utility for generating HTML and Sitemaps.org protocol Sitemaps. * Command-line utility for generating HTML and Sitemaps.org protocol Sitemaps.
* *
* @author Robert Tansley * @author Robert Tansley
* @author Stuart Lewis * @author Stuart Lewis
*/ */
public class GenerateSitemaps public class GenerateSitemaps
{ {
/** Logger */ /** Logger */
private static Logger log = Logger.getLogger(GenerateSitemaps.class); private static Logger log = Logger.getLogger(GenerateSitemaps.class);
public static void main(String[] args) throws Exception public static void main(String[] args) throws Exception
{ {
final String usage = GenerateSitemaps.class.getCanonicalName(); final String usage = GenerateSitemaps.class.getCanonicalName();
CommandLineParser parser = new PosixParser(); CommandLineParser parser = new PosixParser();
HelpFormatter hf = new HelpFormatter(); HelpFormatter hf = new HelpFormatter();
Options options = new Options(); Options options = new Options();
options.addOption("h", "help", false, "help"); options.addOption("h", "help", false, "help");
options.addOption("s", "no_sitemaps", false, options.addOption("s", "no_sitemaps", false,
"do not generate sitemaps.org protocol sitemap"); "do not generate sitemaps.org protocol sitemap");
options.addOption("b", "no_htmlmap", false, options.addOption("b", "no_htmlmap", false,
"do not generate a basic HTML sitemap"); "do not generate a basic HTML sitemap");
options.addOption("a", "ping_all", false, options.addOption("a", "ping_all", false,
"ping configured search engines"); "ping configured search engines");
options options
.addOption("p", "ping", true, .addOption("p", "ping", true,
"ping specified search engine URL"); "ping specified search engine URL");
CommandLine line = null; CommandLine line = null;
try try
{ {
line = parser.parse(options, args); line = parser.parse(options, args);
} }
catch (ParseException pe) catch (ParseException pe)
{ {
hf.printHelp(usage, options); hf.printHelp(usage, options);
System.exit(1); System.exit(1);
} }
if (line.hasOption('h')) if (line.hasOption('h'))
{ {
hf.printHelp(usage, options); hf.printHelp(usage, options);
System.exit(0); System.exit(0);
} }
if (line.getArgs().length != 0) if (line.getArgs().length != 0)
{ {
hf.printHelp(usage, options); hf.printHelp(usage, options);
System.exit(1); System.exit(1);
} }
/* /*
* Sanity check -- if no sitemap generation or pinging to do, print * Sanity check -- if no sitemap generation or pinging to do, print
* usage * usage
*/ */
if (line.getArgs().length != 0 || line.hasOption('b') if (line.getArgs().length != 0 || line.hasOption('b')
&& line.hasOption('s') && !line.hasOption('g') && line.hasOption('s') && !line.hasOption('g')
&& !line.hasOption('m') && !line.hasOption('y') && !line.hasOption('m') && !line.hasOption('y')
&& !line.hasOption('p')) && !line.hasOption('p'))
{ {
System.err System.err
.println("Nothing to do (no sitemap to generate, no search engines to ping)"); .println("Nothing to do (no sitemap to generate, no search engines to ping)");
hf.printHelp(usage, options); hf.printHelp(usage, options);
System.exit(1); System.exit(1);
} }
// Note the negation (CLI options indicate NOT to generate a sitemap) // Note the negation (CLI options indicate NOT to generate a sitemap)
if (!line.hasOption('b') || !line.hasOption('s')) if (!line.hasOption('b') || !line.hasOption('s'))
{ {
generateSitemaps(!line.hasOption('b'), !line.hasOption('s')); generateSitemaps(!line.hasOption('b'), !line.hasOption('s'));
} }
if (line.hasOption('a')) if (line.hasOption('a'))
{ {
pingConfiguredSearchEngines(); pingConfiguredSearchEngines();
} }
if (line.hasOption('p')) if (line.hasOption('p'))
{ {
try try
{ {
pingSearchEngine(line.getOptionValue('p')); pingSearchEngine(line.getOptionValue('p'));
} }
catch (MalformedURLException me) catch (MalformedURLException me)
{ {
System.err System.err
.println("Bad search engine URL (include all except sitemap URL)"); .println("Bad search engine URL (include all except sitemap URL)");
System.exit(1); System.exit(1);
} }
} }
System.exit(0); System.exit(0);
} }
/** /**
* Generate sitemap.org protocol and/or basic HTML sitemaps. * Generate sitemap.org protocol and/or basic HTML sitemaps.
* *
* @param makeHTMLMap * @param makeHTMLMap
* if {@code true}, generate an HTML sitemap. * if {@code true}, generate an HTML sitemap.
* @param makeSitemapOrg * @param makeSitemapOrg
* if {@code true}, generate a sitemap.org sitemap. * if {@code true}, generate a sitemap.org sitemap.
* @throws SQLException * @throws SQLException
* if a database error occurs. * if a database error occurs.
* @throws IOException * @throws IOException
* if IO error occurs. * if IO error occurs.
*/ */
public static void generateSitemaps(boolean makeHTMLMap, public static void generateSitemaps(boolean makeHTMLMap,
boolean makeSitemapOrg) throws SQLException, IOException boolean makeSitemapOrg) throws SQLException, IOException
{ {
String sitemapStem = ConfigurationManager.getProperty("dspace.url") String sitemapStem = ConfigurationManager.getProperty("dspace.url")
+ "/sitemap"; + "/sitemap";
String htmlMapStem = ConfigurationManager.getProperty("dspace.url") String htmlMapStem = ConfigurationManager.getProperty("dspace.url")
+ "/htmlmap"; + "/htmlmap";
String handleURLStem = ConfigurationManager.getProperty("dspace.url") String handleURLStem = ConfigurationManager.getProperty("dspace.url")
+ "/handle/"; + "/handle/";
File outputDir = new File(ConfigurationManager File outputDir = new File(ConfigurationManager.getProperty("sitemap.dir"));
.getProperty("dspace.dir"), "sitemaps"); if (!outputDir.exists()) {
if (!outputDir.exists()) { outputDir.mkdir();
outputDir.mkdir(); }
}
AbstractGenerator html = null;
AbstractGenerator html = null; AbstractGenerator sitemapsOrg = null;
AbstractGenerator sitemapsOrg = null;
if (makeHTMLMap)
if (makeHTMLMap) {
{ html = new HTMLSitemapGenerator(outputDir, htmlMapStem + "?map=",
html = new HTMLSitemapGenerator(outputDir, htmlMapStem + "?map=", null);
null); }
}
if (makeSitemapOrg)
if (makeSitemapOrg) {
{ sitemapsOrg = new SitemapsOrgGenerator(outputDir, sitemapStem
sitemapsOrg = new SitemapsOrgGenerator(outputDir, sitemapStem + "?map=", null);
+ "?map=", null); }
}
Context c = new Context();
Context c = new Context();
Community[] comms = Community.findAll(c);
Community[] comms = Community.findAll(c);
for (int i = 0; i < comms.length; i++)
for (int i = 0; i < comms.length; i++) {
{ String url = handleURLStem + comms[i].getHandle();
String url = handleURLStem + comms[i].getHandle();
if (makeHTMLMap)
if (makeHTMLMap) html.addURL(url, null);
html.addURL(url, null); if (makeSitemapOrg)
if (makeSitemapOrg) sitemapsOrg.addURL(url, null);
sitemapsOrg.addURL(url, null); }
}
Collection[] colls = Collection.findAll(c);
Collection[] colls = Collection.findAll(c);
for (int i = 0; i < colls.length; i++)
for (int i = 0; i < colls.length; i++) {
{ String url = handleURLStem + colls[i].getHandle();
String url = handleURLStem + colls[i].getHandle();
if (makeHTMLMap)
if (makeHTMLMap) html.addURL(url, null);
html.addURL(url, null); if (makeSitemapOrg)
if (makeSitemapOrg) sitemapsOrg.addURL(url, null);
sitemapsOrg.addURL(url, null); }
}
ItemIterator allItems = Item.findAll(c);
ItemIterator allItems = Item.findAll(c); try
try {
{ int itemCount = 0;
int itemCount = 0;
while (allItems.hasNext())
while (allItems.hasNext()) {
{ Item i = allItems.next();
Item i = allItems.next(); String url = handleURLStem + i.getHandle();
String url = handleURLStem + i.getHandle(); Date lastMod = i.getLastModified();
Date lastMod = i.getLastModified();
if (makeHTMLMap)
if (makeHTMLMap) html.addURL(url, lastMod);
html.addURL(url, lastMod); if (makeSitemapOrg)
if (makeSitemapOrg) sitemapsOrg.addURL(url, lastMod);
sitemapsOrg.addURL(url, lastMod); i.decache();
i.decache();
itemCount++;
itemCount++; }
}
if (makeHTMLMap)
if (makeHTMLMap) {
{ int files = html.finish();
int files = html.finish(); log.info(LogManager.getHeader(c, "write_sitemap",
log.info(LogManager.getHeader(c, "write_sitemap", "type=html,num_files=" + files + ",communities="
"type=html,num_files=" + files + ",communities=" + comms.length + ",collections=" + colls.length
+ comms.length + ",collections=" + colls.length + ",items=" + itemCount));
+ ",items=" + itemCount)); }
}
if (makeSitemapOrg)
if (makeSitemapOrg) {
{ int files = sitemapsOrg.finish();
int files = sitemapsOrg.finish(); log.info(LogManager.getHeader(c, "write_sitemap",
log.info(LogManager.getHeader(c, "write_sitemap", "type=html,num_files=" + files + ",communities="
"type=html,num_files=" + files + ",communities=" + comms.length + ",collections=" + colls.length
+ comms.length + ",collections=" + colls.length + ",items=" + itemCount));
+ ",items=" + itemCount)); }
} }
} finally
finally {
{ if (allItems != null)
if (allItems != null) allItems.close();
allItems.close(); }
}
c.abort();
c.abort(); }
}
/**
/** * Ping all search engines configured in {@code dspace.cfg}.
* Ping all search engines configured in {@code dspace.cfg}. *
* * @throws UnsupportedEncodingException
* @throws UnsupportedEncodingException * theoretically should never happen
* theoretically should never happen */
*/ public static void pingConfiguredSearchEngines()
public static void pingConfiguredSearchEngines() throws UnsupportedEncodingException
throws UnsupportedEncodingException {
{ String engineURLProp = ConfigurationManager
String engineURLProp = ConfigurationManager .getProperty("sitemap.engineurls");
.getProperty("sitemap.engineurls"); String engineURLs[] = null;
String engineURLs[] = null;
if (engineURLProp != null)
if (engineURLProp != null) {
{ engineURLs = engineURLProp.trim().split("\\s*,\\s*");
engineURLs = engineURLProp.trim().split("\\s*,\\s*"); }
}
if (engineURLProp == null || engineURLs == null
if (engineURLProp == null || engineURLs == null || engineURLs.length == 0 || engineURLs[0].trim().equals(""))
|| engineURLs.length == 0 || engineURLs[0].trim().equals("")) {
{ log.warn("No search engine URLs configured to ping");
log.warn("No search engine URLs configured to ping"); return;
return; }
}
for (int i = 0; i < engineURLs.length; i++)
for (int i = 0; i < engineURLs.length; i++) {
{ try
try {
{ pingSearchEngine(engineURLs[i]);
pingSearchEngine(engineURLs[i]); }
} catch (MalformedURLException me)
catch (MalformedURLException me) {
{ log.warn("Bad search engine URL in configuration: "
log.warn("Bad search engine URL in configuration: " + engineURLs[i]);
+ engineURLs[i]); }
} }
} }
}
/**
/** * Ping the given search engine.
* Ping the given search engine. *
* * @param engineURL
* @param engineURL * Search engine URL minus protocol etc, e.g.
* Search engine URL minus protocol etc, e.g. * {@code www.google.com}
* {@code www.google.com} * @return {@code true} if the operation was successful
* @return {@code true} if the operation was successful * @throws MalformedURLException
* @throws MalformedURLException * if the passed in URL is malformed
* if the passed in URL is malformed * @throws UnsupportedEncodingException
* @throws UnsupportedEncodingException * theoretically should never happen
* theoretically should never happen */
*/ public static void pingSearchEngine(String engineURL)
public static void pingSearchEngine(String engineURL) throws MalformedURLException, UnsupportedEncodingException
throws MalformedURLException, UnsupportedEncodingException {
{ // Set up HTTP proxy
// Set up HTTP proxy if ((ConfigurationManager.getProperty("http.proxy.host") != null)
if ((ConfigurationManager.getProperty("http.proxy.host") != null) && (ConfigurationManager.getProperty("http.proxy.port") != null))
&& (ConfigurationManager.getProperty("http.proxy.port") != null)) {
{ System.setProperty("proxySet", "true");
System.setProperty("proxySet", "true"); System.setProperty("proxyHost", ConfigurationManager
System.setProperty("proxyHost", ConfigurationManager .getProperty("http.proxy.host"));
.getProperty("http.proxy.host")); System.getProperty("proxyPort", ConfigurationManager
System.getProperty("proxyPort", ConfigurationManager .getProperty("http.proxy.port"));
.getProperty("http.proxy.port")); }
}
String sitemapURL = ConfigurationManager.getProperty("dspace.url")
String sitemapURL = ConfigurationManager.getProperty("dspace.url") + "/sitemap";
+ "/sitemap";
URL url = new URL(engineURL + URLEncoder.encode(sitemapURL, "UTF-8"));
URL url = new URL(engineURL + URLEncoder.encode(sitemapURL, "UTF-8"));
try
try {
{ HttpURLConnection connection = (HttpURLConnection) url
HttpURLConnection connection = (HttpURLConnection) url .openConnection();
.openConnection();
BufferedReader in = new BufferedReader(new InputStreamReader(
BufferedReader in = new BufferedReader(new InputStreamReader( connection.getInputStream()));
connection.getInputStream()));
String inputLine;
String inputLine; StringBuffer resp = new StringBuffer();
StringBuffer resp = new StringBuffer(); while ((inputLine = in.readLine()) != null)
while ((inputLine = in.readLine()) != null) {
{ resp.append(inputLine).append("\n");
resp.append(inputLine).append("\n"); }
} in.close();
in.close();
if (connection.getResponseCode() == 200)
if (connection.getResponseCode() == 200) {
{ log.info("Pinged " + url.toString() + " successfully");
log.info("Pinged " + url.toString() + " successfully"); }
} else
else {
{ log.warn("Error response pinging " + url.toString() + ":\n"
log.warn("Error response pinging " + url.toString() + ":\n" + resp);
+ resp); }
} }
} catch (IOException e)
catch (IOException e) {
{ log.warn("Error pinging " + url.toString(), e);
log.warn("Error pinging " + url.toString(), e); }
} }
} }
}

View File

@@ -1,156 +1,155 @@
/* /*
* SitemapServlet.java * SitemapServlet.java
* *
* Version: $Revision$ * Version: $Revision$
* *
* Date: $Date$ * Date: $Date$
* *
* Copyright (c) 2006, Hewlett-Packard Company and Massachusetts * Copyright (c) 2006, Hewlett-Packard Company and Massachusetts
* Institute of Technology. All rights reserved. * Institute of Technology. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are * modification, are permitted provided that the following conditions are
* met: * met:
* *
* - Redistributions of source code must retain the above copyright * - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer. * notice, this list of conditions and the following disclaimer.
* *
* - Redistributions in binary form must reproduce the above copyright * - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution. * documentation and/or other materials provided with the distribution.
* *
* - Neither the name of the Hewlett-Packard Company nor the name of the * - Neither the name of the Hewlett-Packard Company nor the name of the
* Massachusetts Institute of Technology nor the names of their * Massachusetts Institute of Technology nor the names of their
* contributors may be used to endorse or promote products derived from * contributors may be used to endorse or promote products derived from
* this software without specific prior written permission. * this software without specific prior written permission.
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE. * DAMAGE.
*/ */
package org.dspace.app.webui.servlet; package org.dspace.app.webui.servlet;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.sql.SQLException; import java.sql.SQLException;
import javax.servlet.ServletException; import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpServletResponse;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.dspace.app.webui.util.JSPManager; import org.dspace.app.webui.util.JSPManager;
import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.AuthorizeException;
import org.dspace.core.ConfigurationManager; import org.dspace.core.ConfigurationManager;
import org.dspace.core.Context; import org.dspace.core.Context;
import org.dspace.core.Utils; import org.dspace.core.Utils;
/** /**
* Servlet for retrieving sitemaps. * Servlet for retrieving sitemaps.
* <P> * <P>
* The servlet is configured via the "type" config parameter to serve either * The servlet is configured via the "type" config parameter to serve either
* sitemaps.org or basic HTML sitemaps. * sitemaps.org or basic HTML sitemaps.
* <P> * <P>
* The "map" parameter specifies the index of a sitemap to serve. If no "map" * The "map" parameter specifies the index of a sitemap to serve. If no "map"
* parameter is specified, the sitemap index is served. * parameter is specified, the sitemap index is served.
* *
* @author Stuart Lewis * @author Stuart Lewis
* @author Robert Tansley * @author Robert Tansley
* @version $Revision$ * @version $Revision$
*/ */
public class SitemapServlet extends DSpaceServlet public class SitemapServlet extends DSpaceServlet
{ {
/** log4j category */ /** log4j category */
private static Logger log = Logger.getLogger(SitemapServlet.class); private static Logger log = Logger.getLogger(SitemapServlet.class);
/** true if we are serving sitemap.org sitemaps, false otherwise */ /** true if we are serving sitemap.org sitemaps, false otherwise */
private boolean forSitemapsOrg; private boolean forSitemapsOrg;
public void init() public void init()
{ {
forSitemapsOrg = false; forSitemapsOrg = false;
String initParam = getInitParameter("type"); String initParam = getInitParameter("type");
if (initParam != null && initParam.equalsIgnoreCase("sitemaps.org")) if (initParam != null && initParam.equalsIgnoreCase("sitemaps.org"))
{ {
forSitemapsOrg = true; forSitemapsOrg = true;
} }
else if (initParam == null || !initParam.equalsIgnoreCase("html")) else if (initParam == null || !initParam.equalsIgnoreCase("html"))
{ {
log.warn("Invalid initialization parameter for servlet " log.warn("Invalid initialization parameter for servlet "
+ getServletName() + ": assuming basic HTML"); + getServletName() + ": assuming basic HTML");
} }
} }
protected void doDSGet(Context context, HttpServletRequest request, protected void doDSGet(Context context, HttpServletRequest request,
HttpServletResponse response) throws ServletException, IOException, HttpServletResponse response) throws ServletException, IOException,
SQLException, AuthorizeException SQLException, AuthorizeException
{ {
String param = request.getParameter("map"); String param = request.getParameter("map");
String ext = (forSitemapsOrg ? ".xml.gz" : ".html"); String ext = (forSitemapsOrg ? ".xml.gz" : ".html");
String mimeType = (forSitemapsOrg ? "text/xml" : "text/html"); String mimeType = (forSitemapsOrg ? "text/xml" : "text/html");
String fileStem = (param == null ? "sitemap_index" : "sitemap" + param); String fileStem = (param == null ? "sitemap_index" : "sitemap" + param);
sendFile(request, response, fileStem + ext, mimeType, forSitemapsOrg); sendFile(request, response, fileStem + ext, mimeType, forSitemapsOrg);
} }
private void sendFile(HttpServletRequest request, private void sendFile(HttpServletRequest request,
HttpServletResponse response, String file, String mimeType, HttpServletResponse response, String file, String mimeType,
boolean compressed) throws ServletException, IOException boolean compressed) throws ServletException, IOException
{ {
File f = new File(ConfigurationManager.getProperty("dspace.dir") File f = new File(ConfigurationManager.getProperty("sitemap.dir"), file);
+ File.separator + "sitemaps", file);
if (!f.exists())
if (!f.exists()) {
{ response.setStatus(HttpServletResponse.SC_NOT_FOUND);
response.setStatus(HttpServletResponse.SC_NOT_FOUND); JSPManager.showJSP(request, response, "/error/404.jsp");
JSPManager.showJSP(request, response, "/error/404.jsp"); return;
return; }
}
long lastMod = f.lastModified();
long lastMod = f.lastModified(); response.setDateHeader("Last-Modified", lastMod);
response.setDateHeader("Last-Modified", lastMod);
// Check for if-modified-since header
// Check for if-modified-since header long modSince = request.getDateHeader("If-Modified-Since");
long modSince = request.getDateHeader("If-Modified-Since");
if (modSince != -1 && lastMod < modSince)
if (modSince != -1 && lastMod < modSince) {
{ // Sitemap file has not been modified since requested date,
// Sitemap file has not been modified since requested date, // hence bitstream has not; return 304
// hence bitstream has not; return 304 response.setStatus(HttpServletResponse.SC_NOT_MODIFIED);
response.setStatus(HttpServletResponse.SC_NOT_MODIFIED); return;
return; }
}
if (compressed)
if (compressed) {
{ response.setHeader("Content-Encoding", "gzip");
response.setHeader("Content-Encoding", "gzip"); }
}
// Pipe the bits
// Pipe the bits InputStream is = new FileInputStream(f);
InputStream is = new FileInputStream(f);
// Set the response MIME type
// Set the response MIME type response.setContentType(mimeType);
response.setContentType(mimeType);
// Response length
// Response length response.setHeader("Content-Length", String.valueOf(f.length()));
response.setHeader("Content-Length", String.valueOf(f.length()));
Utils.bufferedCopy(is, response.getOutputStream());
Utils.bufferedCopy(is, response.getOutputStream()); is.close();
is.close(); response.getOutputStream().flush();
response.getOutputStream().flush(); }
} }
}

View File

@@ -1,223 +1,222 @@
/* /*
* SitemapReader.java * SitemapReader.java
* *
* Version: $Revision: 1.2 $ * Version: $Revision: 1.2 $
* *
* Date: $Date: 2006/04/25 15:24:23 $ * Date: $Date: 2006/04/25 15:24:23 $
* *
* Copyright (c) 2002, Hewlett-Packard Company and Massachusetts * Copyright (c) 2002, Hewlett-Packard Company and Massachusetts
* Institute of Technology. All rights reserved. * Institute of Technology. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are * modification, are permitted provided that the following conditions are
* met: * met:
* *
* - Redistributions of source code must retain the above copyright * - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer. * notice, this list of conditions and the following disclaimer.
* *
* - Redistributions in binary form must reproduce the above copyright * - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution. * documentation and/or other materials provided with the distribution.
* *
* - Neither the name of the Hewlett-Packard Company nor the name of the * - Neither the name of the Hewlett-Packard Company nor the name of the
* Massachusetts Institute of Technology nor the names of their * Massachusetts Institute of Technology nor the names of their
* contributors may be used to endorse or promote products derived from * contributors may be used to endorse or promote products derived from
* this software without specific prior written permission. * this software without specific prior written permission.
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE. * DAMAGE.
*/ */
package org.dspace.app.xmlui.cocoon; package org.dspace.app.xmlui.cocoon;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.Map; import java.util.Map;
import javax.servlet.ServletException; import javax.servlet.ServletException;
import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpServletResponse;
import org.apache.avalon.excalibur.pool.Recyclable; import org.apache.avalon.excalibur.pool.Recyclable;
import org.apache.avalon.framework.parameters.Parameters; import org.apache.avalon.framework.parameters.Parameters;
import org.apache.cocoon.ProcessingException; import org.apache.cocoon.ProcessingException;
import org.apache.cocoon.ResourceNotFoundException; import org.apache.cocoon.ResourceNotFoundException;
import org.apache.cocoon.environment.ObjectModelHelper; import org.apache.cocoon.environment.ObjectModelHelper;
import org.apache.cocoon.environment.Request; import org.apache.cocoon.environment.Request;
import org.apache.cocoon.environment.Response; import org.apache.cocoon.environment.Response;
import org.apache.cocoon.environment.SourceResolver; import org.apache.cocoon.environment.SourceResolver;
import org.apache.cocoon.environment.http.HttpEnvironment; import org.apache.cocoon.environment.http.HttpEnvironment;
import org.apache.cocoon.reading.AbstractReader; import org.apache.cocoon.reading.AbstractReader;
import org.dspace.core.ConfigurationManager; import org.dspace.core.ConfigurationManager;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.dspace.core.Utils; import org.dspace.core.Utils;
/** /**
* Class will read a generated Sitemap (www.sitemaps.org or HTML sitemap) * Class will read a generated Sitemap (www.sitemaps.org or HTML sitemap)
* from [dspace]/sitemaps/ and serve it up to the requesting Search Engine. * from [dspace]/sitemaps/ and serve it up to the requesting Search Engine.
* *
* Sitemaps are generated by running the [dspace]/bin/generate-sitemaps script. * Sitemaps are generated by running the [dspace]/bin/generate-sitemaps script.
* *
* There are essentially two types of Sitemaps: * There are essentially two types of Sitemaps:
* *
* (1) Basic HTML Sitemaps * (1) Basic HTML Sitemaps
* *
* path = "/htmlmap" * path = "/htmlmap"
* *
* &lt;map:read type="SitemapReader"> * &lt;map:read type="SitemapReader">
* &lt;map:parameter name="type" value="html"/&gt; * &lt;map:parameter name="type" value="html"/&gt;
* &lt;/map:read&gt; * &lt;/map:read&gt;
* *
* (2) Sitemaps.org XML Sitemaps * (2) Sitemaps.org XML Sitemaps
* *
* path = "/sitemap" * path = "/sitemap"
* *
* &lt;map:read type="SitemapReader"> * &lt;map:read type="SitemapReader">
* &lt;map:parameter name="type" value="sitemaps.org"/&gt; * &lt;map:parameter name="type" value="sitemaps.org"/&gt;
* &lt;/map:read&gt; * &lt;/map:read&gt;
* *
* @author Tim Donohue * @author Tim Donohue
*/ */
public class SitemapReader extends AbstractReader implements Recyclable public class SitemapReader extends AbstractReader implements Recyclable
{ {
private static Logger log = Logger.getLogger(SitemapReader.class); private static Logger log = Logger.getLogger(SitemapReader.class);
/** The Cocoon response */ /** The Cocoon response */
protected Response response; protected Response response;
/** The Cocoon request */ /** The Cocoon request */
protected Request request; protected Request request;
/** The sitemap's mime-type */ /** The sitemap's mime-type */
protected String sitemapMimeType; protected String sitemapMimeType;
/** true if we are for serving sitemap.org sitemaps, false otherwise */ /** true if we are for serving sitemap.org sitemaps, false otherwise */
private boolean forSitemapsOrg = false; private boolean forSitemapsOrg = false;
/** /**
* Set up the bitstream reader. * Set up the bitstream reader.
* *
* See the class description for information on configuration options. * See the class description for information on configuration options.
*/ */
public void setup(SourceResolver resolver, Map objectModel, String src, public void setup(SourceResolver resolver, Map objectModel, String src,
Parameters par) throws ProcessingException, SAXException, Parameters par) throws ProcessingException, SAXException,
IOException IOException
{ {
super.setup(resolver, objectModel, src, par); super.setup(resolver, objectModel, src, par);
this.request = ObjectModelHelper.getRequest(objectModel); this.request = ObjectModelHelper.getRequest(objectModel);
this.response = ObjectModelHelper.getResponse(objectModel); this.response = ObjectModelHelper.getResponse(objectModel);
this.forSitemapsOrg = false; this.forSitemapsOrg = false;
// Get our parameter that identifies type of sitemap (default to HTML sitemap) // Get our parameter that identifies type of sitemap (default to HTML sitemap)
String type = par.getParameter("type", "html"); String type = par.getParameter("type", "html");
if (type != null && type.equalsIgnoreCase("sitemaps.org")) if (type != null && type.equalsIgnoreCase("sitemaps.org"))
{ {
this.forSitemapsOrg = true; this.forSitemapsOrg = true;
} }
else if (type == null || !type.equalsIgnoreCase("html")) else if (type == null || !type.equalsIgnoreCase("html"))
{ {
log.warn("Invalid initialization parameter for sitemapReader: assuming basic HTML"); log.warn("Invalid initialization parameter for sitemapReader: assuming basic HTML");
} }
} }
/** /**
* Generate the output. Determine which type of sitemap is being * Generate the output. Determine which type of sitemap is being
* requested and setup the main request parameters * requested and setup the main request parameters
*/ */
public void generate() throws IOException, ProcessingException public void generate() throws IOException, ProcessingException
{ {
String param = this.request.getParameter("map"); String param = this.request.getParameter("map");
String ext = (this.forSitemapsOrg ? ".xml.gz" : ".html"); String ext = (this.forSitemapsOrg ? ".xml.gz" : ".html");
this.sitemapMimeType = (this.forSitemapsOrg ? "text/xml" : "text/html"); this.sitemapMimeType = (this.forSitemapsOrg ? "text/xml" : "text/html");
String fileStem = (param == null ? "sitemap_index" : "sitemap" + param); String fileStem = (param == null ? "sitemap_index" : "sitemap" + param);
sendFile(fileStem + ext, this.forSitemapsOrg); sendFile(fileStem + ext, this.forSitemapsOrg);
} }
/** /**
* Write the actual pre-generated Sitemap data out to the response. * Write the actual pre-generated Sitemap data out to the response.
* *
* @param file the actual file to send * @param file the actual file to send
* @param compressed true if file should be compressed * @param compressed true if file should be compressed
*/ */
private void sendFile(String file, private void sendFile(String file,
boolean compressed) throws IOException, ResourceNotFoundException boolean compressed) throws IOException, ResourceNotFoundException
{ {
File f = new File(ConfigurationManager.getProperty("dspace.dir") File f = new File(ConfigurationManager.getProperty("sitemap.dir"), file);
+ File.separator + "sitemaps", file);
HttpServletResponse httpResponse = (HttpServletResponse) objectModel.get(HttpEnvironment.HTTP_RESPONSE_OBJECT);
HttpServletResponse httpResponse = (HttpServletResponse) objectModel.get(HttpEnvironment.HTTP_RESPONSE_OBJECT);
if (!f.exists())
if (!f.exists()) {
{ httpResponse.setStatus(HttpServletResponse.SC_NOT_FOUND);
httpResponse.setStatus(HttpServletResponse.SC_NOT_FOUND); throw new ResourceNotFoundException("Unable to locate sitemap");
throw new ResourceNotFoundException("Unable to locate sitemap"); }
}
long lastMod = f.lastModified();
long lastMod = f.lastModified(); this.response.setDateHeader("Last-Modified", lastMod);
this.response.setDateHeader("Last-Modified", lastMod);
// Check for if-modified-since header
// Check for if-modified-since header long modSince = this.request.getDateHeader("If-Modified-Since");
long modSince = this.request.getDateHeader("If-Modified-Since");
if (modSince != -1 && lastMod < modSince)
if (modSince != -1 && lastMod < modSince) {
{ // Sitemap file has not been modified since requested date,
// Sitemap file has not been modified since requested date, // hence bitstream has not; return 304
// hence bitstream has not; return 304 httpResponse.setStatus(HttpServletResponse.SC_NOT_MODIFIED);
httpResponse.setStatus(HttpServletResponse.SC_NOT_MODIFIED); return;
return; }
}
if (compressed)
if (compressed) {
{ this.response.setHeader("Content-Encoding", "gzip");
this.response.setHeader("Content-Encoding", "gzip"); }
}
// Pipe the bits
// Pipe the bits InputStream is = new FileInputStream(f);
InputStream is = new FileInputStream(f);
// Set the response MIME type
// Set the response MIME type response.setHeader("Content-Type", this.sitemapMimeType);
response.setHeader("Content-Type", this.sitemapMimeType);
// Response length
// Response length this.response.setHeader("Content-Length", String.valueOf(f.length()));
this.response.setHeader("Content-Length", String.valueOf(f.length()));
Utils.bufferedCopy(is, this.out);
Utils.bufferedCopy(is, this.out); is.close();
is.close(); this.out.flush();
this.out.flush(); }
}
/**
/** * Returns the mime-type of the sitemap
* Returns the mime-type of the sitemap */
*/ public String getMimeType()
public String getMimeType() {
{ return this.sitemapMimeType;
return this.sitemapMimeType; }
}
/**
/** * Recycle
* Recycle */
*/ public void recycle() {
public void recycle() { this.response = null;
this.response = null; this.request = null;
this.request = null; this.sitemapMimeType = null;
this.sitemapMimeType = null; this.forSitemapsOrg = false;
this.forSitemapsOrg = false; }
} }
}

View File

@@ -17,6 +17,7 @@
- [2513300] Fix for bug [1774958] Nested folders do not export correctly - [2513300] Fix for bug [1774958] Nested folders do not export correctly
- Fix for SF bug [2086481] xmlui Administrative log in as another eperson - Fix for SF bug [2086481] xmlui Administrative log in as another eperson
- Fix for SF bug [2155479] Submission verify page handles dc.identifier.* incorrectly - Fix for SF bug [2155479] Submission verify page handles dc.identifier.* incorrectly
- SF patch [2560974] for SF feature request [2560839] Make sitemap directory configurable
(Stuart Lewis / Chris Yates / Flavio Botelho / Alex Barbieri / Reuben Pasquini / Paulo Matos) (Stuart Lewis / Chris Yates / Flavio Botelho / Alex Barbieri / Reuben Pasquini / Paulo Matos)
- [2057378] Hierarchical LDAP support - [2057378] Hierarchical LDAP support

View File

@@ -1044,6 +1044,9 @@ xmlui.content_disposition_threshold = 8388608
#### Sitemap settings ##### #### Sitemap settings #####
# The directory where the generated sitemaps are stored and served from
sitemap.dir = ${dspace.dir}/sitemaps
# #
# Comma-separated list of search engine URLs to 'ping' when a new Sitemap has # Comma-separated list of search engine URLs to 'ping' when a new Sitemap has
# been created. Include everything except the Sitemap URL itself (which will # been created. Include everything except the Sitemap URL itself (which will