115778: Adjust redirect check to follow only a limited number of redirects, as defined in curate.cfg

This commit is contained in:
Nathan Buckingham
2024-11-12 15:59:17 -05:00
parent f8c482c27a
commit ef381aa151
2 changed files with 15 additions and 4 deletions

View File

@@ -19,6 +19,8 @@ import org.dspace.content.Item;
import org.dspace.content.MetadataValue;
import org.dspace.curate.AbstractCurationTask;
import org.dspace.curate.Curator;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;
/**
* A basic link checker that is designed to be extended. By default this link checker
@@ -42,6 +44,9 @@ public class BasicLinkChecker extends AbstractCurationTask {
// The log4j logger for this class
private static Logger log = org.apache.logging.log4j.LogManager.getLogger(BasicLinkChecker.class);
protected static final ConfigurationService configurationService
= DSpaceServicesFactory.getInstance().getConfigurationService();
/**
* Perform the link checking.
@@ -110,7 +115,8 @@ public class BasicLinkChecker extends AbstractCurationTask {
*/
protected boolean checkURL(String url, StringBuilder results) {
// Link check the URL
int httpStatus = getResponseStatus(url);
int redirects = 0;
int httpStatus = getResponseStatus(url, redirects);
if ((httpStatus >= 200) && (httpStatus < 300)) {
results.append(" - " + url + " = " + httpStatus + " - OK\n");
@@ -128,18 +134,20 @@ public class BasicLinkChecker extends AbstractCurationTask {
* @param url The url to open
* @return The HTTP response code (e.g. 200 / 301 / 404 / 500)
*/
protected int getResponseStatus(String url) {
protected int getResponseStatus(String url, int redirects) {
try {
URL theURL = new URL(url);
HttpURLConnection connection = (HttpURLConnection) theURL.openConnection();
connection.setInstanceFollowRedirects(true);
int statusCode = connection.getResponseCode();
int maxRedirect = configurationService.getIntProperty("curate.checklinks.max-redirect", 0);
if ((statusCode == HttpURLConnection.HTTP_MOVED_TEMP || statusCode == HttpURLConnection.HTTP_MOVED_PERM ||
statusCode == HttpURLConnection.HTTP_SEE_OTHER)) {
connection.disconnect();
String newUrl = connection.getHeaderField("Location");
if (newUrl != null) {
return getResponseStatus(newUrl);
if (newUrl != null && (maxRedirect >= redirects || maxRedirect == -1)) {
redirects++;
return getResponseStatus(newUrl, redirects);
}
}

View File

@@ -26,3 +26,6 @@ curate.taskqueue.dir = ${dspace.dir}/ctqueues
# (optional) directory location of scripted (non-java) tasks
# curate.script.dir = ${dspace.dir}/ctscripts
# Maximum number of redirects to follow when checking links: set to 0 for none, or -1 for unlimited
curate.checklinks.max-redirect = 0