Merge pull request #10068 from DSpace/backport-9658-to-dspace-7_x

[Port dspace-7_x] Feature curation link checker customizations
This commit is contained in:
Tim Donohue
2024-12-03 09:53:43 -06:00
committed by GitHub
4 changed files with 130 additions and 5 deletions

View File

@@ -19,6 +19,8 @@ import org.dspace.content.Item;
import org.dspace.content.MetadataValue;
import org.dspace.curate.AbstractCurationTask;
import org.dspace.curate.Curator;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;
/**
* A basic link checker that is designed to be extended. By default this link checker
@@ -42,6 +44,9 @@ public class BasicLinkChecker extends AbstractCurationTask {
// The log4j logger for this class
private static Logger log = org.apache.logging.log4j.LogManager.getLogger(BasicLinkChecker.class);
protected static final ConfigurationService configurationService
= DSpaceServicesFactory.getInstance().getConfigurationService();
/**
* Perform the link checking.
@@ -110,7 +115,8 @@ public class BasicLinkChecker extends AbstractCurationTask {
*/
protected boolean checkURL(String url, StringBuilder results) {
// Link check the URL
int httpStatus = getResponseStatus(url);
int redirects = 0;
int httpStatus = getResponseStatus(url, redirects);
if ((httpStatus >= 200) && (httpStatus < 300)) {
results.append(" - " + url + " = " + httpStatus + " - OK\n");
@@ -128,14 +134,24 @@ public class BasicLinkChecker extends AbstractCurationTask {
* @param url The url to open
* @param redirects The number of redirects already followed before requesting this url
* @return The HTTP response code (e.g. 200 / 301 / 404 / 500)
*/
protected int getResponseStatus(String url) {
protected int getResponseStatus(String url, int redirects) {
try {
URL theURL = new URL(url);
HttpURLConnection connection = (HttpURLConnection) theURL.openConnection();
int code = connection.getResponseCode();
connection.disconnect();
connection.setInstanceFollowRedirects(true);
int statusCode = connection.getResponseCode();
int maxRedirect = configurationService.getIntProperty("curate.checklinks.max-redirect", 0);
if ((statusCode == HttpURLConnection.HTTP_MOVED_TEMP || statusCode == HttpURLConnection.HTTP_MOVED_PERM ||
statusCode == HttpURLConnection.HTTP_SEE_OTHER)) {
connection.disconnect();
String newUrl = connection.getHeaderField("Location");
if (newUrl != null && (maxRedirect >= redirects || maxRedirect == -1)) {
redirects++;
return getResponseStatus(newUrl, redirects);
}
return code;
}
return statusCode;
} catch (IOException ioe) {
// Must be a bad URL

View File

@@ -7,6 +7,9 @@
*/
package org.dspace.app.scripts.handler.impl;
import java.util.ArrayList;
import java.util.List;
import org.dspace.scripts.handler.impl.CommandLineDSpaceRunnableHandler;
/**
@@ -17,6 +20,12 @@ public class TestDSpaceRunnableHandler extends CommandLineDSpaceRunnableHandler
private Exception exception = null;
private final List<String> infoMessages = new ArrayList<>();
private final List<String> errorMessages = new ArrayList<>();
private final List<String> warningMessages = new ArrayList<>();
/**
* We're overriding this method so that we can stop the script from doing the System.exit() if
* an exception within the script is thrown
@@ -33,4 +42,34 @@ public class TestDSpaceRunnableHandler extends CommandLineDSpaceRunnableHandler
public Exception getException() {
    // Hands back the handler's stored exception field as-is (it is initialised to null).
    return this.exception;
}
/**
 * Delegates the info message to the parent handler and then records it so that tests can
 * inspect it afterwards via {@link #getInfoMessages()}.
 *
 * @param message the info message being logged
 */
@Override
public void logInfo(String message) {
    super.logInfo(message);
    this.infoMessages.add(message);
}
/**
 * Delegates the warning message to the parent handler and then records it so that tests can
 * inspect it afterwards via {@link #getWarningMessages()}.
 *
 * @param message the warning message being logged
 */
@Override
public void logWarning(String message) {
    super.logWarning(message);
    this.warningMessages.add(message);
}
/**
 * Delegates the error message to the parent handler and then records it so that tests can
 * inspect it afterwards via {@link #getErrorMessages()}.
 *
 * @param message the error message being logged
 */
@Override
public void logError(String message) {
    super.logError(message);
    this.errorMessages.add(message);
}
/**
 * @return the handler's own (live, mutable) list of messages recorded by
 *         {@link #logInfo(String)}, in the order they were logged
 */
public List<String> getInfoMessages() {
    return this.infoMessages;
}
/**
 * @return the handler's own (live, mutable) list of messages recorded by
 *         {@link #logError(String)}, in the order they were logged
 */
public List<String> getErrorMessages() {
    return this.errorMessages;
}
/**
 * @return the handler's own (live, mutable) list of messages recorded by
 *         {@link #logWarning(String)}, in the order they were logged
 */
public List<String> getWarningMessages() {
    return this.warningMessages;
}
}

View File

@@ -9,6 +9,8 @@ package org.dspace.curate;
import static com.jayway.jsonpath.JsonPath.read;
import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.multipart;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
@@ -21,6 +23,7 @@ import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.lang3.StringUtils;
import org.dspace.app.rest.converter.DSpaceRunnableParameterConverter;
import org.dspace.app.rest.matcher.ProcessMatcher;
import org.dspace.app.rest.model.ParameterValueRest;
@@ -28,6 +31,7 @@ import org.dspace.app.rest.model.ProcessRest;
import org.dspace.app.rest.model.ScriptRest;
import org.dspace.app.rest.projection.Projection;
import org.dspace.app.rest.test.AbstractControllerIntegrationTest;
import org.dspace.app.scripts.handler.impl.TestDSpaceRunnableHandler;
import org.dspace.builder.CollectionBuilder;
import org.dspace.builder.CommunityBuilder;
import org.dspace.builder.EPersonBuilder;
@@ -41,7 +45,9 @@ import org.dspace.content.Site;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.eperson.EPerson;
import org.dspace.scripts.DSpaceCommandLineParameter;
import org.dspace.scripts.DSpaceRunnable;
import org.dspace.scripts.configuration.ScriptConfiguration;
import org.dspace.scripts.factory.ScriptServiceFactory;
import org.dspace.scripts.service.ScriptService;
import org.junit.Test;
import org.springframework.beans.factory.annotation.Autowired;
@@ -640,4 +646,65 @@ public class CurationScriptIT extends AbstractControllerIntegrationTest {
ProcessBuilder.deleteProcess(idItemRef.get());
}
}
/**
 * Runs the {@code curate} script with the {@code checklinks} task against a single item and
 * verifies, through the info messages captured by {@link TestDSpaceRunnableHandler}, which
 * metadata values were link-checked and which status was reported for each of them.
 * <p>
 * NOTE(review): the URLs below point at live external hosts, so this test presumably needs
 * outbound network access and depends on how those hosts currently respond — confirm for CI.
 *
 * @throws Exception if building the test data or running the script fails
 */
@Test
public void testURLRedirectCurateTest() throws Exception {
    context.turnOffAuthorisationSystem();

    parentCommunity = CommunityBuilder.createCommunity(context)
                                      .withName("Parent Community")
                                      .build();
    Community child1 = CommunityBuilder.createSubCommunity(context, parentCommunity)
                                       .withName("Sub Community")
                                       .build();
    Collection col1 = CollectionBuilder.createCollection(context, child1).withName("Collection 1").build();

    Item publicItem1 = ItemBuilder.createItem(context, col1)
                                  .withTitle("Public item 1")
                                  .withIssueDate("2017-10-17")
                                  .withAuthor("Smith, Donald").withAuthor("Doe, John")
                                  // Value that does not start with http or https
                                  .withMetadata("dc", "identifier", "uri", "demo.dspace.org/home")
                                  // Regular link
                                  .withMetadata("dc", "description", null, "https://google.com")
                                  // Link that is expected to redirect
                                  .withMetadata("dc", "description", "uri",
                                                "https://demo7.dspace.org/handle/123456789/1")
                                  // Link that is expected not to resolve
                                  .withMetadata("dc", "description", "uri", "https://www.atmire.com/broken-link")
                                  .withSubject("ExtraEntry")
                                  .build();

    String[] args = new String[] {"curate", "-t", "checklinks", "-i", publicItem1.getHandle()};
    TestDSpaceRunnableHandler handler = new TestDSpaceRunnableHandler();

    ScriptService scriptService = ScriptServiceFactory.getInstance().getScriptService();
    ScriptConfiguration scriptConfiguration = scriptService.getScriptConfiguration(args[0]);
    // Fail fast with a clear message instead of silently skipping the script run and then
    // tripping over an unrelated "message not logged" assertion further down.
    assertTrue("No script configuration found for '" + args[0] + "'", scriptConfiguration != null);

    DSpaceRunnable script = scriptService.createDSpaceRunnableForScriptConfiguration(scriptConfiguration);
    assertTrue("Could not create a runnable for script '" + args[0] + "'", script != null);

    script.initialize(args, handler, admin);
    script.run();

    // Value without an http(s) prefix must not show up in the logged results
    assertFalse(checkIfInfoTextLoggedByHandler(handler, "demo.dspace.org/home"));
    // Redirecting link => expect OK
    assertTrue(checkIfInfoTextLoggedByHandler(handler,
                                              "https://demo7.dspace.org/handle/123456789/1 = 200 - OK"));
    // Regular link => expect OK
    assertTrue(checkIfInfoTextLoggedByHandler(handler, "https://google.com = 200 - OK"));
    // Nonexistent link => expect 404
    assertTrue(checkIfInfoTextLoggedByHandler(handler,
                                              "https://www.atmire.com/broken-link = 404 - FAILED"));
}
/**
 * Tells whether any info message captured by the given handler contains the given text,
 * compared case-insensitively.
 *
 * @param handler the test handler whose recorded info messages are searched
 * @param text    the text to look for
 * @return true if at least one recorded info message contains the text, false otherwise
 */
boolean checkIfInfoTextLoggedByHandler(TestDSpaceRunnableHandler handler, String text) {
    return handler.getInfoMessages()
                  .stream()
                  .anyMatch(logged -> StringUtils.containsIgnoreCase(logged, text));
}
}

View File

@@ -26,3 +26,6 @@ curate.taskqueue.dir = ${dspace.dir}/ctqueues
# (optional) directory location of scripted (non-java) tasks
# curate.script.dir = ${dspace.dir}/ctscripts
# Maximum number of redirects the link checker will follow: set to 0 for none, or -1 for unlimited
curate.checklinks.max-redirect = 0