mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-07 01:54:22 +00:00
Merge pull request #10068 from DSpace/backport-9658-to-dspace-7_x
[Port dspace-7_x] Feature curation link checker customizations
This commit is contained in:
@@ -19,6 +19,8 @@ import org.dspace.content.Item;
|
||||
import org.dspace.content.MetadataValue;
|
||||
import org.dspace.curate.AbstractCurationTask;
|
||||
import org.dspace.curate.Curator;
|
||||
import org.dspace.services.ConfigurationService;
|
||||
import org.dspace.services.factory.DSpaceServicesFactory;
|
||||
|
||||
/**
|
||||
* A basic link checker that is designed to be extended. By default this link checker
|
||||
@@ -42,6 +44,9 @@ public class BasicLinkChecker extends AbstractCurationTask {
|
||||
// The log4j logger for this class
|
||||
private static Logger log = org.apache.logging.log4j.LogManager.getLogger(BasicLinkChecker.class);
|
||||
|
||||
protected static final ConfigurationService configurationService
|
||||
= DSpaceServicesFactory.getInstance().getConfigurationService();
|
||||
|
||||
|
||||
/**
|
||||
* Perform the link checking.
|
||||
@@ -110,7 +115,8 @@ public class BasicLinkChecker extends AbstractCurationTask {
|
||||
*/
|
||||
protected boolean checkURL(String url, StringBuilder results) {
|
||||
// Link check the URL
|
||||
int httpStatus = getResponseStatus(url);
|
||||
int redirects = 0;
|
||||
int httpStatus = getResponseStatus(url, redirects);
|
||||
|
||||
if ((httpStatus >= 200) && (httpStatus < 300)) {
|
||||
results.append(" - " + url + " = " + httpStatus + " - OK\n");
|
||||
@@ -128,14 +134,24 @@ public class BasicLinkChecker extends AbstractCurationTask {
|
||||
* @param url The url to open
|
||||
* @return The HTTP response code (e.g. 200 / 301 / 404 / 500)
|
||||
*/
|
||||
protected int getResponseStatus(String url) {
|
||||
protected int getResponseStatus(String url, int redirects) {
|
||||
try {
|
||||
URL theURL = new URL(url);
|
||||
HttpURLConnection connection = (HttpURLConnection) theURL.openConnection();
|
||||
int code = connection.getResponseCode();
|
||||
connection.disconnect();
|
||||
connection.setInstanceFollowRedirects(true);
|
||||
int statusCode = connection.getResponseCode();
|
||||
int maxRedirect = configurationService.getIntProperty("curate.checklinks.max-redirect", 0);
|
||||
if ((statusCode == HttpURLConnection.HTTP_MOVED_TEMP || statusCode == HttpURLConnection.HTTP_MOVED_PERM ||
|
||||
statusCode == HttpURLConnection.HTTP_SEE_OTHER)) {
|
||||
connection.disconnect();
|
||||
String newUrl = connection.getHeaderField("Location");
|
||||
if (newUrl != null && (maxRedirect >= redirects || maxRedirect == -1)) {
|
||||
redirects++;
|
||||
return getResponseStatus(newUrl, redirects);
|
||||
}
|
||||
|
||||
return code;
|
||||
}
|
||||
return statusCode;
|
||||
|
||||
} catch (IOException ioe) {
|
||||
// Must be a bad URL
|
||||
|
@@ -7,6 +7,9 @@
|
||||
*/
|
||||
package org.dspace.app.scripts.handler.impl;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.dspace.scripts.handler.impl.CommandLineDSpaceRunnableHandler;
|
||||
|
||||
/**
|
||||
@@ -17,6 +20,12 @@ public class TestDSpaceRunnableHandler extends CommandLineDSpaceRunnableHandler
|
||||
|
||||
private Exception exception = null;
|
||||
|
||||
private final List<String> infoMessages = new ArrayList<>();
|
||||
|
||||
private final List<String> errorMessages = new ArrayList<>();
|
||||
|
||||
private final List<String> warningMessages = new ArrayList<>();
|
||||
|
||||
/**
|
||||
* We're overriding this method so that we can prevent the script from calling System.exit() if
|
||||
* an exception within the script is thrown
|
||||
@@ -33,4 +42,34 @@ public class TestDSpaceRunnableHandler extends CommandLineDSpaceRunnableHandler
|
||||
public Exception getException() {
|
||||
return exception;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void logInfo(String message) {
|
||||
super.logInfo(message);
|
||||
infoMessages.add(message);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void logWarning(String message) {
|
||||
super.logWarning(message);
|
||||
warningMessages.add(message);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void logError(String message) {
|
||||
super.logError(message);
|
||||
errorMessages.add(message);
|
||||
}
|
||||
|
||||
public List<String> getInfoMessages() {
|
||||
return infoMessages;
|
||||
}
|
||||
|
||||
public List<String> getErrorMessages() {
|
||||
return errorMessages;
|
||||
}
|
||||
|
||||
public List<String> getWarningMessages() {
|
||||
return warningMessages;
|
||||
}
|
||||
}
|
||||
|
@@ -9,6 +9,8 @@ package org.dspace.curate;
|
||||
|
||||
import static com.jayway.jsonpath.JsonPath.read;
|
||||
import static org.hamcrest.Matchers.is;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.multipart;
|
||||
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath;
|
||||
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
|
||||
@@ -21,6 +23,7 @@ import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dspace.app.rest.converter.DSpaceRunnableParameterConverter;
|
||||
import org.dspace.app.rest.matcher.ProcessMatcher;
|
||||
import org.dspace.app.rest.model.ParameterValueRest;
|
||||
@@ -28,6 +31,7 @@ import org.dspace.app.rest.model.ProcessRest;
|
||||
import org.dspace.app.rest.model.ScriptRest;
|
||||
import org.dspace.app.rest.projection.Projection;
|
||||
import org.dspace.app.rest.test.AbstractControllerIntegrationTest;
|
||||
import org.dspace.app.scripts.handler.impl.TestDSpaceRunnableHandler;
|
||||
import org.dspace.builder.CollectionBuilder;
|
||||
import org.dspace.builder.CommunityBuilder;
|
||||
import org.dspace.builder.EPersonBuilder;
|
||||
@@ -41,7 +45,9 @@ import org.dspace.content.Site;
|
||||
import org.dspace.content.factory.ContentServiceFactory;
|
||||
import org.dspace.eperson.EPerson;
|
||||
import org.dspace.scripts.DSpaceCommandLineParameter;
|
||||
import org.dspace.scripts.DSpaceRunnable;
|
||||
import org.dspace.scripts.configuration.ScriptConfiguration;
|
||||
import org.dspace.scripts.factory.ScriptServiceFactory;
|
||||
import org.dspace.scripts.service.ScriptService;
|
||||
import org.junit.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
@@ -640,4 +646,65 @@ public class CurationScriptIT extends AbstractControllerIntegrationTest {
|
||||
ProcessBuilder.deleteProcess(idItemRef.get());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testURLRedirectCurateTest() throws Exception {
|
||||
context.turnOffAuthorisationSystem();
|
||||
parentCommunity = CommunityBuilder.createCommunity(context)
|
||||
.withName("Parent Community")
|
||||
.build();
|
||||
Community child1 = CommunityBuilder.createSubCommunity(context, parentCommunity)
|
||||
.withName("Sub Community")
|
||||
.build();
|
||||
Collection col1 = CollectionBuilder.createCollection(context, child1).withName("Collection 1").build();
|
||||
|
||||
Item publicItem1 = ItemBuilder.createItem(context, col1)
|
||||
.withTitle("Public item 1")
|
||||
.withIssueDate("2017-10-17")
|
||||
.withAuthor("Smith, Donald").withAuthor("Doe, John")
|
||||
// Value not starting with http or https
|
||||
.withMetadata("dc", "identifier", "uri", "demo.dspace.org/home")
|
||||
// MetadataValueLinkChecker uri field with regular link
|
||||
.withMetadata("dc", "description", null, "https://google.com")
|
||||
// MetadataValueLinkChecker uri field with redirect link
|
||||
.withMetadata("dc", "description", "uri", "https://demo7.dspace.org/handle/123456789/1")
|
||||
// MetadataValueLinkChecker uri field with non resolving link
|
||||
.withMetadata("dc", "description", "uri", "https://www.atmire.com/broken-link")
|
||||
.withSubject("ExtraEntry")
|
||||
.build();
|
||||
|
||||
String[] args = new String[] {"curate", "-t", "checklinks", "-i", publicItem1.getHandle()};
|
||||
TestDSpaceRunnableHandler handler = new TestDSpaceRunnableHandler();
|
||||
|
||||
ScriptService scriptService = ScriptServiceFactory.getInstance().getScriptService();
|
||||
ScriptConfiguration scriptConfiguration = scriptService.getScriptConfiguration(args[0]);
|
||||
|
||||
DSpaceRunnable script = null;
|
||||
if (scriptConfiguration != null) {
|
||||
script = scriptService.createDSpaceRunnableForScriptConfiguration(scriptConfiguration);
|
||||
}
|
||||
if (script != null) {
|
||||
script.initialize(args, handler, admin);
|
||||
script.run();
|
||||
}
|
||||
|
||||
// field that should be ignored
|
||||
assertFalse(checkIfInfoTextLoggedByHandler(handler, "demo.dspace.org/home"));
|
||||
// redirect links in field that should not be ignored (https) => expect OK
|
||||
assertTrue(checkIfInfoTextLoggedByHandler(handler, "https://demo7.dspace.org/handle/123456789/1 = 200 - OK"));
|
||||
// regular link in field that should not be ignored (https) => expect OK
|
||||
assertTrue(checkIfInfoTextLoggedByHandler(handler, "https://google.com = 200 - OK"));
|
||||
// nonexistent link in field that should not be ignored => expect 404
|
||||
assertTrue(checkIfInfoTextLoggedByHandler(handler, "https://www.atmire.com/broken-link = 404 - FAILED"));
|
||||
}
|
||||
|
||||
boolean checkIfInfoTextLoggedByHandler(TestDSpaceRunnableHandler handler, String text) {
|
||||
for (String message: handler.getInfoMessages()) {
|
||||
if (StringUtils.containsIgnoreCase(message, text)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -26,3 +26,6 @@ curate.taskqueue.dir = ${dspace.dir}/ctqueues
|
||||
|
||||
# (optional) directory location of scripted (non-java) tasks
|
||||
# curate.script.dir = ${dspace.dir}/ctscripts
|
||||
|
||||
# Maximum number of redirects the link checker will follow: 0 for none, -1 for unlimited
|
||||
curate.checklinks.max-redirect = 0
|
||||
|
Reference in New Issue
Block a user