mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-18 07:23:08 +00:00
Merge pull request #10068 from DSpace/backport-9658-to-dspace-7_x
[Port dspace-7_x] Feature curation link checker customizations
This commit is contained in:
@@ -19,6 +19,8 @@ import org.dspace.content.Item;
|
|||||||
import org.dspace.content.MetadataValue;
|
import org.dspace.content.MetadataValue;
|
||||||
import org.dspace.curate.AbstractCurationTask;
|
import org.dspace.curate.AbstractCurationTask;
|
||||||
import org.dspace.curate.Curator;
|
import org.dspace.curate.Curator;
|
||||||
|
import org.dspace.services.ConfigurationService;
|
||||||
|
import org.dspace.services.factory.DSpaceServicesFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A basic link checker that is designed to be extended. By default this link checker
|
* A basic link checker that is designed to be extended. By default this link checker
|
||||||
@@ -42,6 +44,9 @@ public class BasicLinkChecker extends AbstractCurationTask {
|
|||||||
// The log4j logger for this class
|
// The log4j logger for this class
|
||||||
private static Logger log = org.apache.logging.log4j.LogManager.getLogger(BasicLinkChecker.class);
|
private static Logger log = org.apache.logging.log4j.LogManager.getLogger(BasicLinkChecker.class);
|
||||||
|
|
||||||
|
protected static final ConfigurationService configurationService
|
||||||
|
= DSpaceServicesFactory.getInstance().getConfigurationService();
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Perform the link checking.
|
* Perform the link checking.
|
||||||
@@ -110,7 +115,8 @@ public class BasicLinkChecker extends AbstractCurationTask {
|
|||||||
*/
|
*/
|
||||||
protected boolean checkURL(String url, StringBuilder results) {
|
protected boolean checkURL(String url, StringBuilder results) {
|
||||||
// Link check the URL
|
// Link check the URL
|
||||||
int httpStatus = getResponseStatus(url);
|
int redirects = 0;
|
||||||
|
int httpStatus = getResponseStatus(url, redirects);
|
||||||
|
|
||||||
if ((httpStatus >= 200) && (httpStatus < 300)) {
|
if ((httpStatus >= 200) && (httpStatus < 300)) {
|
||||||
results.append(" - " + url + " = " + httpStatus + " - OK\n");
|
results.append(" - " + url + " = " + httpStatus + " - OK\n");
|
||||||
@@ -128,14 +134,24 @@ public class BasicLinkChecker extends AbstractCurationTask {
|
|||||||
* @param url The url to open
|
* @param url The url to open
|
||||||
* @return The HTTP response code (e.g. 200 / 301 / 404 / 500)
|
* @return The HTTP response code (e.g. 200 / 301 / 404 / 500)
|
||||||
*/
|
*/
|
||||||
protected int getResponseStatus(String url) {
|
protected int getResponseStatus(String url, int redirects) {
|
||||||
try {
|
try {
|
||||||
URL theURL = new URL(url);
|
URL theURL = new URL(url);
|
||||||
HttpURLConnection connection = (HttpURLConnection) theURL.openConnection();
|
HttpURLConnection connection = (HttpURLConnection) theURL.openConnection();
|
||||||
int code = connection.getResponseCode();
|
connection.setInstanceFollowRedirects(true);
|
||||||
|
int statusCode = connection.getResponseCode();
|
||||||
|
int maxRedirect = configurationService.getIntProperty("curate.checklinks.max-redirect", 0);
|
||||||
|
if ((statusCode == HttpURLConnection.HTTP_MOVED_TEMP || statusCode == HttpURLConnection.HTTP_MOVED_PERM ||
|
||||||
|
statusCode == HttpURLConnection.HTTP_SEE_OTHER)) {
|
||||||
connection.disconnect();
|
connection.disconnect();
|
||||||
|
String newUrl = connection.getHeaderField("Location");
|
||||||
|
if (newUrl != null && (maxRedirect >= redirects || maxRedirect == -1)) {
|
||||||
|
redirects++;
|
||||||
|
return getResponseStatus(newUrl, redirects);
|
||||||
|
}
|
||||||
|
|
||||||
return code;
|
}
|
||||||
|
return statusCode;
|
||||||
|
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
// Must be a bad URL
|
// Must be a bad URL
|
||||||
|
@@ -7,6 +7,9 @@
|
|||||||
*/
|
*/
|
||||||
package org.dspace.app.scripts.handler.impl;
|
package org.dspace.app.scripts.handler.impl;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import org.dspace.scripts.handler.impl.CommandLineDSpaceRunnableHandler;
|
import org.dspace.scripts.handler.impl.CommandLineDSpaceRunnableHandler;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -17,6 +20,12 @@ public class TestDSpaceRunnableHandler extends CommandLineDSpaceRunnableHandler
|
|||||||
|
|
||||||
private Exception exception = null;
|
private Exception exception = null;
|
||||||
|
|
||||||
|
private final List<String> infoMessages = new ArrayList<>();
|
||||||
|
|
||||||
|
private final List<String> errorMessages = new ArrayList<>();
|
||||||
|
|
||||||
|
private final List<String> warningMessages = new ArrayList<>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* We're overriding this method so that we can stop the script from doing the System.exit() if
|
* We're overriding this method so that we can stop the script from doing the System.exit() if
|
||||||
* an exception within the script is thrown
|
* an exception within the script is thrown
|
||||||
@@ -33,4 +42,34 @@ public class TestDSpaceRunnableHandler extends CommandLineDSpaceRunnableHandler
|
|||||||
public Exception getException() {
|
public Exception getException() {
|
||||||
return exception;
|
return exception;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void logInfo(String message) {
|
||||||
|
super.logInfo(message);
|
||||||
|
infoMessages.add(message);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void logWarning(String message) {
|
||||||
|
super.logWarning(message);
|
||||||
|
warningMessages.add(message);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void logError(String message) {
|
||||||
|
super.logError(message);
|
||||||
|
errorMessages.add(message);
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getInfoMessages() {
|
||||||
|
return infoMessages;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getErrorMessages() {
|
||||||
|
return errorMessages;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getWarningMessages() {
|
||||||
|
return warningMessages;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@@ -9,6 +9,8 @@ package org.dspace.curate;
|
|||||||
|
|
||||||
import static com.jayway.jsonpath.JsonPath.read;
|
import static com.jayway.jsonpath.JsonPath.read;
|
||||||
import static org.hamcrest.Matchers.is;
|
import static org.hamcrest.Matchers.is;
|
||||||
|
import static org.junit.Assert.assertFalse;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.multipart;
|
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.multipart;
|
||||||
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath;
|
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath;
|
||||||
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
|
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
|
||||||
@@ -21,6 +23,7 @@ import java.util.concurrent.atomic.AtomicReference;
|
|||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dspace.app.rest.converter.DSpaceRunnableParameterConverter;
|
import org.dspace.app.rest.converter.DSpaceRunnableParameterConverter;
|
||||||
import org.dspace.app.rest.matcher.ProcessMatcher;
|
import org.dspace.app.rest.matcher.ProcessMatcher;
|
||||||
import org.dspace.app.rest.model.ParameterValueRest;
|
import org.dspace.app.rest.model.ParameterValueRest;
|
||||||
@@ -28,6 +31,7 @@ import org.dspace.app.rest.model.ProcessRest;
|
|||||||
import org.dspace.app.rest.model.ScriptRest;
|
import org.dspace.app.rest.model.ScriptRest;
|
||||||
import org.dspace.app.rest.projection.Projection;
|
import org.dspace.app.rest.projection.Projection;
|
||||||
import org.dspace.app.rest.test.AbstractControllerIntegrationTest;
|
import org.dspace.app.rest.test.AbstractControllerIntegrationTest;
|
||||||
|
import org.dspace.app.scripts.handler.impl.TestDSpaceRunnableHandler;
|
||||||
import org.dspace.builder.CollectionBuilder;
|
import org.dspace.builder.CollectionBuilder;
|
||||||
import org.dspace.builder.CommunityBuilder;
|
import org.dspace.builder.CommunityBuilder;
|
||||||
import org.dspace.builder.EPersonBuilder;
|
import org.dspace.builder.EPersonBuilder;
|
||||||
@@ -41,7 +45,9 @@ import org.dspace.content.Site;
|
|||||||
import org.dspace.content.factory.ContentServiceFactory;
|
import org.dspace.content.factory.ContentServiceFactory;
|
||||||
import org.dspace.eperson.EPerson;
|
import org.dspace.eperson.EPerson;
|
||||||
import org.dspace.scripts.DSpaceCommandLineParameter;
|
import org.dspace.scripts.DSpaceCommandLineParameter;
|
||||||
|
import org.dspace.scripts.DSpaceRunnable;
|
||||||
import org.dspace.scripts.configuration.ScriptConfiguration;
|
import org.dspace.scripts.configuration.ScriptConfiguration;
|
||||||
|
import org.dspace.scripts.factory.ScriptServiceFactory;
|
||||||
import org.dspace.scripts.service.ScriptService;
|
import org.dspace.scripts.service.ScriptService;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
@@ -640,4 +646,65 @@ public class CurationScriptIT extends AbstractControllerIntegrationTest {
|
|||||||
ProcessBuilder.deleteProcess(idItemRef.get());
|
ProcessBuilder.deleteProcess(idItemRef.get());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testURLRedirectCurateTest() throws Exception {
|
||||||
|
context.turnOffAuthorisationSystem();
|
||||||
|
parentCommunity = CommunityBuilder.createCommunity(context)
|
||||||
|
.withName("Parent Community")
|
||||||
|
.build();
|
||||||
|
Community child1 = CommunityBuilder.createSubCommunity(context, parentCommunity)
|
||||||
|
.withName("Sub Community")
|
||||||
|
.build();
|
||||||
|
Collection col1 = CollectionBuilder.createCollection(context, child1).withName("Collection 1").build();
|
||||||
|
|
||||||
|
Item publicItem1 = ItemBuilder.createItem(context, col1)
|
||||||
|
.withTitle("Public item 1")
|
||||||
|
.withIssueDate("2017-10-17")
|
||||||
|
.withAuthor("Smith, Donald").withAuthor("Doe, John")
|
||||||
|
// Value not starting with http or https
|
||||||
|
.withMetadata("dc", "identifier", "uri", "demo.dspace.org/home")
|
||||||
|
// MetadataValueLinkChecker uri field with regular link
|
||||||
|
.withMetadata("dc", "description", null, "https://google.com")
|
||||||
|
// MetadataValueLinkChecker uri field with redirect link
|
||||||
|
.withMetadata("dc", "description", "uri", "https://demo7.dspace.org/handle/123456789/1")
|
||||||
|
// MetadataValueLinkChecker uri field with non resolving link
|
||||||
|
.withMetadata("dc", "description", "uri", "https://www.atmire.com/broken-link")
|
||||||
|
.withSubject("ExtraEntry")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
String[] args = new String[] {"curate", "-t", "checklinks", "-i", publicItem1.getHandle()};
|
||||||
|
TestDSpaceRunnableHandler handler = new TestDSpaceRunnableHandler();
|
||||||
|
|
||||||
|
ScriptService scriptService = ScriptServiceFactory.getInstance().getScriptService();
|
||||||
|
ScriptConfiguration scriptConfiguration = scriptService.getScriptConfiguration(args[0]);
|
||||||
|
|
||||||
|
DSpaceRunnable script = null;
|
||||||
|
if (scriptConfiguration != null) {
|
||||||
|
script = scriptService.createDSpaceRunnableForScriptConfiguration(scriptConfiguration);
|
||||||
|
}
|
||||||
|
if (script != null) {
|
||||||
|
script.initialize(args, handler, admin);
|
||||||
|
script.run();
|
||||||
|
}
|
||||||
|
|
||||||
|
// field that should be ignored
|
||||||
|
assertFalse(checkIfInfoTextLoggedByHandler(handler, "demo.dspace.org/home"));
|
||||||
|
// redirect links in field that should not be ignored (https) => expect OK
|
||||||
|
assertTrue(checkIfInfoTextLoggedByHandler(handler, "https://demo7.dspace.org/handle/123456789/1 = 200 - OK"));
|
||||||
|
// regular link in field that should not be ignored (http) => expect OK
|
||||||
|
assertTrue(checkIfInfoTextLoggedByHandler(handler, "https://google.com = 200 - OK"));
|
||||||
|
// nonexistent link in field that should not be ignored => expect 404
|
||||||
|
assertTrue(checkIfInfoTextLoggedByHandler(handler, "https://www.atmire.com/broken-link = 404 - FAILED"));
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean checkIfInfoTextLoggedByHandler(TestDSpaceRunnableHandler handler, String text) {
|
||||||
|
for (String message: handler.getInfoMessages()) {
|
||||||
|
if (StringUtils.containsIgnoreCase(message, text)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -26,3 +26,6 @@ curate.taskqueue.dir = ${dspace.dir}/ctqueues
|
|||||||
|
|
||||||
# (optional) directory location of scripted (non-java) tasks
|
# (optional) directory location of scripted (non-java) tasks
|
||||||
# curate.script.dir = ${dspace.dir}/ctscripts
|
# curate.script.dir = ${dspace.dir}/ctscripts
|
||||||
|
|
||||||
|
# Maximum number of redirects the checklinks task will follow. Set to 0 for none and -1 for unlimited.
|
||||||
|
curate.checklinks.max-redirect = 0
|
||||||
|
Reference in New Issue
Block a user