From 4bd8a24ca75f6d2e6384e850b45c96c4f1229f02 Mon Sep 17 00:00:00 2001 From: Jens Vannerum Date: Tue, 11 Mar 2025 10:58:08 +0100 Subject: [PATCH 1/5] Implement a SEOHealthIndicator which verifies all relevant parameters for SEO are ok --- .../configuration/ActuatorConfiguration.java | 7 ++ .../app/rest/health/SEOHealthIndicator.java | 77 +++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/health/SEOHealthIndicator.java diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/configuration/ActuatorConfiguration.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/configuration/ActuatorConfiguration.java index 08a7e9aec8..670cff8458 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/configuration/ActuatorConfiguration.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/configuration/ActuatorConfiguration.java @@ -14,6 +14,7 @@ import java.util.Arrays; import org.apache.solr.client.solrj.SolrServerException; import org.dspace.app.rest.DiscoverableEndpointsService; import org.dspace.app.rest.health.GeoIpHealthIndicator; +import org.dspace.app.rest.health.SEOHealthIndicator; import org.dspace.app.rest.health.SolrHealthIndicator; import org.dspace.authority.AuthoritySolrServiceImpl; import org.dspace.discovery.SolrSearchCore; @@ -82,6 +83,12 @@ public class ActuatorConfiguration { return new SolrHealthIndicator(solrServerResolver.getServer()); } + @Bean + @ConditionalOnEnabledHealthIndicator("seo") + public SEOHealthIndicator seoHealthIndicator() { + return new SEOHealthIndicator(); + } + @Bean @ConditionalOnEnabledHealthIndicator("geoIp") public GeoIpHealthIndicator geoIpHealthIndicator() { diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/health/SEOHealthIndicator.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/health/SEOHealthIndicator.java new file mode 100644 index 0000000000..d936fce635 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/health/SEOHealthIndicator.java @@ -0,0 +1,77 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.health; + +import org.apache.commons.lang3.StringUtils; +import org.dspace.services.ConfigurationService; +import org.dspace.services.factory.DSpaceServicesFactory; +import org.springframework.boot.actuate.health.AbstractHealthIndicator; +import org.springframework.boot.actuate.health.Health; +import org.springframework.web.client.RestTemplate; + +/** + * Implementation of {@link org.springframework.boot.actuate.health.HealthIndicator} that verifies if the SEO of the + * DSpace instance is configured correctly. + * + * This is only relevant in a production environment, where the DSpace instance is exposed to the public. + */ +public class SEOHealthIndicator extends AbstractHealthIndicator { + + ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService(); + + private final RestTemplate restTemplate = new RestTemplate(); + + @Override + protected void doHealthCheck(Health.Builder builder) { + String baseUrl = configurationService.getProperty("dspace.ui.url"); + + boolean sitemapOk = checkUrl(baseUrl + "/sitemap_index.xml") || checkUrl(baseUrl + "/sitemap_index.html"); + boolean robotsTxtOk = checkRobotsTxt(baseUrl + "/robots.txt"); + boolean ssrOk = checkSSR(baseUrl); + + if (sitemapOk && robotsTxtOk && ssrOk) { + builder.up() + .withDetail("sitemap", "OK") + .withDetail("robots.txt", "OK") + .withDetail("ssr", "OK"); + } else { + builder.down() + .withDetail("sitemap", sitemapOk ? "OK" : "Missing or inaccessible") + .withDetail("robots.txt", robotsTxtOk ? "OK" : "Empty or contains local URLs") + .withDetail("ssr", ssrOk ? "OK" : "Server-side rendering might be disabled"); + } + } + + private boolean checkUrl(String url) { + try { + restTemplate.getForEntity(url, String.class); + return true; + } catch (Exception e) { + return false; + } + } + + private boolean checkRobotsTxt(String url) { + try { + String content = restTemplate.getForObject(url, String.class); + return StringUtils.isNotBlank(content) && !content.contains("localhost"); + } catch (Exception e) { + return false; + } + } + + private boolean checkSSR(String url) { + try { + String content = restTemplate.getForObject(url, String.class); + return content != null && !content.contains(""); + } catch (Exception e) { + return false; + } + } +} + From 20ab43ccccf84c83d6db9b431321e60256d30355 Mon Sep 17 00:00:00 2001 From: Jens Vannerum Date: Tue, 18 Mar 2025 10:04:36 +0100 Subject: [PATCH 2/5] Disable new actuator in IT --- dspace-api/src/test/data/dspaceFolder/config/local.cfg | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dspace-api/src/test/data/dspaceFolder/config/local.cfg b/dspace-api/src/test/data/dspaceFolder/config/local.cfg index 1aaacd4e24..65818dd50e 100644 --- a/dspace-api/src/test/data/dspaceFolder/config/local.cfg +++ b/dspace-api/src/test/data/dspaceFolder/config/local.cfg @@ -158,6 +158,7 @@ proxies.trusted.include_ui_ip = true # For the tests we have to disable this health indicator because there isn't a mock server and the calculated status was DOWN management.health.solrOai.enabled = false +management.health.seo.enabled = false # Enable researcher profiles and orcid synchronization for tests researcher-profile.entity-type = Person @@ -192,4 +193,4 @@ ldn.notify.inbox.block-untrusted-ip = true # ERROR LOGGING # ########################################### # Log full stacktrace of other common 4xx errors (for easier debugging of these errors in tests) -logging.server.include-stacktrace-for-httpcode = 422, 400 \ No newline at end of file +logging.server.include-stacktrace-for-httpcode = 422, 400 From 32c048428026f890c2f3bc7eca5a2f20717dc587 Mon Sep 17 00:00:00 2001 From: Jens Vannerum Date: Tue, 1 Apr 2025 17:54:48 +0200 Subject: [PATCH 3/5] 127746: Implement different failures for robots file so we can differentiate between a missing file or an invalid file --- .../app/rest/health/SEOHealthIndicator.java | 37 ++++++++++++++----- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/health/SEOHealthIndicator.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/health/SEOHealthIndicator.java index d936fce635..a071e12088 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/health/SEOHealthIndicator.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/health/SEOHealthIndicator.java @@ -31,19 +31,28 @@ public class SEOHealthIndicator extends AbstractHealthIndicator { String baseUrl = configurationService.getProperty("dspace.ui.url"); boolean sitemapOk = checkUrl(baseUrl + "/sitemap_index.xml") || checkUrl(baseUrl + "/sitemap_index.html"); - boolean robotsTxtOk = checkRobotsTxt(baseUrl + "/robots.txt"); + RobotsTxtStatus robotsTxtStatus = checkRobotsTxt(baseUrl + "/robots.txt"); boolean ssrOk = checkSSR(baseUrl); - if (sitemapOk && robotsTxtOk && ssrOk) { + if (sitemapOk && robotsTxtStatus == RobotsTxtStatus.VALID && ssrOk) { builder.up() .withDetail("sitemap", "OK") .withDetail("robots.txt", "OK") .withDetail("ssr", "OK"); } else { - builder.down() - .withDetail("sitemap", sitemapOk ? "OK" : "Missing or inaccessible") - .withDetail("robots.txt", robotsTxtOk ? "OK" : "Empty or contains local URLs") - .withDetail("ssr", ssrOk ? "OK" : "Server-side rendering might be disabled"); + builder.down(); + builder.withDetail("sitemap", sitemapOk ? "OK" : "Missing or inaccessible"); + + if (robotsTxtStatus == RobotsTxtStatus.MISSING) { + builder.withDetail("robots.txt", "Missing or inaccessible. Please see the DSpace Documentation on " + + "Search Engine Optimization for how to create a robots.txt."); + } else if (robotsTxtStatus == RobotsTxtStatus.INVALID) { + builder.withDetail("robots.txt", "Invalid because it contains localhost URLs. This is often a sign " + + "that a proxy is failing to pass X-Forwarded headers to DSpace. Please see the DSpace " + + "Documentation on Search Engine Optimization for how to pass X-Forwarded headers."); + } + + builder.withDetail("ssr", ssrOk ? "OK" : "Server-side rendering might be disabled"); } } @@ -56,12 +65,18 @@ public class SEOHealthIndicator extends AbstractHealthIndicator { } } - private boolean checkRobotsTxt(String url) { + private RobotsTxtStatus checkRobotsTxt(String url) { try { String content = restTemplate.getForObject(url, String.class); - return StringUtils.isNotBlank(content) && !content.contains("localhost"); + if (StringUtils.isBlank(content)) { + return RobotsTxtStatus.MISSING; + } + if (content.contains("localhost")) { + return RobotsTxtStatus.INVALID; + } + return RobotsTxtStatus.VALID; } catch (Exception e) { - return false; + return RobotsTxtStatus.MISSING; } } @@ -73,5 +88,9 @@ public class SEOHealthIndicator extends AbstractHealthIndicator { return false; } } + + private enum RobotsTxtStatus { + VALID, MISSING, INVALID + } } From 170dc9a44c5b16c28298a9e3133534e70967147d Mon Sep 17 00:00:00 2001 From: Jens Vannerum Date: Tue, 1 Apr 2025 17:58:47 +0200 Subject: [PATCH 4/5] 127746: Add more detailed information messages on how to solve problems --- .../org/dspace/app/rest/health/SEOHealthIndicator.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/health/SEOHealthIndicator.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/health/SEOHealthIndicator.java index a071e12088..740c6ab649 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/health/SEOHealthIndicator.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/health/SEOHealthIndicator.java @@ -41,7 +41,8 @@ public class SEOHealthIndicator extends AbstractHealthIndicator { .withDetail("ssr", "OK"); } else { builder.down(); - builder.withDetail("sitemap", sitemapOk ? "OK" : "Missing or inaccessible"); + builder.withDetail("sitemap", sitemapOk ? "OK" : "Sitemaps are missing or inaccessible. Please see the " + + "DSpace Documentation on Search Engine Optimization for how to enable Sitemaps."); if (robotsTxtStatus == RobotsTxtStatus.MISSING) { builder.withDetail("robots.txt", "Missing or inaccessible. Please see the DSpace Documentation on " + @@ -51,8 +52,9 @@ public class SEOHealthIndicator extends AbstractHealthIndicator { "that a proxy is failing to pass X-Forwarded headers to DSpace. Please see the DSpace " + "Documentation on Search Engine Optimization for how to pass X-Forwarded headers."); } - - builder.withDetail("ssr", ssrOk ? "OK" : "Server-side rendering might be disabled"); + builder.withDetail("ssr", ssrOk ? "OK" : "Server-side rendering (SSR) appears to be disabled. Most " + + "search engines require enabling SSR for proper indexing. Please see the DSpace Documentation on" + + " Search Engine Optimization for more details."); } } From 5dc12775fac0006dbc1d0106ffcdffbe893919d1 Mon Sep 17 00:00:00 2001 From: Jens Vannerum Date: Fri, 4 Apr 2025 16:51:56 +0200 Subject: [PATCH 5/5] 127746: Include success result for robots.txt check if other checks fail --- .../java/org/dspace/app/rest/health/SEOHealthIndicator.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/health/SEOHealthIndicator.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/health/SEOHealthIndicator.java index 740c6ab649..5b57f2d537 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/health/SEOHealthIndicator.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/health/SEOHealthIndicator.java @@ -51,6 +51,8 @@ public class SEOHealthIndicator extends AbstractHealthIndicator { builder.withDetail("robots.txt", "Invalid because it contains localhost URLs. This is often a sign " + "that a proxy is failing to pass X-Forwarded headers to DSpace. Please see the DSpace " + "Documentation on Search Engine Optimization for how to pass X-Forwarded headers."); + } else { + builder.withDetail("robots.txt", "OK"); } builder.withDetail("ssr", ssrOk ? "OK" : "Server-side rendering (SSR) appears to be disabled. Most " + "search engines require enabling SSR for proper indexing. Please see the DSpace Documentation on" +