Cleanup formatting of robots.txt so there are no blank lines in a group record
This commit is contained in:
Tim Donohue
2014-12-17 21:50:23 +00:00
parent c43f050265
commit 68d4edabf2
2 changed files with 38 additions and 28 deletions

View File

@@ -1,34 +1,39 @@
-User-agent: *
-# Disable access to Discovery search and filters
-Disallow: /discover
-Disallow: /simple-search
-# The FULL URL to your DSpace sitemaps
+# The FULL URL to the DSpace sitemaps
 # The ${dspace.url} will be auto-filled with the value in dspace.cfg
 # XML sitemap is listed first as it is preferred by most search engines
 Sitemap: ${dspace.url}/sitemap
 Sitemap: ${dspace.url}/htmlmap
+##########################
+# Default Access Group
+# (NOTE: blank lines are not allowable in a group record)
+##########################
+User-agent: *
+# Disable access to Discovery search and filters
+Disallow: /discover
+Disallow: /simple-search
 #
 # Optionally uncomment the following line ONLY if sitemaps are working
 # and you have verified that your site is being indexed correctly.
 # Disallow: /browse
 #
 # If you have configured DSpace (Solr-based) Statistics to be publicly
 # accessible, then you may not want this content to be indexed
 # Disallow: /statistics
 #
 # You also may wish to disallow access to the following paths, in order
-# to stop web spiders from accessing user-based content:
+# to stop web spiders from accessing user-based content
 # Disallow: /contact
 # Disallow: /feedback
 # Disallow: /forgot
 # Disallow: /login
 # Disallow: /register
-##############
-# Section with misbehaving bots
-# The following directives to block specific robots was borrowed from Wikipedia's robots.txt
-##############
+##############################
+# Section for misbehaving bots
+# The following directives to block specific robots were borrowed from Wikipedia's robots.txt
+##############################
 # advertising-related bots:
 User-agent: Mediapartners-Google*
@@ -139,4 +144,4 @@ Disallow: /
 # A capture bot, downloads gazillions of pages with no public benefit
 # http://www.webreaper.net/
 User-agent: WebReaper
-Disallow: /
\ No newline at end of file
+Disallow: /
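
The comment this diff adds, "blank lines are not allowable in a group record", is exactly how some robots.txt parsers behave. As a quick illustration (not part of the commit), Python's standard-library parser silently discards a group whose `User-agent` line is separated from its rules by a blank line; `example.org` below is a placeholder standing in for the real `${dspace.url}` value:

```python
# Sketch of why blank lines inside a robots.txt group are a problem,
# using Python's standard urllib.robotparser. example.org is a
# hypothetical stand-in for ${dspace.url}.
from urllib.robotparser import RobotFileParser

# Group formatted as in the cleaned-up file: no blank lines in the record.
clean = """\
User-agent: *
# Disable access to Discovery search and filters
Disallow: /discover
Disallow: /simple-search
"""

# Same rules, but a blank line splits the group record.
broken = """\
User-agent: *

Disallow: /discover
Disallow: /simple-search
"""

def can_fetch(rules: str, path: str) -> bool:
    rp = RobotFileParser()
    rp.parse(rules.splitlines())
    return rp.can_fetch("*", "http://example.org" + path)

print(can_fetch(clean, "/discover"))   # False: the Disallow rule applies
print(can_fetch(broken, "/discover"))  # True: the blank line made the
                                       # parser drop the whole group
```

With the blank line present, the parser resets its state after `User-agent: *`, so the following `Disallow` lines belong to no group and are ignored, leaving `/discover` crawlable; removing the blank lines, as this commit does, keeps the rules attached to the group.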

View File

@@ -1,34 +1,39 @@
-User-agent: *
-# Disable access to Discovery search and filters
-Disallow: /discover
-Disallow: /search-filter
-# The FULL URL to your DSpace sitemaps
+# The FULL URL to the DSpace sitemaps
 # The ${dspace.url} will be auto-filled with the value in dspace.cfg
 # XML sitemap is listed first as it is preferred by most search engines
 Sitemap: ${dspace.url}/sitemap
 Sitemap: ${dspace.url}/htmlmap
+##########################
+# Default Access Group
+# (NOTE: blank lines are not allowable in a group record)
+##########################
+User-agent: *
+# Disable access to Discovery search and filters
+Disallow: /discover
+Disallow: /search-filter
 #
 # Optionally uncomment the following line ONLY if sitemaps are working
 # and you have verified that your site is being indexed correctly.
 # Disallow: /browse
 #
 # If you have configured DSpace (Solr-based) Statistics to be publicly
 # accessible, then you may not want this content to be indexed
 # Disallow: /statistics
 #
 # You also may wish to disallow access to the following paths, in order
-# to stop web spiders from accessing user-based content:
+# to stop web spiders from accessing user-based content
 # Disallow: /contact
 # Disallow: /feedback
 # Disallow: /forgot
 # Disallow: /login
 # Disallow: /register
-##############
-# Section with misbehaving bots
-# The following directives to block specific robots was borrowed from Wikipedia's robots.txt
-##############
+##############################
+# Section for misbehaving bots
+# The following directives to block specific robots were borrowed from Wikipedia's robots.txt
+##############################
 # advertising-related bots:
 User-agent: Mediapartners-Google*
@@ -139,4 +144,4 @@ Disallow: /
 # A capture bot, downloads gazillions of pages with no public benefit
 # http://www.webreaper.net/
 User-agent: WebReaper
-Disallow: /
\ No newline at end of file
+Disallow: /