Clean up formatting of robots.txt so there are no blank lines in a group

record
2025-10-07 10:04:21 +00:00 · 2014-12-17 21:50:23 +00:00
parent c43f050265
commit 68d4edabf2
2 changed files with 38 additions and 28 deletions
--- a/dspace-jspui/src/main/webapp/robots.txt
+++ b/dspace-jspui/src/main/webapp/robots.txt
@@ -1,34 +1,39 @@
-User-agent: *
+# The FULL URL to the DSpace sitemaps
 # Disable access to Discovery search and filters
 Disallow: /discover 
 Disallow: /simple-search
 # The FULL URL to your DSpace sitemaps
 # The ${dspace.url} will be auto-filled with the value in dspace.cfg
 # XML sitemap is listed first as it is preferred by most search engines
 Sitemap: ${dspace.url}/sitemap
 Sitemap: ${dspace.url}/htmlmap
 ##########################
 # Default Access Group
 # (NOTE: blank lines are not allowable in a group record)
 ##########################
 User-agent: *
 # Disable access to Discovery search and filters
 Disallow: /discover 
 Disallow: /simple-search
 #
 # Optionally uncomment the following line ONLY if sitemaps are working
 # and you have verified that your site is being indexed correctly.
 # Disallow: /browse
-
+#
 # If you have configured DSpace (Solr-based) Statistics to be publicly 
 # accessible, then you may not want this content to be indexed
 # Disallow: /statistics
-
+#
 # You also may wish to disallow access to the following paths, in order
-# to stop web spiders from accessing user-based content:
+# to stop web spiders from accessing user-based content
 # Disallow: /contact
 # Disallow: /feedback
 # Disallow: /forgot
 # Disallow: /login
 # Disallow: /register
-##############
+
-# Section with misbehaving bots
+##############################
-# The following directives to block specific robots was borrowed from Wikipedia's robots.txt
+# Section for misbehaving bots
-##############
+# The following directives to block specific robots were borrowed from Wikipedia's robots.txt
 ##############################
 # advertising-related bots:
 User-agent: Mediapartners-Google*
@@ -139,4 +144,4 @@ Disallow: /
 # A capture bot, downloads gazillions of pages with no public benefit
 # http://www.webreaper.net/
 User-agent: WebReaper
-Disallow: /
+Disallow: /
--- a/dspace-xmlui/src/main/webapp/static/robots.txt
+++ b/dspace-xmlui/src/main/webapp/static/robots.txt
@@ -1,34 +1,39 @@
-User-agent: *
+# The FULL URL to the DSpace sitemaps
 # Disable access to Discovery search and filters
 Disallow: /discover 
 Disallow: /search-filter
 # The FULL URL to your DSpace sitemaps
 # The ${dspace.url} will be auto-filled with the value in dspace.cfg
 # XML sitemap is listed first as it is preferred by most search engines
 Sitemap: ${dspace.url}/sitemap
 Sitemap: ${dspace.url}/htmlmap
 ##########################
 # Default Access Group
 # (NOTE: blank lines are not allowable in a group record)
 ##########################
 User-agent: *
 # Disable access to Discovery search and filters
 Disallow: /discover
 Disallow: /search-filter
 #
 # Optionally uncomment the following line ONLY if sitemaps are working
 # and you have verified that your site is being indexed correctly.
 # Disallow: /browse
-
+#
 # If you have configured DSpace (Solr-based) Statistics to be publicly 
 # accessible, then you may not want this content to be indexed
 # Disallow: /statistics
-
+#
 # You also may wish to disallow access to the following paths, in order
-# to stop web spiders from accessing user-based content:
+# to stop web spiders from accessing user-based content
 # Disallow: /contact
 # Disallow: /feedback
 # Disallow: /forgot
 # Disallow: /login
 # Disallow: /register
-##############
+
-# Section with misbehaving bots
+##############################
-# The following directives to block specific robots was borrowed from Wikipedia's robots.txt
+# Section for misbehaving bots
-##############
+# The following directives to block specific robots were borrowed from Wikipedia's robots.txt
 ##############################
 # advertising-related bots:
 User-agent: Mediapartners-Google*
@@ -139,4 +144,4 @@ Disallow: /
 # A capture bot, downloads gazillions of pages with no public benefit
 # http://www.webreaper.net/
 User-agent: WebReaper
-Disallow: /
+Disallow: /