Clean up formatting of robots.txt so there are no blank lines in a group record
This commit is contained in:
Tim Donohue
2014-12-17 21:50:23 +00:00
parent c43f050265
commit 68d4edabf2
2 changed files with 38 additions and 28 deletions

View File

@@ -1,34 +1,39 @@
User-agent: * # The FULL URL to the DSpace sitemaps
# Disable access to Discovery search and filters
Disallow: /discover
Disallow: /simple-search
# The FULL URL to your DSpace sitemaps
# The ${dspace.url} will be auto-filled with the value in dspace.cfg # The ${dspace.url} will be auto-filled with the value in dspace.cfg
# XML sitemap is listed first as it is preferred by most search engines # XML sitemap is listed first as it is preferred by most search engines
Sitemap: ${dspace.url}/sitemap Sitemap: ${dspace.url}/sitemap
Sitemap: ${dspace.url}/htmlmap Sitemap: ${dspace.url}/htmlmap
##########################
# Default Access Group
# (NOTE: blank lines are not allowed within a group record)
##########################
User-agent: *
# Disable access to Discovery search and filters
Disallow: /discover
Disallow: /simple-search
#
# Optionally uncomment the following line ONLY if sitemaps are working # Optionally uncomment the following line ONLY if sitemaps are working
# and you have verified that your site is being indexed correctly. # and you have verified that your site is being indexed correctly.
# Disallow: /browse # Disallow: /browse
#
# If you have configured DSpace (Solr-based) Statistics to be publicly # If you have configured DSpace (Solr-based) Statistics to be publicly
# accessible, then you may not want this content to be indexed # accessible, then you may not want this content to be indexed
# Disallow: /statistics # Disallow: /statistics
#
# You also may wish to disallow access to the following paths, in order # You also may wish to disallow access to the following paths, in order
# to stop web spiders from accessing user-based content: # to stop web spiders from accessing user-based content
# Disallow: /contact # Disallow: /contact
# Disallow: /feedback # Disallow: /feedback
# Disallow: /forgot # Disallow: /forgot
# Disallow: /login # Disallow: /login
# Disallow: /register # Disallow: /register
##############
# Section with misbehaving bots ##############################
# The following directives to block specific robots was borrowed from Wikipedia's robots.txt # Section for misbehaving bots
############## # The following directives to block specific robots were borrowed from Wikipedia's robots.txt
##############################
# advertising-related bots: # advertising-related bots:
User-agent: Mediapartners-Google* User-agent: Mediapartners-Google*
@@ -139,4 +144,4 @@ Disallow: /
# A capture bot, downloads gazillions of pages with no public benefit # A capture bot, downloads gazillions of pages with no public benefit
# http://www.webreaper.net/ # http://www.webreaper.net/
User-agent: WebReaper User-agent: WebReaper
Disallow: / Disallow: /

View File

@@ -1,34 +1,39 @@
User-agent: * # The FULL URL to the DSpace sitemaps
# Disable access to Discovery search and filters
Disallow: /discover
Disallow: /search-filter
# The FULL URL to your DSpace sitemaps
# The ${dspace.url} will be auto-filled with the value in dspace.cfg # The ${dspace.url} will be auto-filled with the value in dspace.cfg
# XML sitemap is listed first as it is preferred by most search engines # XML sitemap is listed first as it is preferred by most search engines
Sitemap: ${dspace.url}/sitemap Sitemap: ${dspace.url}/sitemap
Sitemap: ${dspace.url}/htmlmap Sitemap: ${dspace.url}/htmlmap
##########################
# Default Access Group
# (NOTE: blank lines are not allowed within a group record)
##########################
User-agent: *
# Disable access to Discovery search and filters
Disallow: /discover
Disallow: /search-filter
#
# Optionally uncomment the following line ONLY if sitemaps are working # Optionally uncomment the following line ONLY if sitemaps are working
# and you have verified that your site is being indexed correctly. # and you have verified that your site is being indexed correctly.
# Disallow: /browse # Disallow: /browse
#
# If you have configured DSpace (Solr-based) Statistics to be publicly # If you have configured DSpace (Solr-based) Statistics to be publicly
# accessible, then you may not want this content to be indexed # accessible, then you may not want this content to be indexed
# Disallow: /statistics # Disallow: /statistics
#
# You also may wish to disallow access to the following paths, in order # You also may wish to disallow access to the following paths, in order
# to stop web spiders from accessing user-based content: # to stop web spiders from accessing user-based content
# Disallow: /contact # Disallow: /contact
# Disallow: /feedback # Disallow: /feedback
# Disallow: /forgot # Disallow: /forgot
# Disallow: /login # Disallow: /login
# Disallow: /register # Disallow: /register
##############
# Section with misbehaving bots ##############################
# The following directives to block specific robots was borrowed from Wikipedia's robots.txt # Section for misbehaving bots
############## # The following directives to block specific robots were borrowed from Wikipedia's robots.txt
##############################
# advertising-related bots: # advertising-related bots:
User-agent: Mediapartners-Google* User-agent: Mediapartners-Google*
@@ -139,4 +144,4 @@ Disallow: /
# A capture bot, downloads gazillions of pages with no public benefit # A capture bot, downloads gazillions of pages with no public benefit
# http://www.webreaper.net/ # http://www.webreaper.net/
User-agent: WebReaper User-agent: WebReaper
Disallow: / Disallow: /