diff --git a/dspace-jspui/src/main/webapp/robots.txt b/dspace-jspui/src/main/webapp/robots.txt index 4faadbe5ee..4c828e48c7 100644 --- a/dspace-jspui/src/main/webapp/robots.txt +++ b/dspace-jspui/src/main/webapp/robots.txt @@ -1,34 +1,39 @@ -User-agent: * -# Disable access to Discovery search and filters -Disallow: /discover -Disallow: /simple-search - -# The FULL URL to your DSpace sitemaps +# The FULL URL to the DSpace sitemaps # The ${dspace.url} will be auto-filled with the value in dspace.cfg # XML sitemap is listed first as it is preferred by most search engines Sitemap: ${dspace.url}/sitemap Sitemap: ${dspace.url}/htmlmap +########################## +# Default Access Group +# (NOTE: blank lines are not allowable in a group record) +########################## +User-agent: * +# Disable access to Discovery search and filters +Disallow: /discover +Disallow: /simple-search +# # Optionally uncomment the following line ONLY if sitemaps are working # and you have verified that your site is being indexed correctly. # Disallow: /browse - +# # If you have configured DSpace (Solr-based) Statistics to be publicly # accessible, then you may not want this content to be indexed # Disallow: /statistics - +# # You also may wish to disallow access to the following paths, in order -# to stop web spiders from accessing user-based content: +# to stop web spiders from accessing user-based content # Disallow: /contact # Disallow: /feedback # Disallow: /forgot # Disallow: /login # Disallow: /register -############## -# Section with misbehaving bots -# The following directives to block specific robots was borrowed from Wikipedia's robots.txt -############## + +############################## +# Section for misbehaving bots +# The following directives to block specific robots were borrowed from Wikipedia's robots.txt +############################## # advertising-related bots: User-agent: Mediapartners-Google* @@ -139,4 +144,4 @@ Disallow: / # A capture bot, downloads gazillions of pages with no public benefit # http://www.webreaper.net/ User-agent: WebReaper -Disallow: / \ No newline at end of file +Disallow: / diff --git a/dspace-xmlui/src/main/webapp/static/robots.txt b/dspace-xmlui/src/main/webapp/static/robots.txt index ffc18f2cf4..0770d6248c 100644 --- a/dspace-xmlui/src/main/webapp/static/robots.txt +++ b/dspace-xmlui/src/main/webapp/static/robots.txt @@ -1,34 +1,39 @@ -User-agent: * -# Disable access to Discovery search and filters -Disallow: /discover -Disallow: /search-filter - -# The FULL URL to your DSpace sitemaps +# The FULL URL to the DSpace sitemaps # The ${dspace.url} will be auto-filled with the value in dspace.cfg # XML sitemap is listed first as it is preferred by most search engines Sitemap: ${dspace.url}/sitemap Sitemap: ${dspace.url}/htmlmap +########################## +# Default Access Group +# (NOTE: blank lines are not allowable in a group record) +########################## +User-agent: * +# Disable access to Discovery search and filters +Disallow: /discover +Disallow: /search-filter +# # Optionally uncomment the following line ONLY if sitemaps are working # and you have verified that your site is being indexed correctly. # Disallow: /browse - +# # If you have configured DSpace (Solr-based) Statistics to be publicly # accessible, then you may not want this content to be indexed # Disallow: /statistics - +# # You also may wish to disallow access to the following paths, in order -# to stop web spiders from accessing user-based content: +# to stop web spiders from accessing user-based content # Disallow: /contact # Disallow: /feedback # Disallow: /forgot # Disallow: /login # Disallow: /register -############## -# Section with misbehaving bots -# The following directives to block specific robots was borrowed from Wikipedia's robots.txt -############## + +############################## +# Section for misbehaving bots +# The following directives to block specific robots were borrowed from Wikipedia's robots.txt +############################## # advertising-related bots: User-agent: Mediapartners-Google* @@ -139,4 +144,4 @@ Disallow: / # A capture bot, downloads gazillions of pages with no public benefit # http://www.webreaper.net/ User-agent: WebReaper -Disallow: / \ No newline at end of file +Disallow: /