mirror of
https://github.com/DSpace/DSpace.git
synced 2025-10-07 10:04:21 +00:00
Clean up formatting of robots.txt so there are no blank lines in a group record
This commit is contained in:
@@ -1,34 +1,39 @@
|
|||||||
User-agent: *
|
# The FULL URL to the DSpace sitemaps
|
||||||
# Disable access to Discovery search and filters
|
|
||||||
Disallow: /discover
|
|
||||||
Disallow: /simple-search
|
|
||||||
|
|
||||||
# The FULL URL to your DSpace sitemaps
|
|
||||||
# The ${dspace.url} will be auto-filled with the value in dspace.cfg
|
# The ${dspace.url} will be auto-filled with the value in dspace.cfg
|
||||||
# XML sitemap is listed first as it is preferred by most search engines
|
# XML sitemap is listed first as it is preferred by most search engines
|
||||||
Sitemap: ${dspace.url}/sitemap
|
Sitemap: ${dspace.url}/sitemap
|
||||||
Sitemap: ${dspace.url}/htmlmap
|
Sitemap: ${dspace.url}/htmlmap
|
||||||
|
|
||||||
|
##########################
|
||||||
|
# Default Access Group
|
||||||
|
# (NOTE: blank lines are not allowed in a group record)
|
||||||
|
##########################
|
||||||
|
User-agent: *
|
||||||
|
# Disable access to Discovery search and filters
|
||||||
|
Disallow: /discover
|
||||||
|
Disallow: /simple-search
|
||||||
|
#
|
||||||
# Optionally uncomment the following line ONLY if sitemaps are working
|
# Optionally uncomment the following line ONLY if sitemaps are working
|
||||||
# and you have verified that your site is being indexed correctly.
|
# and you have verified that your site is being indexed correctly.
|
||||||
# Disallow: /browse
|
# Disallow: /browse
|
||||||
|
#
|
||||||
# If you have configured DSpace (Solr-based) Statistics to be publicly
|
# If you have configured DSpace (Solr-based) Statistics to be publicly
|
||||||
# accessible, then you may not want this content to be indexed
|
# accessible, then you may not want this content to be indexed
|
||||||
# Disallow: /statistics
|
# Disallow: /statistics
|
||||||
|
#
|
||||||
# You also may wish to disallow access to the following paths, in order
|
# You also may wish to disallow access to the following paths, in order
|
||||||
# to stop web spiders from accessing user-based content:
|
# to stop web spiders from accessing user-based content:
|
||||||
# Disallow: /contact
|
# Disallow: /contact
|
||||||
# Disallow: /feedback
|
# Disallow: /feedback
|
||||||
# Disallow: /forgot
|
# Disallow: /forgot
|
||||||
# Disallow: /login
|
# Disallow: /login
|
||||||
# Disallow: /register
|
# Disallow: /register
|
||||||
|
|
||||||
##############
|
|
||||||
# Section with misbehaving bots
|
##############################
|
||||||
# The following directives to block specific robots was borrowed from Wikipedia's robots.txt
|
# Section for misbehaving bots
|
||||||
##############
|
# The following directives to block specific robots were borrowed from Wikipedia's robots.txt
|
||||||
|
##############################
|
||||||
|
|
||||||
# advertising-related bots:
|
# advertising-related bots:
|
||||||
User-agent: Mediapartners-Google*
|
User-agent: Mediapartners-Google*
|
||||||
@@ -139,4 +144,4 @@ Disallow: /
|
|||||||
# A capture bot, downloads gazillions of pages with no public benefit
|
# A capture bot, downloads gazillions of pages with no public benefit
|
||||||
# http://www.webreaper.net/
|
# http://www.webreaper.net/
|
||||||
User-agent: WebReaper
|
User-agent: WebReaper
|
||||||
Disallow: /
|
Disallow: /
|
||||||
|
@@ -1,34 +1,39 @@
|
|||||||
User-agent: *
|
# The FULL URL to the DSpace sitemaps
|
||||||
# Disable access to Discovery search and filters
|
|
||||||
Disallow: /discover
|
|
||||||
Disallow: /search-filter
|
|
||||||
|
|
||||||
# The FULL URL to your DSpace sitemaps
|
|
||||||
# The ${dspace.url} will be auto-filled with the value in dspace.cfg
|
# The ${dspace.url} will be auto-filled with the value in dspace.cfg
|
||||||
# XML sitemap is listed first as it is preferred by most search engines
|
# XML sitemap is listed first as it is preferred by most search engines
|
||||||
Sitemap: ${dspace.url}/sitemap
|
Sitemap: ${dspace.url}/sitemap
|
||||||
Sitemap: ${dspace.url}/htmlmap
|
Sitemap: ${dspace.url}/htmlmap
|
||||||
|
|
||||||
|
##########################
|
||||||
|
# Default Access Group
|
||||||
|
# (NOTE: blank lines are not allowed in a group record)
|
||||||
|
##########################
|
||||||
|
User-agent: *
|
||||||
|
# Disable access to Discovery search and filters
|
||||||
|
Disallow: /discover
|
||||||
|
Disallow: /search-filter
|
||||||
|
#
|
||||||
# Optionally uncomment the following line ONLY if sitemaps are working
|
# Optionally uncomment the following line ONLY if sitemaps are working
|
||||||
# and you have verified that your site is being indexed correctly.
|
# and you have verified that your site is being indexed correctly.
|
||||||
# Disallow: /browse
|
# Disallow: /browse
|
||||||
|
#
|
||||||
# If you have configured DSpace (Solr-based) Statistics to be publicly
|
# If you have configured DSpace (Solr-based) Statistics to be publicly
|
||||||
# accessible, then you may not want this content to be indexed
|
# accessible, then you may not want this content to be indexed
|
||||||
# Disallow: /statistics
|
# Disallow: /statistics
|
||||||
|
#
|
||||||
# You also may wish to disallow access to the following paths, in order
|
# You also may wish to disallow access to the following paths, in order
|
||||||
# to stop web spiders from accessing user-based content:
|
# to stop web spiders from accessing user-based content:
|
||||||
# Disallow: /contact
|
# Disallow: /contact
|
||||||
# Disallow: /feedback
|
# Disallow: /feedback
|
||||||
# Disallow: /forgot
|
# Disallow: /forgot
|
||||||
# Disallow: /login
|
# Disallow: /login
|
||||||
# Disallow: /register
|
# Disallow: /register
|
||||||
|
|
||||||
##############
|
|
||||||
# Section with misbehaving bots
|
##############################
|
||||||
# The following directives to block specific robots was borrowed from Wikipedia's robots.txt
|
# Section for misbehaving bots
|
||||||
##############
|
# The following directives to block specific robots were borrowed from Wikipedia's robots.txt
|
||||||
|
##############################
|
||||||
|
|
||||||
# advertising-related bots:
|
# advertising-related bots:
|
||||||
User-agent: Mediapartners-Google*
|
User-agent: Mediapartners-Google*
|
||||||
@@ -139,4 +144,4 @@ Disallow: /
|
|||||||
# A capture bot, downloads gazillions of pages with no public benefit
|
# A capture bot, downloads gazillions of pages with no public benefit
|
||||||
# http://www.webreaper.net/
|
# http://www.webreaper.net/
|
||||||
User-agent: WebReaper
|
User-agent: WebReaper
|
||||||
Disallow: /
|
Disallow: /
|
||||||
|
Reference in New Issue
Block a user