Suggest retry timing when we throttle server starts

Fixes #1706
This commit is contained in:
yuvipanda
2018-03-08 02:50:53 -08:00
committed by Min RK
parent 6a47123ec9
commit 8898faa141
2 changed files with 43 additions and 7 deletions

View File

@@ -616,6 +616,31 @@ class JupyterHub(Application):
"""
).tag(config=True)
# Lower bound, in seconds, of the randomized retry delay suggested to users
# whose spawn request was throttled (default: 30).
# Tagged config=True, like the neighbouring traits of this class, so that
# deployments can actually set it from their configuration.
throttle_retry_suggest_min = Integer(
    30,
    help="""
Minimum seconds after which we suggest the user retry spawning.
When `concurrent_spawn_limit` is exceeded, we recommend users retry
after a random period of time, bounded by throttle_retry_suggest_min
and throttle_retry_suggest_max.
throttle_retry_suggest_min should ideally be set to the median
spawn time of servers in your installation.
""",
).tag(config=True)
# Upper bound, in seconds, of the randomized retry delay suggested to users
# whose spawn request was throttled (default: 60).
# Tagged config=True, like the neighbouring traits of this class, so that
# deployments can actually set it from their configuration.
throttle_retry_suggest_max = Integer(
    60,
    help="""
Maximum seconds after which we suggest the user retry spawning.
When `concurrent_spawn_limit` is exceeded, we recommend users retry
after a random period of time, bounded by throttle_retry_suggest_min
and throttle_retry_suggest_max.
""",
).tag(config=True)
active_server_limit = Integer(
0,
help="""
@@ -1423,6 +1448,8 @@ class JupyterHub(Application):
allow_named_servers=self.allow_named_servers,
oauth_provider=self.oauth_provider,
concurrent_spawn_limit=self.concurrent_spawn_limit,
throttle_retry_suggest_min=self.throttle_retry_suggest_min,
throttle_retry_suggest_max=self.throttle_retry_suggest_max,
active_server_limit=self.active_server_limit,
)
# allow configured settings to have priority

View File

@@ -10,6 +10,7 @@ from datetime import datetime, timedelta
from http.client import responses
from urllib.parse import urlparse, urlunparse, parse_qs, urlencode
import uuid
import random
from jinja2 import TemplateNotFound
@@ -526,17 +527,25 @@ class BaseHandler(RequestHandler):
active_server_limit = self.active_server_limit
if concurrent_spawn_limit and spawn_pending_count >= concurrent_spawn_limit:
self.log.info(
'%s pending spawns, throttling',
spawn_pending_count,
)
SERVER_SPAWN_DURATION_SECONDS.labels(
status=ServerSpawnStatus.throttled
).observe(time.perf_counter() - spawn_start_time)
raise web.HTTPError(
429,
"User startup rate limit exceeded. Try again in a few minutes.",
# Suggest number of seconds client should wait before retrying
# This helps prevent thundering herd problems, where users simply
# immediately retry when we are overloaded.
retry_time = int(random.uniform(
self.settings['throttle_retry_suggest_min'],
self.settings['throttle_retry_suggest_max']
))
self.set_header('Retry-After', str(retry_time))
self.log.info(
'%s pending spawns, throttling. Retry in %s seconds',
spawn_pending_count, retry_time
)
self.set_status(429, "Too many users trying to log in right now. Try again in a {}s".format(retry_time))
# We use set_status and then raise web.Finish, since raising web.HTTPError resets any headers we want to send.
raise web.Finish()
if active_server_limit and active_count >= active_server_limit:
self.log.info(
'%s servers active, no space available',