Compare commits

...

18 Commits

Author SHA1 Message Date
Min RK
6c89de082f 0.8.0b5 2017-09-08 11:19:25 +02:00
Carol Willing
6fb31cc613 Merge pull request #1393 from minrk/spawn-future
improve reporting of spawn failure
2017-09-07 10:20:38 -07:00
Carol Willing
cfb22baf05 Merge pull request #1399 from minrk/trailing-slash
add trailing slash on /user/name
2017-09-07 09:59:58 -07:00
Min RK
2d0c1ff0a8 Merge pull request #1404 from minrk/sqla-11
we require sqlalchemy 1.1
2017-09-07 16:48:13 +02:00
Min RK
7789e13879 we require sqlalchemy 1.1
for enum support

[ref](http://docs.sqlalchemy.org/en/latest/changelog/changelog_11.html#change-9d6d98d7acabc8564b8eebb11c28a624)
2017-09-07 15:10:48 +02:00
Yuvi Panda
f7b90e2c09 Merge pull request #1400 from minrk/auth-custom-html
allow Authenticator.custom_html to be HTML
2017-09-06 11:56:14 -07:00
Carol Willing
ccb29167dd Merge pull request #1392 from minrk/rm-extra-log
update docs to preferred method of writing to log file
2017-09-06 07:32:25 -07:00
Min RK
4ef1eca3c9 allow Authenticator.custom_html to be HTML 2017-09-06 15:14:26 +02:00
Min RK
c26ede30b9 Point users to /hub/home to retry spawn on spawn failure 2017-09-06 15:03:26 +02:00
Min RK
64c69a3164 update docs to preferred method of writing to log file
extra_log_files config is unreliable and doesn't capture all output.

Piping output is much more robust and reliable.
2017-09-06 14:38:33 +02:00
Min RK
ad7867ff11 add trailing slash on /user/name
proxies may not route `/user/name` correctly, only `/user/name/...`, so make sure that `/user/name` is redirected to `/user/name/`

this manifests as a redirect loop between /user/name and /hub/user/name when a route exists but /user/name is still
being routed to the Hub
2017-09-06 12:37:22 +02:00
Yuvi Panda
14fc1588f8 Merge pull request #1380 from minrk/cull-idle-users
add —cull-users to cull_idle_servers
2017-09-05 12:48:24 -07:00
Min RK
7e5a925f4f raise original spawn failure on implicit spawn
so the error message is the same, however it was arrived at.

potential downside: it could look like the current request is spawning and failing,
rather than the reality that a previous spawn failed and we are just re-presenting the earlier error.
It's possible for there to have been a long time in between spawn and error.
2017-09-04 14:27:01 +02:00
Min RK
3c61e422da prevent implicit spawn on /user/:name if previous spawn failed
require users to visit /hub/home and click 'Start My Server' to get a new server

Visits to /hub/user/:name will get an error if the previous spawn failed,
rather than triggering a new spawn.
This should guarantee that a user sees an error if their spawn failed,
regardless of when the failure occurred and how long it took.
Some cases of slow errors could result in triggering a new spawn indefinitely without
the user seeing an error message.

/hub/spawn was a simple redirect to /user/:name in the absence of a spawn form,
but now clears the `_spawn_future` prior to redirect
to signal that a new spawn has been explicitly requested in the case of a prior failure.
2017-09-04 14:17:24 +02:00
Min RK
0e2cf37981 point to single-user logs when spawner fails to start 2017-09-04 13:14:07 +02:00
Min RK
503d5e389f render pending page if triggered spawn doesn't finish
instead of redirecting, which starts redirect loop counter
2017-09-04 12:02:40 +02:00
Min RK
7b1e61ab2c allow waiting for pending spawn via spawner._spawn_future
avoids losing errors when visiting `/hub/user/:name` during a pending spawn
2017-09-04 11:53:42 +02:00
Min RK
f9a90d2494 add —cull-users to cull_idle_servers
allows deleting idle users in addition to servers for temp-user cases such as binder/tmpnb
2017-08-30 10:31:44 +02:00
11 changed files with 156 additions and 36 deletions

View File

@@ -49,9 +49,6 @@ c.JupyterHub.cookie_secret_file = pjoin(runtime_dir, 'cookie_secret')
c.JupyterHub.db_url = pjoin(runtime_dir, 'jupyterhub.sqlite')
# or `--db=/path/to/jupyterhub.sqlite` on the command-line
# put the log file in /var/log
c.JupyterHub.extra_log_file = '/var/log/jupyterhub.log'
# use GitHub OAuthenticator for local users
c.JupyterHub.authenticator_class = 'oauthenticator.LocalGitHubOAuthenticator'
c.GitHubOAuthenticator.oauth_callback_url = os.environ['OAUTH_CALLBACK_URL']
@@ -79,7 +76,8 @@ export GITHUB_CLIENT_ID=github_id
export GITHUB_CLIENT_SECRET=github_secret
export OAUTH_CALLBACK_URL=https://example.com/hub/oauth_callback
export CONFIGPROXY_AUTH_TOKEN=super-secret
jupyterhub -f /etc/jupyterhub/jupyterhub_config.py
# append log output to log file /var/log/jupyterhub.log
jupyterhub -f /etc/jupyterhub/jupyterhub_config.py &>> /var/log/jupyterhub.log
```
## Using nginx reverse proxy

View File

@@ -40,8 +40,11 @@ from tornado.options import define, options, parse_command_line
@coroutine
def cull_idle(url, api_token, timeout):
"""cull idle single-user servers"""
def cull_idle(url, api_token, timeout, cull_users=False):
"""Shutdown idle single-user servers
If cull_users, inactive *users* will be deleted as well.
"""
auth_header = {
'Authorization': 'token %s' % api_token
}
@@ -54,26 +57,50 @@ def cull_idle(url, api_token, timeout):
resp = yield client.fetch(req)
users = json.loads(resp.body.decode('utf8', 'replace'))
futures = []
for user in users:
last_activity = parse_date(user['last_activity'])
if user['server'] and last_activity < cull_limit:
app_log.info("Culling %s (inactive since %s)", user['name'], last_activity)
@coroutine
def cull_one(user, last_activity):
"""cull one user"""
# shutdown server first. Hub doesn't allow deleting users with running servers.
if user['server']:
app_log.info("Culling server for %s (inactive since %s)", user['name'], last_activity)
req = HTTPRequest(url=url + '/users/%s/server' % user['name'],
method='DELETE',
headers=auth_header,
)
futures.append((user['name'], client.fetch(req)))
elif user['server'] and last_activity > cull_limit:
yield client.fetch(req)
if cull_users:
app_log.info("Culling user %s (inactive since %s)", user['name'], last_activity)
req = HTTPRequest(url=url + '/users/%s' % user['name'],
method='DELETE',
headers=auth_header,
)
yield client.fetch(req)
for user in users:
if not user['server'] and not cull_users:
# server not running and not culling users, nothing to do
continue
last_activity = parse_date(user['last_activity'])
if last_activity < cull_limit:
futures.append((user['name'], cull_one(user, last_activity)))
else:
app_log.debug("Not culling %s (active since %s)", user['name'], last_activity)
for (name, f) in futures:
yield f
app_log.debug("Finished culling %s", name)
if __name__ == '__main__':
define('url', default=os.environ.get('JUPYTERHUB_API_URL'), help="The JupyterHub API URL")
define('timeout', default=600, help="The idle timeout (in seconds)")
define('cull_every', default=0, help="The interval (in seconds) for checking for idle servers to cull")
define('cull_users', default=False,
help="""Cull users in addition to servers.
This is for use in temporary-user cases such as tmpnb.""",
)
parse_command_line()
if not options.cull_every:
@@ -82,7 +109,7 @@ if __name__ == '__main__':
api_token = os.environ['JUPYTERHUB_API_TOKEN']
loop = IOLoop.current()
cull = lambda : cull_idle(options.url, api_token, options.timeout)
cull = lambda : cull_idle(options.url, api_token, options.timeout, options.cull_users)
# run once before scheduling periodic call
loop.run_sync(cull)
# schedule periodic cull

View File

@@ -7,7 +7,7 @@ version_info = (
0,
8,
0,
'b4',
'b5',
)
__version__ = '.'.join(map(str, version_info))

View File

@@ -801,12 +801,10 @@ class JupyterHub(Application):
self.handlers = self.add_url_prefix(self.hub_prefix, h)
# some extra handlers, outside hub_prefix
self.handlers.extend([
(r"%s" % self.hub_prefix.rstrip('/'), web.RedirectHandler,
{
"url": self.hub_prefix,
"permanent": False,
}
),
# add trailing / to `/hub`
(self.hub_prefix.rstrip('/'), handlers.AddSlashHandler),
# add trailing / to ``/user|services/:name`
(r"%s(user|services)/([^/]+)" % self.base_url, handlers.AddSlashHandler),
(r"(?!%s).*" % self.hub_prefix, handlers.PrefixRedirectHandler),
(r'(.*)', handlers.Template404),
])

View File

@@ -376,6 +376,9 @@ class BaseHandler(RequestHandler):
@gen.coroutine
def spawn_single_user(self, user, server_name='', options=None):
# in case of error, include 'try again from /hub/home' message
self.extra_error_html = self.spawn_home_error
user_server_name = user.name
if self.allow_named_servers and not server_name:
server_name = default_server_name(user)
@@ -440,11 +443,7 @@ class BaseHandler(RequestHandler):
otherwise it is called immediately.
"""
# wait for spawn Future
try:
yield spawn_future
except Exception:
spawner._spawn_pending = False
raise
yield spawn_future
toc = IOLoop.current().time()
self.log.info("User %s took %.3f seconds to start", user_server_name, toc-tic)
self.statsd.timing('spawner.success', (toc - tic) * 1000)
@@ -459,10 +458,22 @@ class BaseHandler(RequestHandler):
spawner.add_poll_callback(self.user_stopped, user, server_name)
finally:
spawner._proxy_pending = False
spawner._spawn_pending = False
# hook up spawner._spawn_future so that other requests can await
# this result
finish_spawn_future = spawner._spawn_future = finish_user_spawn()
def _clear_spawn_future(f):
# clear spawner._spawn_future when it's done
# keep an exception around, though, to prevent repeated implicit spawns
# if spawn is failing
if f.exception() is None:
spawner._spawn_future = None
# Now we're all done. clear _spawn_pending flag
spawner._spawn_pending = False
finish_spawn_future.add_done_callback(_clear_spawn_future)
try:
yield gen.with_timeout(timedelta(seconds=self.slow_spawn_timeout), finish_user_spawn())
yield gen.with_timeout(timedelta(seconds=self.slow_spawn_timeout), finish_spawn_future)
except gen.TimeoutError:
# waiting_for_response indicates server process has started,
# but is yet to become responsive.
@@ -479,7 +490,8 @@ class BaseHandler(RequestHandler):
if status is not None:
toc = IOLoop.current().time()
self.statsd.timing('spawner.failure', (toc - tic) * 1000)
raise web.HTTPError(500, "Spawner failed to start [status=%s]" % status)
raise web.HTTPError(500, "Spawner failed to start [status=%s]. The logs for %s may contain details." % (
status, spawner._log_name))
if spawner._waiting_for_response:
# hit timeout waiting for response, but server's running.
@@ -549,6 +561,19 @@ class BaseHandler(RequestHandler):
# template rendering
#---------------------------------------------------------------
@property
def spawn_home_error(self):
"""Extra message pointing users to try spawning again from /hub/home.
Should be added to `self.extra_error_html` for any handler
that could serve a failed spawn message.
"""
home = url_path_join(self.hub.base_url, 'home')
return (
"You can try restarting your server from the "
"<a href='{home}'>home page</a>.".format(home=home)
)
def get_template(self, name):
"""Return the jinja template object for a given name"""
return self.settings['jinja2_env'].get_template(name)
@@ -596,6 +621,7 @@ class BaseHandler(RequestHandler):
status_code=status_code,
status_message=status_message,
message=message,
extra_error_html=getattr(self, 'extra_error_html', ''),
exception=exception,
)
@@ -649,10 +675,13 @@ class UserSpawnHandler(BaseHandler):
current_user = self.get_current_user()
if current_user and current_user.name == name:
# if spawning fails for any reason, point users to /hub/home to retry
self.extra_error_html = self.spawn_home_error
# If people visit /user/:name directly on the Hub,
# the redirects will just loop, because the proxy is bypassed.
# Try to check for that and warn,
# though the user-facing behavior is unchainged
# though the user-facing behavior is unchanged
host_info = urlparse(self.request.full_url())
port = host_info.port
if not port:
@@ -664,8 +693,34 @@ class UserSpawnHandler(BaseHandler):
Make sure to connect to the proxied public URL %s
""", self.request.full_url(), self.proxy.public_url)
# logged in as correct user, spawn the server
# logged in as correct user, check for pending spawn
spawner = current_user.spawner
# First, check for previous failure.
if (
not spawner.active
and spawner._spawn_future
and spawner._spawn_future.done()
and spawner._spawn_future.exception()
):
# Condition: spawner not active and _spawn_future exists and contains an Exception
# Implicit spawn on /user/:name is not allowed if the user's last spawn failed.
# We should point the user to Home if the most recent spawn failed.
self.log.error("Preventing implicit spawn for %s because last spawn failed: %s",
spawner._log_name, spawner._spawn_future.exception())
raise spawner._spawn_future.exception()
# check for pending spawn
if spawner.pending and spawner._spawn_future:
# wait on the pending spawn
self.log.debug("Waiting for %s pending %s", spawner._log_name, spawner.pending)
try:
yield gen.with_timeout(timedelta(seconds=self.slow_spawn_timeout), spawner._spawn_future)
except gen.TimeoutError:
self.log.info("Pending spawn for %s didn't finish in %.1f seconds", spawner._log_name, self.slow_spawn_timeout)
pass
# we may have waited above, check pending again:
if spawner.pending:
self.log.info("%s is pending %s", spawner._log_name, spawner.pending)
# spawn has started, but not finished
@@ -679,6 +734,8 @@ class UserSpawnHandler(BaseHandler):
status = yield spawner.poll()
else:
status = 0
# server is not running, trigger spawn
if status is not None:
if spawner.options_form:
self.redirect(url_concat(url_path_join(self.hub.base_url, 'spawn'),
@@ -687,6 +744,15 @@ class UserSpawnHandler(BaseHandler):
else:
yield self.spawn_single_user(current_user)
# spawn didn't finish, show pending page
if spawner.pending:
self.log.info("%s is pending %s", spawner._log_name, spawner.pending)
# spawn has started, but not finished
self.statsd.incr('redirects.user_spawn_pending', 1)
html = self.render_template("spawn_pending.html", user=current_user)
self.finish(html)
return
# We do exponential backoff here - since otherwise we can get stuck in a redirect loop!
# This is important in many distributed proxy implementations - those are often eventually
# consistent and can take upto a couple of seconds to actually apply throughout the cluster.
@@ -786,6 +852,13 @@ class CSPReportHandler(BaseHandler):
self.statsd.incr('csp_report')
class AddSlashHandler(BaseHandler):
"""Handler for adding trailing slash to URLs that need them"""
def get(self, *args):
src = urlparse(self.request.uri)
dest = src._replace(path=src.path + '/')
self.redirect(urlunparse(dest))
default_handlers = [
(r'/user/([^/]+)(/.*)?', UserSpawnHandler),
(r'/user-redirect/(.*)?', UserRedirectHandler),

View File

@@ -67,9 +67,13 @@ class HomeHandler(BaseHandler):
if user.running:
# trigger poll_and_notify event in case of a server that died
yield user.spawner.poll_and_notify()
# send the user to /spawn if they aren't running,
# to establish that this is an explicit spawn request rather
# than an implicit one, which can be caused by any link to `/user/:name`
url = user.url if user.running else url_path_join(self.hub.base_url, 'spawn')
html = self.render_template('home.html',
user=user,
url=user.url,
url=url,
)
self.finish(html)
@@ -92,7 +96,10 @@ class SpawnHandler(BaseHandler):
@web.authenticated
def get(self):
"""GET renders form for spawning with user-specified options"""
"""GET renders form for spawning with user-specified options
or triggers spawn via redirect if there is no form.
"""
user = self.get_current_user()
if not self.allow_named_servers and user.running:
url = user.url
@@ -102,7 +109,12 @@ class SpawnHandler(BaseHandler):
if user.spawner.options_form:
self.finish(self._render_form())
else:
# not running, no form. Trigger spawn.
# Explicit spawn request: clear _spawn_future
# which may have been saved to prevent implicit spawns
# after a failure.
if user.spawner._spawn_future and user.spawner._spawn_future.done():
user.spawner._spawn_future = None
# not running, no form. Trigger spawn by redirecting to /user/:name
self.redirect(user.url)
@web.authenticated

View File

@@ -54,6 +54,7 @@ class Spawner(LoggingConfigurable):
_proxy_pending = False
_waiting_for_response = False
_jupyterhub_version = None
_spawn_future = None
@property
def _log_name(self):

View File

@@ -126,7 +126,7 @@ def test_spawn_redirect(app):
# should have started server
status = yield u.spawner.poll()
assert status is None
# test spawn page when server is already running (just redirect)
r = yield get_page('spawn', app, cookies=cookies)
r.raise_for_status()
@@ -134,6 +134,12 @@ def test_spawn_redirect(app):
path = urlparse(r.url).path
assert path == ujoin(app.base_url, '/user/%s/' % name)
# test handing of trailing slash on `/user/name`
r = yield get_page('user/' + name, app, cookies=cookies)
r.raise_for_status()
path = urlparse(r.url).path
assert path == ujoin(app.base_url, '/user/%s/' % name)
@pytest.mark.gen_test
def test_spawn_page(app):

View File

@@ -4,5 +4,5 @@ tornado>=4.1
jinja2
pamela
python-oauth2>=1.0
SQLAlchemy>=1.0
SQLAlchemy>=1.1
requests

View File

@@ -22,6 +22,11 @@
{{message_html | safe}}
</p>
{% endif %}
{% if extra_error_html %}
<p>
{{extra_error_html | safe}}
</p>
{% endif %}
{% endblock error_detail %}
</div>

View File

@@ -8,7 +8,7 @@
{% block login %}
<div id="login-main" class="container">
{% if custom_html %}
{{ custom_html }}
{{ custom_html | safe }}
{% elif login_service %}
<div class="service-login">
<a role="button" class='btn btn-jupyter btn-lg' href='{{authenticator_login_url}}'>