mirror of
https://github.com/jupyterhub/jupyterhub.git
synced 2025-10-16 22:43:00 +00:00
allow high latency spawners
such as VMs, batch and cloud services, etc., which can take minutes to start.

- Spawner.start_timeout sets a limit for true failure, at which point the spawner should be considered dead.
- Handler.spawn_single_user only waits up to 10 seconds (the default of the slow_spawn_timeout setting) before returning. It can now return while the spawner is still pending.
- Record the User.spawn_pending state, and render a 'pending' page while the server is starting but not yet started.
This commit is contained in:
@@ -4,13 +4,14 @@
|
||||
# Distributed under the terms of the Modified BSD License.
|
||||
|
||||
import re
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
from http.client import responses
|
||||
|
||||
from jinja2 import TemplateNotFound
|
||||
|
||||
from tornado.log import app_log
|
||||
from tornado.httputil import url_concat
|
||||
from tornado.ioloop import IOLoop
|
||||
from tornado.web import RequestHandler
|
||||
from tornado import gen, web
|
||||
|
||||
@@ -160,25 +161,54 @@ class BaseHandler(RequestHandler):
|
||||
# spawning-related
|
||||
#---------------------------------------------------------------
|
||||
|
||||
@property
|
||||
def slow_spawn_timeout(self):
|
||||
return self.settings.get('slow_spawn_timeout', 10)
|
||||
|
||||
@property
|
||||
def spawner_class(self):
|
||||
return self.settings.get('spawner_class', LocalProcessSpawner)
|
||||
|
||||
@gen.coroutine
|
||||
def spawn_single_user(self, user):
|
||||
yield user.spawn(
|
||||
f = user.spawn(
|
||||
spawner_class=self.spawner_class,
|
||||
base_url=self.base_url,
|
||||
hub=self.hub,
|
||||
config=self.config,
|
||||
)
|
||||
@gen.coroutine
|
||||
def finish_user_spawn(f=None):
|
||||
"""Finish the user spawn by registering listeners and notifying the proxy.
|
||||
|
||||
If the spawner is slow to start, this is passed as an async callback,
|
||||
otherwise it is called immediately.
|
||||
"""
|
||||
if f and f.exception() is not None:
|
||||
# failed, don't add to the proxy
|
||||
return
|
||||
yield self.proxy.add_user(user)
|
||||
user.spawner.add_poll_callback(self.user_stopped, user)
|
||||
return user
|
||||
|
||||
try:
|
||||
yield gen.with_timeout(timedelta(seconds=self.slow_spawn_timeout), f)
|
||||
except gen.TimeoutError:
|
||||
if user.spawn_pending:
|
||||
# hit timeout, but spawn is still pending
|
||||
self.log.warn("User %s server is slow to start", user.name)
|
||||
# schedule finish for when the user finishes spawning
|
||||
IOLoop.current().add_future(f, finish_user_spawn)
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
yield finish_user_spawn()
|
||||
|
||||
@gen.coroutine
|
||||
def user_stopped(self, user):
|
||||
"""Callback that fires when the spawner has stopped"""
|
||||
status = yield user.spawner.poll()
|
||||
if status is None:
|
||||
status = 'unknown'
|
||||
self.log.warn("User %s server stopped, with exit code: %s",
|
||||
user.name, status,
|
||||
)
|
||||
@@ -279,6 +309,13 @@ class UserSpawnHandler(BaseHandler):
|
||||
if current_user and current_user.name == name:
|
||||
# logged in, spawn the server
|
||||
if current_user.spawner:
|
||||
if current_user.spawn_pending:
|
||||
# spawn has started, but not finished
|
||||
html = self.render_template("spawn_pending.html", user=current_user)
|
||||
self.finish(html)
|
||||
return
|
||||
|
||||
# spawn has supposedly finished, check on the status
|
||||
status = yield current_user.spawner.poll()
|
||||
if status is not None:
|
||||
yield self.spawn_single_user(current_user)
|
||||
|
@@ -3,7 +3,7 @@
|
||||
# Copyright (c) Jupyter Development Team.
|
||||
# Distributed under the terms of the Modified BSD License.
|
||||
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
import errno
|
||||
import json
|
||||
import socket
|
||||
@@ -250,6 +250,7 @@ class User(Base):
|
||||
cookie_id = Column(Unicode, default=new_token)
|
||||
state = Column(JSONDict)
|
||||
spawner = None
|
||||
spawn_pending = False
|
||||
|
||||
def __repr__(self):
|
||||
if self.server:
|
||||
@@ -310,7 +311,23 @@ class User(Base):
|
||||
spawner.clear_state()
|
||||
spawner.api_token = api_token
|
||||
|
||||
yield spawner.start()
|
||||
self.spawn_pending = True
|
||||
f = spawner.start()
|
||||
# wait for spawner.start to return
|
||||
try:
|
||||
yield gen.with_timeout(timedelta(seconds=spawner.start_timeout), f)
|
||||
except gen.TimeoutError as e:
|
||||
self.log.warn("{user}'s server failed to start in {s} seconds, giving up".format(
|
||||
user=self.name, s=spawner.start_timeout,
|
||||
))
|
||||
try:
|
||||
yield self.stop()
|
||||
except Exception:
|
||||
self.log.error("Failed to cleanup {user}'s server that failed to start".format(
|
||||
user=self.name,
|
||||
), exc_info=True)
|
||||
# raise original TimeoutError
|
||||
raise e
|
||||
spawner.start_polling()
|
||||
|
||||
# store state
|
||||
@@ -320,7 +337,7 @@ class User(Base):
|
||||
try:
|
||||
yield self.server.wait_up(http=True)
|
||||
except TimeoutError as e:
|
||||
self.log.warn("{user}'s server never started at {url}, giving up.".format(
|
||||
self.log.warn("{user}'s server never showed up at {url}, giving up".format(
|
||||
user=self.name, url=self.server.url,
|
||||
))
|
||||
try:
|
||||
@@ -331,6 +348,7 @@ class User(Base):
|
||||
), exc_info=True)
|
||||
# raise original TimeoutError
|
||||
raise e
|
||||
self.spawn_pending = False
|
||||
return self
|
||||
|
||||
@gen.coroutine
|
||||
@@ -339,6 +357,7 @@ class User(Base):
|
||||
|
||||
and cleanup after it.
|
||||
"""
|
||||
self.spawn_pending = False
|
||||
if self.spawner is None:
|
||||
return
|
||||
self.spawner.stop_polling()
|
||||
|
@@ -40,6 +40,14 @@ class Spawner(LoggingConfigurable):
|
||||
user = Any()
|
||||
hub = Any()
|
||||
api_token = Unicode()
|
||||
start_timeout = Integer(60, config=True,
|
||||
help="""Timeout (in seconds) before giving up on the spawner.
|
||||
|
||||
This is the timeout for start to return, not the timeout for the server to respond.
|
||||
Callers of spawner.start will assume that startup has failed if it takes longer than this.
|
||||
start should return when the server process is started and its location is known.
|
||||
"""
|
||||
)
|
||||
|
||||
poll_interval = Integer(30, config=True,
|
||||
help="""Interval (in seconds) on which to poll the spawner."""
|
||||
|
28
share/jupyter/templates/spawn_pending.html
Normal file
28
share/jupyter/templates/spawn_pending.html
Normal file
@@ -0,0 +1,28 @@
|
||||
{% extends "page.html" %}
|
||||
|
||||
{% block main %}
|
||||
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<div class="text-center">
|
||||
<p>Your server is starting up.</p>
|
||||
<p>You will be redirected automatically when it's ready for you.</p>
|
||||
<a id="refresh" class="btn btn-lg btn-primary" href="#">refresh</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% endblock %}
|
||||
|
||||
{% block script %}
|
||||
<script type="text/javascript">
|
||||
require(["jquery"], function ($) {
|
||||
$("#refresh").click(function () {
|
||||
window.location.reload();
|
||||
})
|
||||
setTimeout(function () {
|
||||
window.location.reload();
|
||||
}, 5000);
|
||||
});
|
||||
</script>
|
||||
{% endblock %}
|
Reference in New Issue
Block a user