mirror of
https://github.com/jupyterhub/jupyterhub.git
synced 2025-10-16 22:43:00 +00:00
allow high latency spawners
such as VMs, batch systems, and cloud services, which can take minutes to start.

- Spawner.start_timeout sets the limit for true failure, at which point the spawner should be considered dead.
- Handler.spawn_single_user only waits up to 10 seconds (the default slow_spawn_timeout) before returning. It can now return while the spawner is still pending.
- Record the User.spawn_pending state, and render the 'pending' page while the server is starting but has not yet started.
This commit is contained in:
@@ -4,13 +4,14 @@
|
|||||||
# Distributed under the terms of the Modified BSD License.
|
# Distributed under the terms of the Modified BSD License.
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from datetime import datetime
|
from datetime import datetime, timedelta
|
||||||
from http.client import responses
|
from http.client import responses
|
||||||
|
|
||||||
from jinja2 import TemplateNotFound
|
from jinja2 import TemplateNotFound
|
||||||
|
|
||||||
from tornado.log import app_log
|
from tornado.log import app_log
|
||||||
from tornado.httputil import url_concat
|
from tornado.httputil import url_concat
|
||||||
|
from tornado.ioloop import IOLoop
|
||||||
from tornado.web import RequestHandler
|
from tornado.web import RequestHandler
|
||||||
from tornado import gen, web
|
from tornado import gen, web
|
||||||
|
|
||||||
@@ -160,25 +161,54 @@ class BaseHandler(RequestHandler):
|
|||||||
# spawning-related
|
# spawning-related
|
||||||
#---------------------------------------------------------------
|
#---------------------------------------------------------------
|
||||||
|
|
||||||
|
@property
|
||||||
|
def slow_spawn_timeout(self):
|
||||||
|
return self.settings.get('slow_spawn_timeout', 10)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def spawner_class(self):
|
def spawner_class(self):
|
||||||
return self.settings.get('spawner_class', LocalProcessSpawner)
|
return self.settings.get('spawner_class', LocalProcessSpawner)
|
||||||
|
|
||||||
@gen.coroutine
|
@gen.coroutine
|
||||||
def spawn_single_user(self, user):
|
def spawn_single_user(self, user):
|
||||||
yield user.spawn(
|
f = user.spawn(
|
||||||
spawner_class=self.spawner_class,
|
spawner_class=self.spawner_class,
|
||||||
base_url=self.base_url,
|
base_url=self.base_url,
|
||||||
hub=self.hub,
|
hub=self.hub,
|
||||||
config=self.config,
|
config=self.config,
|
||||||
)
|
)
|
||||||
yield self.proxy.add_user(user)
|
@gen.coroutine
|
||||||
user.spawner.add_poll_callback(self.user_stopped, user)
|
def finish_user_spawn(f=None):
|
||||||
return user
|
"""Finish the user spawn by registering listeners and notifying the proxy.
|
||||||
|
|
||||||
|
If the spawner is slow to start, this is passed as an async callback,
|
||||||
|
otherwise it is called immediately.
|
||||||
|
"""
|
||||||
|
if f and f.exception() is not None:
|
||||||
|
# failed, don't add to the proxy
|
||||||
|
return
|
||||||
|
yield self.proxy.add_user(user)
|
||||||
|
user.spawner.add_poll_callback(self.user_stopped, user)
|
||||||
|
|
||||||
|
try:
|
||||||
|
yield gen.with_timeout(timedelta(seconds=self.slow_spawn_timeout), f)
|
||||||
|
except gen.TimeoutError:
|
||||||
|
if user.spawn_pending:
|
||||||
|
# hit timeout, but spawn is still pending
|
||||||
|
self.log.warn("User %s server is slow to start", user.name)
|
||||||
|
# schedule finish for when the user finishes spawning
|
||||||
|
IOLoop.current().add_future(f, finish_user_spawn)
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
else:
|
||||||
|
yield finish_user_spawn()
|
||||||
|
|
||||||
@gen.coroutine
|
@gen.coroutine
|
||||||
def user_stopped(self, user):
|
def user_stopped(self, user):
|
||||||
|
"""Callback that fires when the spawner has stopped"""
|
||||||
status = yield user.spawner.poll()
|
status = yield user.spawner.poll()
|
||||||
|
if status is None:
|
||||||
|
status = 'unknown'
|
||||||
self.log.warn("User %s server stopped, with exit code: %s",
|
self.log.warn("User %s server stopped, with exit code: %s",
|
||||||
user.name, status,
|
user.name, status,
|
||||||
)
|
)
|
||||||
@@ -279,6 +309,13 @@ class UserSpawnHandler(BaseHandler):
|
|||||||
if current_user and current_user.name == name:
|
if current_user and current_user.name == name:
|
||||||
# logged in, spawn the server
|
# logged in, spawn the server
|
||||||
if current_user.spawner:
|
if current_user.spawner:
|
||||||
|
if current_user.spawn_pending:
|
||||||
|
# spawn has started, but not finished
|
||||||
|
html = self.render_template("spawn_pending.html", user=current_user)
|
||||||
|
self.finish(html)
|
||||||
|
return
|
||||||
|
|
||||||
|
# spawn has supposedly finished, check on the status
|
||||||
status = yield current_user.spawner.poll()
|
status = yield current_user.spawner.poll()
|
||||||
if status is not None:
|
if status is not None:
|
||||||
yield self.spawn_single_user(current_user)
|
yield self.spawn_single_user(current_user)
|
||||||
|
@@ -3,7 +3,7 @@
|
|||||||
# Copyright (c) Jupyter Development Team.
|
# Copyright (c) Jupyter Development Team.
|
||||||
# Distributed under the terms of the Modified BSD License.
|
# Distributed under the terms of the Modified BSD License.
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime, timedelta
|
||||||
import errno
|
import errno
|
||||||
import json
|
import json
|
||||||
import socket
|
import socket
|
||||||
@@ -250,6 +250,7 @@ class User(Base):
|
|||||||
cookie_id = Column(Unicode, default=new_token)
|
cookie_id = Column(Unicode, default=new_token)
|
||||||
state = Column(JSONDict)
|
state = Column(JSONDict)
|
||||||
spawner = None
|
spawner = None
|
||||||
|
spawn_pending = False
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
if self.server:
|
if self.server:
|
||||||
@@ -310,7 +311,23 @@ class User(Base):
|
|||||||
spawner.clear_state()
|
spawner.clear_state()
|
||||||
spawner.api_token = api_token
|
spawner.api_token = api_token
|
||||||
|
|
||||||
yield spawner.start()
|
self.spawn_pending = True
|
||||||
|
f = spawner.start()
|
||||||
|
# wait for spawner.start to return
|
||||||
|
try:
|
||||||
|
yield gen.with_timeout(timedelta(seconds=spawner.start_timeout), f)
|
||||||
|
except gen.TimeoutError as e:
|
||||||
|
self.log.warn("{user}'s server failed to start in {s} seconds, giving up".format(
|
||||||
|
user=self.name, s=spawner.start_timeout,
|
||||||
|
))
|
||||||
|
try:
|
||||||
|
yield self.stop()
|
||||||
|
except Exception:
|
||||||
|
self.log.error("Failed to cleanup {user}'s server that failed to start".format(
|
||||||
|
user=self.name,
|
||||||
|
), exc_info=True)
|
||||||
|
# raise original TimeoutError
|
||||||
|
raise e
|
||||||
spawner.start_polling()
|
spawner.start_polling()
|
||||||
|
|
||||||
# store state
|
# store state
|
||||||
@@ -320,7 +337,7 @@ class User(Base):
|
|||||||
try:
|
try:
|
||||||
yield self.server.wait_up(http=True)
|
yield self.server.wait_up(http=True)
|
||||||
except TimeoutError as e:
|
except TimeoutError as e:
|
||||||
self.log.warn("{user}'s server never started at {url}, giving up.".format(
|
self.log.warn("{user}'s server never showed up at {url}, giving up".format(
|
||||||
user=self.name, url=self.server.url,
|
user=self.name, url=self.server.url,
|
||||||
))
|
))
|
||||||
try:
|
try:
|
||||||
@@ -331,6 +348,7 @@ class User(Base):
|
|||||||
), exc_info=True)
|
), exc_info=True)
|
||||||
# raise original TimeoutError
|
# raise original TimeoutError
|
||||||
raise e
|
raise e
|
||||||
|
self.spawn_pending = False
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@gen.coroutine
|
@gen.coroutine
|
||||||
@@ -339,6 +357,7 @@ class User(Base):
|
|||||||
|
|
||||||
and cleanup after it.
|
and cleanup after it.
|
||||||
"""
|
"""
|
||||||
|
self.spawn_pending = False
|
||||||
if self.spawner is None:
|
if self.spawner is None:
|
||||||
return
|
return
|
||||||
self.spawner.stop_polling()
|
self.spawner.stop_polling()
|
||||||
|
@@ -40,6 +40,14 @@ class Spawner(LoggingConfigurable):
|
|||||||
user = Any()
|
user = Any()
|
||||||
hub = Any()
|
hub = Any()
|
||||||
api_token = Unicode()
|
api_token = Unicode()
|
||||||
|
start_timeout = Integer(60, config=True,
|
||||||
|
help="""Timeout (in seconds) before giving up on the spawner.
|
||||||
|
|
||||||
|
This is the timeout for start to return, not the timeout for the server to respond.
|
||||||
|
Callers of spawner.start will assume that startup has failed if it takes longer than this.
|
||||||
|
start should return when the server process is started and its location is known.
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
poll_interval = Integer(30, config=True,
|
poll_interval = Integer(30, config=True,
|
||||||
help="""Interval (in seconds) on which to poll the spawner."""
|
help="""Interval (in seconds) on which to poll the spawner."""
|
||||||
|
28
share/jupyter/templates/spawn_pending.html
Normal file
28
share/jupyter/templates/spawn_pending.html
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
{% extends "page.html" %}
|
||||||
|
|
||||||
|
{% block main %}
|
||||||
|
|
||||||
|
<div class="container">
|
||||||
|
<div class="row">
|
||||||
|
<div class="text-center">
|
||||||
|
<p>Your server is starting up.</p>
|
||||||
|
<p>You will be redirected automatically when it's ready for you.</p>
|
||||||
|
<a id="refresh" class="btn btn-lg btn-primary" href="#">refresh</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{% endblock %}
|
||||||
|
|
||||||
|
{% block script %}
|
||||||
|
<script type="text/javascript">
|
||||||
|
require(["jquery"], function ($) {
|
||||||
|
$("#refresh").click(function () {
|
||||||
|
window.location.reload();
|
||||||
|
})
|
||||||
|
setTimeout(function () {
|
||||||
|
window.location.reload();
|
||||||
|
}, 5000);
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
{% endblock %}
|
Reference in New Issue
Block a user