Merge pull request #115 from minrk/latency-spawner

allow high latency spawners
This commit is contained in:
Min RK
2014-12-22 15:02:49 -08:00
6 changed files with 172 additions and 8 deletions

View File

@@ -4,13 +4,14 @@
# Distributed under the terms of the Modified BSD License. # Distributed under the terms of the Modified BSD License.
import re import re
from datetime import datetime from datetime import datetime, timedelta
from http.client import responses from http.client import responses
from jinja2 import TemplateNotFound from jinja2 import TemplateNotFound
from tornado.log import app_log from tornado.log import app_log
from tornado.httputil import url_concat from tornado.httputil import url_concat
from tornado.ioloop import IOLoop
from tornado.web import RequestHandler from tornado.web import RequestHandler
from tornado import gen, web from tornado import gen, web
@@ -160,25 +161,54 @@ class BaseHandler(RequestHandler):
# spawning-related # spawning-related
#--------------------------------------------------------------- #---------------------------------------------------------------
@property
def slow_spawn_timeout(self):
    """Seconds to wait on a spawn before it is treated as slow.

    Looked up under the 'slow_spawn_timeout' key of the application
    settings; 10 is used when the key is absent.
    """
    configured = self.settings.get('slow_spawn_timeout', 10)
    return configured
@property @property
def spawner_class(self): def spawner_class(self):
return self.settings.get('spawner_class', LocalProcessSpawner) return self.settings.get('spawner_class', LocalProcessSpawner)
@gen.coroutine @gen.coroutine
def spawn_single_user(self, user): def spawn_single_user(self, user):
yield user.spawn( f = user.spawn(
spawner_class=self.spawner_class, spawner_class=self.spawner_class,
base_url=self.base_url, base_url=self.base_url,
hub=self.hub, hub=self.hub,
config=self.config, config=self.config,
) )
@gen.coroutine
def finish_user_spawn(f=None):
"""Finish the user spawn by registering listeners and notifying the proxy.
If the spawner is slow to start, this is passed as an async callback,
otherwise it is called immediately.
"""
if f and f.exception() is not None:
# failed, don't add to the proxy
return
yield self.proxy.add_user(user) yield self.proxy.add_user(user)
user.spawner.add_poll_callback(self.user_stopped, user) user.spawner.add_poll_callback(self.user_stopped, user)
return user
try:
yield gen.with_timeout(timedelta(seconds=self.slow_spawn_timeout), f)
except gen.TimeoutError:
if user.spawn_pending:
# hit timeout, but spawn is still pending
self.log.warn("User %s server is slow to start", user.name)
# schedule finish for when the user finishes spawning
IOLoop.current().add_future(f, finish_user_spawn)
else:
raise
else:
yield finish_user_spawn()
@gen.coroutine @gen.coroutine
def user_stopped(self, user): def user_stopped(self, user):
"""Callback that fires when the spawner has stopped"""
status = yield user.spawner.poll() status = yield user.spawner.poll()
if status is None:
status = 'unknown'
self.log.warn("User %s server stopped, with exit code: %s", self.log.warn("User %s server stopped, with exit code: %s",
user.name, status, user.name, status,
) )
@@ -279,6 +309,13 @@ class UserSpawnHandler(BaseHandler):
if current_user and current_user.name == name: if current_user and current_user.name == name:
# logged in, spawn the server # logged in, spawn the server
if current_user.spawner: if current_user.spawner:
if current_user.spawn_pending:
# spawn has started, but not finished
html = self.render_template("spawn_pending.html", user=current_user)
self.finish(html)
return
# spawn has supposedly finished, check on the status
status = yield current_user.spawner.poll() status = yield current_user.spawner.poll()
if status is not None: if status is not None:
yield self.spawn_single_user(current_user) yield self.spawn_single_user(current_user)

View File

@@ -3,7 +3,7 @@
# Copyright (c) Jupyter Development Team. # Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License. # Distributed under the terms of the Modified BSD License.
from datetime import datetime from datetime import datetime, timedelta
import errno import errno
import json import json
import socket import socket
@@ -250,6 +250,7 @@ class User(Base):
cookie_id = Column(Unicode, default=new_token) cookie_id = Column(Unicode, default=new_token)
state = Column(JSONDict) state = Column(JSONDict)
spawner = None spawner = None
spawn_pending = False
def __repr__(self): def __repr__(self):
if self.server: if self.server:
@@ -310,7 +311,23 @@ class User(Base):
spawner.clear_state() spawner.clear_state()
spawner.api_token = api_token spawner.api_token = api_token
yield spawner.start() self.spawn_pending = True
f = spawner.start()
# wait for spawner.start to return
try:
yield gen.with_timeout(timedelta(seconds=spawner.start_timeout), f)
except gen.TimeoutError as e:
self.log.warn("{user}'s server failed to start in {s} seconds, giving up".format(
user=self.name, s=spawner.start_timeout,
))
try:
yield self.stop()
except Exception:
self.log.error("Failed to cleanup {user}'s server that failed to start".format(
user=self.name,
), exc_info=True)
# raise original TimeoutError
raise e
spawner.start_polling() spawner.start_polling()
# store state # store state
@@ -320,7 +337,7 @@ class User(Base):
try: try:
yield self.server.wait_up(http=True) yield self.server.wait_up(http=True)
except TimeoutError as e: except TimeoutError as e:
self.log.warn("{user}'s server never started at {url}, giving up.".format( self.log.warn("{user}'s server never showed up at {url}, giving up".format(
user=self.name, url=self.server.url, user=self.name, url=self.server.url,
)) ))
try: try:
@@ -331,6 +348,7 @@ class User(Base):
), exc_info=True) ), exc_info=True)
# raise original TimeoutError # raise original TimeoutError
raise e raise e
self.spawn_pending = False
return self return self
@gen.coroutine @gen.coroutine
@@ -339,6 +357,7 @@ class User(Base):
and cleanup after it. and cleanup after it.
""" """
self.spawn_pending = False
if self.spawner is None: if self.spawner is None:
return return
self.spawner.stop_polling() self.spawner.stop_polling()

View File

@@ -40,6 +40,14 @@ class Spawner(LoggingConfigurable):
user = Any() user = Any()
hub = Any() hub = Any()
api_token = Unicode() api_token = Unicode()
# Configurable limit (default 60 seconds) on how long start() itself may take.
# It bounds only the start() call; waiting for the launched server to respond
# is a separate concern, as the help text below spells out.
start_timeout = Integer(60, config=True,
help="""Timeout (in seconds) before giving up on the spawner.
This is the timeout for start to return, not the timeout for the server to respond.
Callers of spawner.start will assume that startup has failed if it takes longer than this.
start should return when the server process is started and its location is known.
"""
)
poll_interval = Integer(30, config=True, poll_interval = Integer(30, config=True,
help="""Interval (in seconds) on which to poll the spawner.""" help="""Interval (in seconds) on which to poll the spawner."""

View File

@@ -1,11 +1,14 @@
"""mock utilities for testing""" """mock utilities for testing"""
import sys import sys
from datetime import timedelta
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
import threading import threading
from unittest import mock from unittest import mock
from tornado import gen
from tornado.concurrent import Future
from tornado.ioloop import IOLoop from tornado.ioloop import IOLoop
from ..spawner import LocalProcessSpawner from ..spawner import LocalProcessSpawner
@@ -41,6 +44,26 @@ class MockSpawner(LocalProcessSpawner):
return [sys.executable, '-m', 'jupyterhub.tests.mocksu'] return [sys.executable, '-m', 'jupyterhub.tests.mocksu']
class SlowSpawner(MockSpawner):
    """Mock spawner whose ``start`` stalls for five seconds before launching.

    Used by tests that need a spawn to outlast the handler's slow-spawn
    timeout while still finishing successfully.
    """

    @gen.coroutine
    def start(self):
        """Sleep on the current IOLoop, then run the inherited ``start``."""
        loop = IOLoop.current()
        delay = timedelta(seconds=5)
        # gen.Task turns the callback-style add_timeout into something yieldable
        yield gen.Task(loop.add_timeout, delay)
        yield super().start()
class NeverSpawner(MockSpawner):
    """Mock spawner whose ``start`` never completes.

    Lets tests exercise the path where a spawn runs out of time.
    """

    def _start_timeout_default(self):
        """Default ``start_timeout`` to one second so tests give up quickly."""
        return 1

    def start(self):
        """Hand back a brand-new Future that nothing will ever resolve."""
        unresolved = Future()
        return unresolved
class MockPAMAuthenticator(PAMAuthenticator): class MockPAMAuthenticator(PAMAuthenticator):
def system_user_exists(self, user): def system_user_exists(self, user):
# skip the add-system-user bit # skip the add-system-user bit

View File

@@ -1,11 +1,15 @@
"""Tests for the REST API""" """Tests for the REST API"""
import json import json
from datetime import timedelta
import requests import requests
from tornado import gen
from ..utils import url_path_join as ujoin from ..utils import url_path_join as ujoin
from .. import orm from .. import orm
from . import mocking
def check_db_locks(func): def check_db_locks(func):
@@ -174,6 +178,7 @@ def test_spawn(app, io_loop):
assert r.status_code == 201 assert r.status_code == 201
assert 'pid' in user.state assert 'pid' in user.state
assert user.spawner is not None assert user.spawner is not None
assert not user.spawn_pending
status = io_loop.run_sync(user.spawner.poll) status = io_loop.run_sync(user.spawner.poll)
assert status is None assert status is None
@@ -194,3 +199,47 @@ def test_spawn(app, io_loop):
assert 'pid' not in user.state assert 'pid' not in user.state
status = io_loop.run_sync(user.spawner.poll) status = io_loop.run_sync(user.spawner.poll)
assert status == 0 assert status == 0
def test_slow_spawn(app, io_loop):
    """A spawn slower than slow_spawn_timeout is reported pending, then completes.

    With SlowSpawner and a zero slow_spawn_timeout, the spawn request returns
    while the spawn is still pending. The test then waits for the pending flag
    to clear and checks that the server is running (poll() returns None).
    """
    settings = app.tornado_application.settings
    overrides = {
        'spawner_class': mocking.SlowSpawner,
        'slow_spawn_timeout': 0,
    }
    # Remember the previous values so the overrides do not leak into other
    # tests that use the same application object.
    missing = object()
    saved = {key: settings.get(key, missing) for key in overrides}
    settings.update(overrides)
    try:
        db = app.db
        name = 'zoe'
        user = add_user(db, name=name)
        r = api_request(app, 'users', name, 'server', method='post')
        # the request itself must succeed even though the spawn is unfinished
        r.raise_for_status()
        assert user.spawner is not None
        assert user.spawn_pending

        dt = timedelta(seconds=0.1)

        @gen.coroutine
        def wait_pending():
            # re-check the flag every 0.1s until the spawn is no longer pending
            while user.spawn_pending:
                yield gen.Task(io_loop.add_timeout, dt)

        io_loop.run_sync(wait_pending)
        assert not user.spawn_pending
        status = io_loop.run_sync(user.spawner.poll)
        assert status is None
    finally:
        # put back exactly what was there before, key by key
        for key, previous in saved.items():
            if previous is missing:
                settings.pop(key, None)
            else:
                settings[key] = previous
def test_never_spawn(app, io_loop):
    """A spawn that never finishes is reported pending, then abandoned.

    With NeverSpawner and a zero slow_spawn_timeout, the spawn request returns
    while the spawn is still pending. The test then waits for the pending flag
    to clear and checks that the spawner is not running (poll() returns a
    non-None status).
    """
    settings = app.tornado_application.settings
    overrides = {
        'spawner_class': mocking.NeverSpawner,
        'slow_spawn_timeout': 0,
    }
    # Remember the previous values so the overrides do not leak into other
    # tests that use the same application object.
    missing = object()
    saved = {key: settings.get(key, missing) for key in overrides}
    settings.update(overrides)
    try:
        db = app.db
        name = 'badger'
        user = add_user(db, name=name)
        r = api_request(app, 'users', name, 'server', method='post')
        # the request itself must succeed even though the spawn is unfinished
        r.raise_for_status()
        assert user.spawner is not None
        assert user.spawn_pending

        dt = timedelta(seconds=0.1)

        @gen.coroutine
        def wait_pending():
            # re-check the flag every 0.1s until the spawn is no longer pending
            while user.spawn_pending:
                yield gen.Task(io_loop.add_timeout, dt)

        io_loop.run_sync(wait_pending)
        assert not user.spawn_pending
        status = io_loop.run_sync(user.spawner.poll)
        assert status is not None
    finally:
        # put back exactly what was there before, key by key
        for key, previous in saved.items():
            if previous is missing:
                settings.pop(key, None)
            else:
                settings[key] = previous

View File

@@ -0,0 +1,28 @@
{% extends "page.html" %}
{#
  Interstitial page shown while a user's server is still starting.
  It offers a manual refresh button and also reloads itself every five
  seconds, so the request is retried until the server is ready.
#}

{% block main %}

<div class="container">
  <div class="row">
    <div class="text-center">
      <p>Your server is starting up.</p>
      <p>You will be redirected automatically when it's ready for you.</p>
      <a id="refresh" class="btn btn-lg btn-primary" href="#">refresh</a>
    </div>
  </div>
</div>

{% endblock %}

{% block script %}
<script type="text/javascript">
require(["jquery"], function ($) {
  $("#refresh").click(function (event) {
    // Cancel the default href="#" navigation so it cannot pre-empt the
    // reload or leave a stray "#" on the URL.
    event.preventDefault();
    window.location.reload();
  });
  // Automatic retry: reload the page after five seconds.
  setTimeout(function () {
    window.location.reload();
  }, 5000);
});
</script>
{% endblock %}