Merge pull request #115 from minrk/latency-spawner

allow high latency spawners
This commit is contained in:
Min RK
2014-12-22 15:02:49 -08:00
6 changed files with 172 additions and 8 deletions

View File

@@ -4,13 +4,14 @@
# Distributed under the terms of the Modified BSD License.
import re
from datetime import datetime
from datetime import datetime, timedelta
from http.client import responses
from jinja2 import TemplateNotFound
from tornado.log import app_log
from tornado.httputil import url_concat
from tornado.ioloop import IOLoop
from tornado.web import RequestHandler
from tornado import gen, web
@@ -160,25 +161,54 @@ class BaseHandler(RequestHandler):
# spawning-related
#---------------------------------------------------------------
@property
def slow_spawn_timeout(self):
    """Seconds to wait on a spawn before treating it as slow.

    Read from the ``slow_spawn_timeout`` application setting; falls back
    to 10 when the setting is absent.
    """
    settings = self.settings
    return settings.get('slow_spawn_timeout', 10)
@property
def spawner_class(self):
    """Spawner class handed to ``user.spawn``.

    Taken from the ``spawner_class`` application setting, with
    ``LocalProcessSpawner`` as the fallback.
    """
    fallback = LocalProcessSpawner
    return self.settings.get('spawner_class', fallback)
@gen.coroutine
def spawn_single_user(self, user):
    """Start ``user``'s server without blocking on a slow spawner.

    The spawn is given ``slow_spawn_timeout`` seconds to complete. If it
    finishes in time, the user is added to the proxy and the stop
    callback is registered before this coroutine returns. If it is still
    pending when the timeout hits, this coroutine returns early and the
    same finishing step is scheduled to run when the spawn future
    resolves.

    Raises:
        gen.TimeoutError: if the timeout fired but the spawn is no longer
            pending (``user.spawn_pending`` is false), i.e. the spawn
            itself gave up.
        Exception: anything raised by ``user.spawn`` within the timeout.
    """
    f = user.spawn(
        spawner_class=self.spawner_class,
        base_url=self.base_url,
        hub=self.hub,
        config=self.config,
    )

    @gen.coroutine
    def finish_user_spawn(f=None):
        """Finish the user spawn by registering listeners and notifying the proxy.

        If the spawner is slow to start, this is passed as an async callback,
        otherwise it is called immediately.
        """
        if f and f.exception() is not None:
            # failed, don't add to the proxy
            return
        yield self.proxy.add_user(user)
        user.spawner.add_poll_callback(self.user_stopped, user)

    try:
        yield gen.with_timeout(timedelta(seconds=self.slow_spawn_timeout), f)
    except gen.TimeoutError:
        if user.spawn_pending:
            # hit timeout, but spawn is still pending
            # NOTE(review): assumes self.log is a standard logging.Logger;
            # Logger.warn is a deprecated alias, so use warning().
            self.log.warning("User %s server is slow to start", user.name)
            # schedule finish for when the user finishes spawning
            IOLoop.current().add_future(f, finish_user_spawn)
        else:
            # the spawn is not pending any more, so this TimeoutError is
            # a real failure rather than a merely slow start
            raise
    else:
        yield finish_user_spawn()
@gen.coroutine
def user_stopped(self, user):
"""Callback that fires when the spawner has stopped"""
status = yield user.spawner.poll()
if status is None:
status = 'unknown'
self.log.warn("User %s server stopped, with exit code: %s",
user.name, status,
)
@@ -279,6 +309,13 @@ class UserSpawnHandler(BaseHandler):
if current_user and current_user.name == name:
# logged in, spawn the server
if current_user.spawner:
if current_user.spawn_pending:
# spawn has started, but not finished
html = self.render_template("spawn_pending.html", user=current_user)
self.finish(html)
return
# spawn has supposedly finished, check on the status
status = yield current_user.spawner.poll()
if status is not None:
yield self.spawn_single_user(current_user)

View File

@@ -3,7 +3,7 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
from datetime import datetime
from datetime import datetime, timedelta
import errno
import json
import socket
@@ -250,6 +250,7 @@ class User(Base):
cookie_id = Column(Unicode, default=new_token)
state = Column(JSONDict)
spawner = None
spawn_pending = False
def __repr__(self):
if self.server:
@@ -310,7 +311,23 @@ class User(Base):
spawner.clear_state()
spawner.api_token = api_token
yield spawner.start()
self.spawn_pending = True
f = spawner.start()
# wait for spawner.start to return
try:
yield gen.with_timeout(timedelta(seconds=spawner.start_timeout), f)
except gen.TimeoutError as e:
self.log.warn("{user}'s server failed to start in {s} seconds, giving up".format(
user=self.name, s=spawner.start_timeout,
))
try:
yield self.stop()
except Exception:
self.log.error("Failed to cleanup {user}'s server that failed to start".format(
user=self.name,
), exc_info=True)
# raise original TimeoutError
raise e
spawner.start_polling()
# store state
@@ -320,7 +337,7 @@ class User(Base):
try:
yield self.server.wait_up(http=True)
except TimeoutError as e:
self.log.warn("{user}'s server never started at {url}, giving up.".format(
self.log.warn("{user}'s server never showed up at {url}, giving up".format(
user=self.name, url=self.server.url,
))
try:
@@ -331,6 +348,7 @@ class User(Base):
), exc_info=True)
# raise original TimeoutError
raise e
self.spawn_pending = False
return self
@gen.coroutine
@@ -339,6 +357,7 @@ class User(Base):
and cleanup after it.
"""
self.spawn_pending = False
if self.spawner is None:
return
self.spawner.stop_polling()

View File

@@ -40,6 +40,14 @@ class Spawner(LoggingConfigurable):
user = Any()
hub = Any()
api_token = Unicode()
# Configurable limit (default 60) on how long spawner.start() may take to return.
# User.spawn passes this value, as seconds, to gen.with_timeout around
# spawner.start(); when it expires, User.spawn logs a warning, calls
# self.stop() and re-raises the TimeoutError.
start_timeout = Integer(60, config=True,
help="""Timeout (in seconds) before giving up on the spawner.
This is the timeout for start to return, not the timeout for the server to respond.
Callers of spawner.start will assume that startup has failed if it takes longer than this.
start should return when the server process is started and its location is known.
"""
)
poll_interval = Integer(30, config=True,
help="""Interval (in seconds) on which to poll the spawner."""

View File

@@ -1,11 +1,14 @@
"""mock utilities for testing"""
import sys
from datetime import timedelta
from tempfile import NamedTemporaryFile
import threading
from unittest import mock
from tornado import gen
from tornado.concurrent import Future
from tornado.ioloop import IOLoop
from ..spawner import LocalProcessSpawner
@@ -41,6 +44,26 @@ class MockSpawner(LocalProcessSpawner):
return [sys.executable, '-m', 'jupyterhub.tests.mocksu']
class SlowSpawner(MockSpawner):
    """MockSpawner variant whose start() is delayed by five seconds."""

    @gen.coroutine
    def start(self):
        """Wait 5 seconds on the current IOLoop, then run the parent's start()."""
        delay = timedelta(seconds=5)
        loop = IOLoop.current()
        yield gen.Task(loop.add_timeout, delay)
        yield super().start()
class NeverSpawner(MockSpawner):
    """MockSpawner variant whose start() never completes."""

    def _start_timeout_default(self):
        """Return 1.

        Presumably picked up as the dynamic default of the ``start_timeout``
        trait (verify against the Spawner base class).
        """
        return 1

    def start(self):
        """Hand back a fresh Future that nothing here ever resolves."""
        never_done = Future()
        return never_done
class MockPAMAuthenticator(PAMAuthenticator):
def system_user_exists(self, user):
# skip the add-system-user bit

View File

@@ -1,11 +1,15 @@
"""Tests for the REST API"""
import json
from datetime import timedelta
import requests
from tornado import gen
from ..utils import url_path_join as ujoin
from .. import orm
from . import mocking
def check_db_locks(func):
@@ -174,6 +178,7 @@ def test_spawn(app, io_loop):
assert r.status_code == 201
assert 'pid' in user.state
assert user.spawner is not None
assert not user.spawn_pending
status = io_loop.run_sync(user.spawner.poll)
assert status is None
@@ -194,3 +199,47 @@ def test_spawn(app, io_loop):
assert 'pid' not in user.state
status = io_loop.run_sync(user.spawner.poll)
assert status == 0
def test_slow_spawn(app, io_loop):
    """A spawn that outlasts slow_spawn_timeout stays pending, then completes."""
    settings = app.tornado_application.settings
    settings['spawner_class'] = mocking.SlowSpawner
    settings['slow_spawn_timeout'] = 0
    username = 'zoe'
    user = add_user(app.db, name=username)
    api_request(app, 'users', username, 'server', method='post')
    # the request has returned, but the spawn is still in flight
    assert user.spawner is not None
    assert user.spawn_pending

    poll_delay = timedelta(seconds=0.1)

    @gen.coroutine
    def wait_pending():
        # spin on the IOLoop until the pending flag is cleared
        while user.spawn_pending:
            yield gen.Task(io_loop.add_timeout, poll_delay)

    io_loop.run_sync(wait_pending)
    assert not user.spawn_pending
    # poll() returning None is asserted as the outcome of a finished slow spawn
    exit_status = io_loop.run_sync(user.spawner.poll)
    assert exit_status is None
def test_never_spawn(app, io_loop):
    """A spawn that never finishes stops being pending and leaves no running server."""
    settings = app.tornado_application.settings
    settings['spawner_class'] = mocking.NeverSpawner
    settings['slow_spawn_timeout'] = 0
    username = 'badger'
    user = add_user(app.db, name=username)
    api_request(app, 'users', username, 'server', method='post')
    # the request has returned, but the spawn is still in flight
    assert user.spawner is not None
    assert user.spawn_pending

    poll_delay = timedelta(seconds=0.1)

    @gen.coroutine
    def wait_pending():
        # spin on the IOLoop until the pending flag is cleared
        while user.spawn_pending:
            yield gen.Task(io_loop.add_timeout, poll_delay)

    io_loop.run_sync(wait_pending)
    assert not user.spawn_pending
    # a non-None poll() result is asserted once the spawn has been given up on
    exit_status = io_loop.run_sync(user.spawner.poll)
    assert exit_status is not None

View File

@@ -0,0 +1,28 @@
{% extends "page.html" %}
{# Waiting page: offers a manual refresh button and reloads itself after 5 seconds. #}

{% block main %}

<div class="container">
  <div class="row">
    <div class="text-center">
      <p>Your server is starting up.</p>
      <p>You will be redirected automatically when it's ready for you.</p>
      <a id="refresh" class="btn btn-lg btn-primary" href="#">refresh</a>
    </div>
  </div>
</div>

{% endblock %}

{% block script %}
<script type="text/javascript">
require(["jquery"], function ($) {
  // Manual refresh. preventDefault stops the href="#" fragment navigation
  // from competing with the reload.
  $("#refresh").click(function (event) {
    event.preventDefault();
    window.location.reload();
  });
  // Automatic refresh: reload once 5 seconds after the page loads.
  setTimeout(function () {
    window.location.reload();
  }, 5000);
});
</script>
{% endblock %}