diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py index 3a8c6ad2..12001e74 100644 --- a/jupyterhub/handlers/base.py +++ b/jupyterhub/handlers/base.py @@ -390,8 +390,6 @@ class BaseHandler(RequestHandler): 429, "User startup rate limit exceeded. Try to start again in a few minutes.") - # FIXME: Move this out of settings, since this isn't really a setting - self.spawn_pending_count += 1 tic = IOLoop.current().time() user_server_name = user.name if server_name: @@ -402,6 +400,14 @@ class BaseHandler(RequestHandler): self.log.debug("Initiating spawn for %s", user_server_name) f = user.spawn(server_name, options) + + # increment spawn_pending only after spawn starts + self.log.debug("%i%s concurrent spawns", + self.spawn_pending_count, + '/%i' % concurrent_spawn_limit if concurrent_spawn_limit else '') + # FIXME: Move this out of settings, since this isn't really a setting + self.spawn_pending_count += 1 + spawner = user.spawners[server_name] spawner._proxy_pending = True @@ -414,6 +420,7 @@ class BaseHandler(RequestHandler): """ if f and f.exception() is not None: # failed, don't add to the proxy + self.spawn_pending_count -= 1 return toc = IOLoop.current().time() self.log.info("User %s took %.3f seconds to start", user_server_name, toc-tic) @@ -455,9 +462,14 @@ class BaseHandler(RequestHandler): # schedule finish for when the user finishes spawning IOLoop.current().add_future(f, finish_user_spawn) else: + self.spawn_pending_count -= 1 toc = IOLoop.current().time() self.statsd.timing('spawner.failure', (toc - tic) * 1000) raise web.HTTPError(500, "Spawner failed to start [status=%s]" % status) + except Exception: + # error in start + self.spawn_pending_count -= 1 + raise else: yield finish_user_spawn() diff --git a/jupyterhub/tests/conftest.py b/jupyterhub/tests/conftest.py index 33e96351..3eb272c1 100644 --- a/jupyterhub/tests/conftest.py +++ b/jupyterhub/tests/conftest.py @@ -14,6 +14,7 @@ from tornado import ioloop, gen from .. import orm from ..utils import random_port +from . import mocking from .mocking import MockHub from .test_services import mockservice_cmd @@ -134,3 +135,36 @@ def no_patience(app): {'slow_spawn_timeout': 0, 'slow_stop_timeout': 0}): yield + + +@fixture +def slow_spawn(app): + """Fixture enabling SlowSpawner""" + with mock.patch.dict(app.tornado_settings, + {'spawner_class': mocking.SlowSpawner}): + yield + + +@fixture +def never_spawn(app): + """Fixture enabling NeverSpawner""" + with mock.patch.dict(app.tornado_settings, + {'spawner_class': mocking.NeverSpawner}): + yield + + +@fixture +def bad_spawn(app): + """Fixture enabling BadSpawner""" + with mock.patch.dict(app.tornado_settings, + {'spawner_class': mocking.BadSpawner}): + yield + + +@fixture +def slow_bad_spawn(app): + """Fixture enabling SlowBadSpawner""" + with mock.patch.dict(app.tornado_settings, + {'spawner_class': mocking.SlowBadSpawner}): + yield + diff --git a/jupyterhub/tests/mocking.py b/jupyterhub/tests/mocking.py index 560c0d24..5437bbd3 100644 --- a/jupyterhub/tests/mocking.py +++ b/jupyterhub/tests/mocking.py @@ -94,6 +94,22 @@ class NeverSpawner(MockSpawner): return 0 +class BadSpawner(MockSpawner): + """Spawner that fails immediately""" + def start(self): + raise RuntimeError("I don't work!") + + +class SlowBadSpawner(MockSpawner): + """Spawner that fails after a short delay""" + + @gen.coroutine + def start(self): + yield gen.sleep(0.1) + raise RuntimeError("I don't work!") + + + class FormSpawner(MockSpawner): """A spawner that has an options form defined""" options_form = "IMAFORM" diff --git a/jupyterhub/tests/test_api.py b/jupyterhub/tests/test_api.py index 46a6d4f8..7a239006 100644 --- a/jupyterhub/tests/test_api.py +++ b/jupyterhub/tests/test_api.py @@ -6,6 +6,7 @@ import sys from unittest import mock from urllib.parse import urlparse, quote +import pytest from pytest import mark import requests @@ -391,6 +392,7 @@ def test_make_admin(app): @mark.gen_test def test_spawn(app): + settings = app.tornado_application.settings db = app.db name = 'wash' user = add_user(db, app=app, name=name) @@ -442,23 +444,22 @@ def test_spawn(app): assert before_servers == after_servers tokens = list(db.query(orm.APIToken).filter(orm.APIToken.user_id == user.id)) assert tokens == [] + assert settings['_spawn_pending_count'] == 0 @mark.gen_test -def test_slow_spawn(app, no_patience, request): - patch = mock.patch.dict(app.tornado_settings, {'spawner_class': mocking.SlowSpawner}) - patch.start() - request.addfinalizer(patch.stop) +def test_slow_spawn(app, no_patience, slow_spawn): + settings = app.tornado_application.settings db = app.db name = 'zoe' - user = add_user(db, app=app, name=name) + app_user = add_user(db, app=app, name=name) r = yield api_request(app, 'users', name, 'server', method='post') r.raise_for_status() assert r.status_code == 202 - app_user = app.users[name] assert app_user.spawner is not None assert app_user.spawner._spawn_pending assert not app_user.spawner._stop_pending + assert settings['_spawn_pending_count'] == 1 @gen.coroutine def wait_spawn(): @@ -492,21 +493,19 @@ def test_slow_spawn(app, no_patience, request): assert app_user.spawner is not None r = yield api_request(app, 'users', name, 'server', method='delete') assert r.status_code == 400 + assert settings['_spawn_pending_count'] == 0 @mark.gen_test -def test_never_spawn(app, no_patience, request): - patch = mock.patch.dict(app.tornado_settings, {'spawner_class': mocking.NeverSpawner}) - patch.start() - request.addfinalizer(patch.stop) - +def test_never_spawn(app, no_patience, never_spawn): + settings = app.tornado_application.settings db = app.db name = 'badger' - user = add_user(db, app=app, name=name) + app_user = add_user(db, app=app, name=name) r = yield api_request(app, 'users', name, 'server', method='post') - app_user = app.users[name] assert app_user.spawner is not None assert app_user.spawner._spawn_pending + assert settings['_spawn_pending_count'] == 1 @gen.coroutine def wait_pending(): @@ -517,6 +516,80 @@ def test_never_spawn(app, no_patience, request): assert not app_user.spawner._spawn_pending status = yield app_user.spawner.poll() assert status is not None + # failed spawn should decrements pending count + assert settings['_spawn_pending_count'] == 0 + + +@mark.gen_test +def test_bad_spawn(app, no_patience, bad_spawn): + settings = app.tornado_application.settings + db = app.db + name = 'prim' + user = add_user(db, app=app, name=name) + r = yield api_request(app, 'users', name, 'server', method='post') + assert r.status_code == 500 + assert settings['_spawn_pending_count'] == 0 + + +@mark.gen_test +def test_slow_bad_spawn(app, no_patience, slow_bad_spawn): + settings = app.tornado_application.settings + db = app.db + name = 'zaphod' + user = add_user(db, app=app, name=name) + r = yield api_request(app, 'users', name, 'server', method='post') + r.raise_for_status() + while user.spawner._spawn_pending: + yield gen.sleep(0.1) + # spawn failed + assert not user.running('') + assert settings['_spawn_pending_count'] == 0 + + +@mark.gen_test +def test_spawn_limit(app, no_patience, slow_spawn, request): + db = app.db + settings = app.tornado_application.settings + settings['concurrent_spawn_limit'] = 2 + def _restore_limit(): + settings['concurrent_spawn_limit'] = 100 + request.addfinalizer(_restore_limit) + + # start two pending spawns + names = ['ykka', 'hjarka'] + users = [ add_user(db, app=app, name=name) for name in names ] + for name in names: + yield api_request(app, 'users', name, 'server', method='post') + yield gen.sleep(0.5) + assert settings['_spawn_pending_count'] == 2 + + # ykka and hjarka's spawns are pending. Essun should fail with 429 + name = 'essun' + user = add_user(db, app=app, name=name) + r = yield api_request(app, 'users', name, 'server', method='post') + assert r.status_code == 429 + + # wait for ykka to finish + while not users[0].running(''): + yield gen.sleep(0.1) + + # race? hjarka could finish in this time + # come back to this if we see intermittent failures here + assert settings['_spawn_pending_count'] == 1 + r = yield api_request(app, 'users', name, 'server', method='post') + r.raise_for_status() + assert settings['_spawn_pending_count'] == 2 + users.append(user) + while not all(u.running('') for u in users): + yield gen.sleep(0.1) + + # everybody's running, pending count should be back to 0 + assert settings['_spawn_pending_count'] == 0 + for u in users: + r = yield api_request(app, 'users', u.name, 'server', method='delete') + yield r.raise_for_status() + while any(u.running('') for u in users): + yield gen.sleep(0.1) @mark.gen_test