test spawn_pending_count

make sure to cover various cases of spawn failure
so that failed spawns don't leak pending-spawn counts
This commit is contained in:
Min RK
2017-07-31 11:50:20 +02:00
parent 744d96330e
commit 88268bd76f
4 changed files with 150 additions and 15 deletions

View File

@@ -390,8 +390,6 @@ class BaseHandler(RequestHandler):
429, 429,
"User startup rate limit exceeded. Try to start again in a few minutes.") "User startup rate limit exceeded. Try to start again in a few minutes.")
# FIXME: Move this out of settings, since this isn't really a setting
self.spawn_pending_count += 1
tic = IOLoop.current().time() tic = IOLoop.current().time()
user_server_name = user.name user_server_name = user.name
if server_name: if server_name:
@@ -402,6 +400,14 @@ class BaseHandler(RequestHandler):
self.log.debug("Initiating spawn for %s", user_server_name) self.log.debug("Initiating spawn for %s", user_server_name)
f = user.spawn(server_name, options) f = user.spawn(server_name, options)
# increment spawn_pending only after spawn starts
self.log.debug("%i%s concurrent spawns",
self.spawn_pending_count,
'/%i' % concurrent_spawn_limit if concurrent_spawn_limit else '')
# FIXME: Move this out of settings, since this isn't really a setting
self.spawn_pending_count += 1
spawner = user.spawners[server_name] spawner = user.spawners[server_name]
spawner._proxy_pending = True spawner._proxy_pending = True
@@ -414,6 +420,7 @@ class BaseHandler(RequestHandler):
""" """
if f and f.exception() is not None: if f and f.exception() is not None:
# failed, don't add to the proxy # failed, don't add to the proxy
self.spawn_pending_count -= 1
return return
toc = IOLoop.current().time() toc = IOLoop.current().time()
self.log.info("User %s took %.3f seconds to start", user_server_name, toc-tic) self.log.info("User %s took %.3f seconds to start", user_server_name, toc-tic)
@@ -455,9 +462,14 @@ class BaseHandler(RequestHandler):
# schedule finish for when the user finishes spawning # schedule finish for when the user finishes spawning
IOLoop.current().add_future(f, finish_user_spawn) IOLoop.current().add_future(f, finish_user_spawn)
else: else:
self.spawn_pending_count -= 1
toc = IOLoop.current().time() toc = IOLoop.current().time()
self.statsd.timing('spawner.failure', (toc - tic) * 1000) self.statsd.timing('spawner.failure', (toc - tic) * 1000)
raise web.HTTPError(500, "Spawner failed to start [status=%s]" % status) raise web.HTTPError(500, "Spawner failed to start [status=%s]" % status)
except Exception:
# error in start
self.spawn_pending_count -= 1
raise
else: else:
yield finish_user_spawn() yield finish_user_spawn()

View File

@@ -14,6 +14,7 @@ from tornado import ioloop, gen
from .. import orm from .. import orm
from ..utils import random_port from ..utils import random_port
from . import mocking
from .mocking import MockHub from .mocking import MockHub
from .test_services import mockservice_cmd from .test_services import mockservice_cmd
@@ -134,3 +135,36 @@ def no_patience(app):
{'slow_spawn_timeout': 0, {'slow_spawn_timeout': 0,
'slow_stop_timeout': 0}): 'slow_stop_timeout': 0}):
yield yield
@fixture
def slow_spawn(app):
    """Run the test with ``spawner_class`` set to ``mocking.SlowSpawner``.

    The override is applied to ``app.tornado_settings`` through
    ``mock.patch.dict`` and is undone when the fixture is torn down.
    """
    overrides = {'spawner_class': mocking.SlowSpawner}
    patcher = mock.patch.dict(app.tornado_settings, overrides)
    with patcher:
        yield
@fixture
def never_spawn(app):
    """Run the test with ``spawner_class`` set to ``mocking.NeverSpawner``.

    The override is applied to ``app.tornado_settings`` through
    ``mock.patch.dict`` and is undone when the fixture is torn down.
    """
    overrides = {'spawner_class': mocking.NeverSpawner}
    patcher = mock.patch.dict(app.tornado_settings, overrides)
    with patcher:
        yield
@fixture
def bad_spawn(app):
    """Run the test with ``spawner_class`` set to ``mocking.BadSpawner``.

    The override is applied to ``app.tornado_settings`` through
    ``mock.patch.dict`` and is undone when the fixture is torn down.
    """
    overrides = {'spawner_class': mocking.BadSpawner}
    patcher = mock.patch.dict(app.tornado_settings, overrides)
    with patcher:
        yield
@fixture
def slow_bad_spawn(app):
    """Run the test with ``spawner_class`` set to ``mocking.SlowBadSpawner``.

    The override is applied to ``app.tornado_settings`` through
    ``mock.patch.dict`` and is undone when the fixture is torn down.
    """
    overrides = {'spawner_class': mocking.SlowBadSpawner}
    patcher = mock.patch.dict(app.tornado_settings, overrides)
    with patcher:
        yield

View File

@@ -94,6 +94,22 @@ class NeverSpawner(MockSpawner):
return 0 return 0
class BadSpawner(MockSpawner):
    """Mock spawner whose ``start`` raises as soon as it is called."""

    def start(self):
        """Raise ``RuntimeError`` synchronously; nothing is ever started."""
        failure = RuntimeError("I don't work!")
        raise failure
class SlowBadSpawner(MockSpawner):
    """Mock spawner whose ``start`` fails only after a brief pause."""

    @gen.coroutine
    def start(self):
        """Sleep for 0.1 seconds, then raise ``RuntimeError``."""
        delay = 0.1
        yield gen.sleep(delay)
        failure = RuntimeError("I don't work!")
        raise failure
class FormSpawner(MockSpawner): class FormSpawner(MockSpawner):
"""A spawner that has an options form defined""" """A spawner that has an options form defined"""
options_form = "IMAFORM" options_form = "IMAFORM"

View File

@@ -6,6 +6,7 @@ import sys
from unittest import mock from unittest import mock
from urllib.parse import urlparse, quote from urllib.parse import urlparse, quote
import pytest
from pytest import mark from pytest import mark
import requests import requests
@@ -391,6 +392,7 @@ def test_make_admin(app):
@mark.gen_test @mark.gen_test
def test_spawn(app): def test_spawn(app):
settings = app.tornado_application.settings
db = app.db db = app.db
name = 'wash' name = 'wash'
user = add_user(db, app=app, name=name) user = add_user(db, app=app, name=name)
@@ -442,23 +444,22 @@ def test_spawn(app):
assert before_servers == after_servers assert before_servers == after_servers
tokens = list(db.query(orm.APIToken).filter(orm.APIToken.user_id == user.id)) tokens = list(db.query(orm.APIToken).filter(orm.APIToken.user_id == user.id))
assert tokens == [] assert tokens == []
assert settings['_spawn_pending_count'] == 0
@mark.gen_test @mark.gen_test
def test_slow_spawn(app, no_patience, request): def test_slow_spawn(app, no_patience, slow_spawn):
patch = mock.patch.dict(app.tornado_settings, {'spawner_class': mocking.SlowSpawner}) settings = app.tornado_application.settings
patch.start()
request.addfinalizer(patch.stop)
db = app.db db = app.db
name = 'zoe' name = 'zoe'
user = add_user(db, app=app, name=name) app_user = add_user(db, app=app, name=name)
r = yield api_request(app, 'users', name, 'server', method='post') r = yield api_request(app, 'users', name, 'server', method='post')
r.raise_for_status() r.raise_for_status()
assert r.status_code == 202 assert r.status_code == 202
app_user = app.users[name]
assert app_user.spawner is not None assert app_user.spawner is not None
assert app_user.spawner._spawn_pending assert app_user.spawner._spawn_pending
assert not app_user.spawner._stop_pending assert not app_user.spawner._stop_pending
assert settings['_spawn_pending_count'] == 1
@gen.coroutine @gen.coroutine
def wait_spawn(): def wait_spawn():
@@ -492,21 +493,19 @@ def test_slow_spawn(app, no_patience, request):
assert app_user.spawner is not None assert app_user.spawner is not None
r = yield api_request(app, 'users', name, 'server', method='delete') r = yield api_request(app, 'users', name, 'server', method='delete')
assert r.status_code == 400 assert r.status_code == 400
assert settings['_spawn_pending_count'] == 0
@mark.gen_test @mark.gen_test
def test_never_spawn(app, no_patience, request): def test_never_spawn(app, no_patience, never_spawn):
patch = mock.patch.dict(app.tornado_settings, {'spawner_class': mocking.NeverSpawner}) settings = app.tornado_application.settings
patch.start()
request.addfinalizer(patch.stop)
db = app.db db = app.db
name = 'badger' name = 'badger'
user = add_user(db, app=app, name=name) app_user = add_user(db, app=app, name=name)
r = yield api_request(app, 'users', name, 'server', method='post') r = yield api_request(app, 'users', name, 'server', method='post')
app_user = app.users[name]
assert app_user.spawner is not None assert app_user.spawner is not None
assert app_user.spawner._spawn_pending assert app_user.spawner._spawn_pending
assert settings['_spawn_pending_count'] == 1
@gen.coroutine @gen.coroutine
def wait_pending(): def wait_pending():
@@ -517,6 +516,80 @@ def test_never_spawn(app, no_patience, request):
assert not app_user.spawner._spawn_pending assert not app_user.spawner._spawn_pending
status = yield app_user.spawner.poll() status = yield app_user.spawner.poll()
assert status is not None assert status is not None
# failed spawn should decrement pending count
assert settings['_spawn_pending_count'] == 0
@mark.gen_test
def test_bad_spawn(app, no_patience, bad_spawn):
    """A spawn request that fails immediately must not leave a pending count.

    With the ``bad_spawn`` fixture active, POSTing to the user's server
    endpoint is expected to return 500, and ``_spawn_pending_count`` in the
    tornado application settings must be 0 afterwards.
    """
    settings = app.tornado_application.settings
    db = app.db
    name = 'prim'
    # the return value of add_user is not needed by this test
    add_user(db, app=app, name=name)
    r = yield api_request(app, 'users', name, 'server', method='post')
    assert r.status_code == 500
    assert settings['_spawn_pending_count'] == 0
@mark.gen_test
def test_slow_bad_spawn(app, no_patience, slow_bad_spawn):
    """A spawn that fails after the request returned must release its slot.

    The POST itself is expected to succeed; the test then waits for the
    spawner to stop being pending and checks that the server is not running
    and that ``_spawn_pending_count`` is back to 0.
    """
    settings = app.tornado_application.settings
    name = 'zaphod'
    user = add_user(app.db, app=app, name=name)
    response = yield api_request(app, 'users', name, 'server', method='post')
    response.raise_for_status()
    # poll until the spawner is no longer flagged as pending
    while True:
        if not user.spawner._spawn_pending:
            break
        yield gen.sleep(0.1)
    # spawn failed
    assert not user.running('')
    assert settings['_spawn_pending_count'] == 0
@mark.gen_test
def test_spawn_limit(app, no_patience, slow_spawn, request):
    """Exercise ``concurrent_spawn_limit`` against ``_spawn_pending_count``.

    Steps:
      1. Set the limit to 2 and request servers for two users; the pending
         count must reach 2.
      2. A third user's request must be rejected with 429.
      3. Once the first user is running, the pending count must be 1 and the
         third user's request must now be accepted (count back to 2).
      4. When all three are running the pending count must be 0.
      5. Stop every server and wait until none is running.

    The previous value of ``concurrent_spawn_limit`` is restored by a
    finalizer registered on ``request``.
    """
    db = app.db
    settings = app.tornado_application.settings

    # Remember what was configured before so teardown restores exactly that,
    # instead of assuming a particular default value.
    unset = object()
    previous_limit = settings.get('concurrent_spawn_limit', unset)
    settings['concurrent_spawn_limit'] = 2

    def _restore_limit():
        if previous_limit is unset:
            settings.pop('concurrent_spawn_limit', None)
        else:
            settings['concurrent_spawn_limit'] = previous_limit
    request.addfinalizer(_restore_limit)

    # start two pending spawns
    names = ['ykka', 'hjarka']
    users = [add_user(db, app=app, name=name) for name in names]
    for name in names:
        yield api_request(app, 'users', name, 'server', method='post')
    yield gen.sleep(0.5)
    assert settings['_spawn_pending_count'] == 2

    # ykka and hjarka's spawns are pending. Essun should fail with 429
    name = 'essun'
    user = add_user(db, app=app, name=name)
    r = yield api_request(app, 'users', name, 'server', method='post')
    assert r.status_code == 429

    # wait for ykka to finish
    while not users[0].running(''):
        yield gen.sleep(0.1)

    # race? hjarka could finish in this time
    # come back to this if we see intermittent failures here
    assert settings['_spawn_pending_count'] == 1
    r = yield api_request(app, 'users', name, 'server', method='post')
    r.raise_for_status()
    assert settings['_spawn_pending_count'] == 2
    users.append(user)

    while not all(u.running('') for u in users):
        yield gen.sleep(0.1)

    # everybody's running, pending count should be back to 0
    assert settings['_spawn_pending_count'] == 0

    for u in users:
        r = yield api_request(app, 'users', u.name, 'server', method='delete')
        # raise_for_status() returns None; call it directly rather than
        # yielding its result into the coroutine runner
        r.raise_for_status()

    while any(u.running('') for u in users):
        yield gen.sleep(0.1)
@mark.gen_test @mark.gen_test