test spawn_pending_count

make sure to cover various cases of spawn failure
so that failed spawns don't leak pending-spawn counts
This commit is contained in:
Min RK
2017-07-31 11:50:20 +02:00
parent 744d96330e
commit 88268bd76f
4 changed files with 150 additions and 15 deletions

View File

@@ -390,8 +390,6 @@ class BaseHandler(RequestHandler):
429,
"User startup rate limit exceeded. Try to start again in a few minutes.")
# FIXME: Move this out of settings, since this isn't really a setting
self.spawn_pending_count += 1
tic = IOLoop.current().time()
user_server_name = user.name
if server_name:
@@ -402,6 +400,14 @@ class BaseHandler(RequestHandler):
self.log.debug("Initiating spawn for %s", user_server_name)
f = user.spawn(server_name, options)
# increment spawn_pending only after spawn starts
self.log.debug("%i%s concurrent spawns",
self.spawn_pending_count,
'/%i' % concurrent_spawn_limit if concurrent_spawn_limit else '')
# FIXME: Move this out of settings, since this isn't really a setting
self.spawn_pending_count += 1
spawner = user.spawners[server_name]
spawner._proxy_pending = True
@@ -414,6 +420,7 @@ class BaseHandler(RequestHandler):
"""
if f and f.exception() is not None:
# failed, don't add to the proxy
self.spawn_pending_count -= 1
return
toc = IOLoop.current().time()
self.log.info("User %s took %.3f seconds to start", user_server_name, toc-tic)
@@ -455,9 +462,14 @@ class BaseHandler(RequestHandler):
# schedule finish for when the user finishes spawning
IOLoop.current().add_future(f, finish_user_spawn)
else:
self.spawn_pending_count -= 1
toc = IOLoop.current().time()
self.statsd.timing('spawner.failure', (toc - tic) * 1000)
raise web.HTTPError(500, "Spawner failed to start [status=%s]" % status)
except Exception:
# error in start
self.spawn_pending_count -= 1
raise
else:
yield finish_user_spawn()

View File

@@ -14,6 +14,7 @@ from tornado import ioloop, gen
from .. import orm
from ..utils import random_port
from . import mocking
from .mocking import MockHub
from .test_services import mockservice_cmd
@@ -134,3 +135,36 @@ def no_patience(app):
{'slow_spawn_timeout': 0,
'slow_stop_timeout': 0}):
yield
@fixture
def slow_spawn(app):
"""Fixture enabling SlowSpawner"""
with mock.patch.dict(app.tornado_settings,
{'spawner_class': mocking.SlowSpawner}):
yield
@fixture
def never_spawn(app):
"""Fixture enabling NeverSpawner"""
with mock.patch.dict(app.tornado_settings,
{'spawner_class': mocking.NeverSpawner}):
yield
@fixture
def bad_spawn(app):
"""Fixture enabling BadSpawner"""
with mock.patch.dict(app.tornado_settings,
{'spawner_class': mocking.BadSpawner}):
yield
@fixture
def slow_bad_spawn(app):
"""Fixture enabling SlowBadSpawner"""
with mock.patch.dict(app.tornado_settings,
{'spawner_class': mocking.SlowBadSpawner}):
yield

View File

@@ -94,6 +94,22 @@ class NeverSpawner(MockSpawner):
return 0
class BadSpawner(MockSpawner):
"""Spawner that fails immediately"""
def start(self):
raise RuntimeError("I don't work!")
class SlowBadSpawner(MockSpawner):
"""Spawner that fails after a short delay"""
@gen.coroutine
def start(self):
yield gen.sleep(0.1)
raise RuntimeError("I don't work!")
class FormSpawner(MockSpawner):
"""A spawner that has an options form defined"""
options_form = "IMAFORM"

View File

@@ -6,6 +6,7 @@ import sys
from unittest import mock
from urllib.parse import urlparse, quote
import pytest
from pytest import mark
import requests
@@ -391,6 +392,7 @@ def test_make_admin(app):
@mark.gen_test
def test_spawn(app):
settings = app.tornado_application.settings
db = app.db
name = 'wash'
user = add_user(db, app=app, name=name)
@@ -442,23 +444,22 @@ def test_spawn(app):
assert before_servers == after_servers
tokens = list(db.query(orm.APIToken).filter(orm.APIToken.user_id == user.id))
assert tokens == []
assert settings['_spawn_pending_count'] == 0
@mark.gen_test
def test_slow_spawn(app, no_patience, request):
patch = mock.patch.dict(app.tornado_settings, {'spawner_class': mocking.SlowSpawner})
patch.start()
request.addfinalizer(patch.stop)
def test_slow_spawn(app, no_patience, slow_spawn):
settings = app.tornado_application.settings
db = app.db
name = 'zoe'
user = add_user(db, app=app, name=name)
app_user = add_user(db, app=app, name=name)
r = yield api_request(app, 'users', name, 'server', method='post')
r.raise_for_status()
assert r.status_code == 202
app_user = app.users[name]
assert app_user.spawner is not None
assert app_user.spawner._spawn_pending
assert not app_user.spawner._stop_pending
assert settings['_spawn_pending_count'] == 1
@gen.coroutine
def wait_spawn():
@@ -492,21 +493,19 @@ def test_slow_spawn(app, no_patience, request):
assert app_user.spawner is not None
r = yield api_request(app, 'users', name, 'server', method='delete')
assert r.status_code == 400
assert settings['_spawn_pending_count'] == 0
@mark.gen_test
def test_never_spawn(app, no_patience, request):
patch = mock.patch.dict(app.tornado_settings, {'spawner_class': mocking.NeverSpawner})
patch.start()
request.addfinalizer(patch.stop)
def test_never_spawn(app, no_patience, never_spawn):
settings = app.tornado_application.settings
db = app.db
name = 'badger'
user = add_user(db, app=app, name=name)
app_user = add_user(db, app=app, name=name)
r = yield api_request(app, 'users', name, 'server', method='post')
app_user = app.users[name]
assert app_user.spawner is not None
assert app_user.spawner._spawn_pending
assert settings['_spawn_pending_count'] == 1
@gen.coroutine
def wait_pending():
@@ -517,6 +516,80 @@ def test_never_spawn(app, no_patience, request):
assert not app_user.spawner._spawn_pending
status = yield app_user.spawner.poll()
assert status is not None
# failed spawn should decrements pending count
assert settings['_spawn_pending_count'] == 0
@mark.gen_test
def test_bad_spawn(app, no_patience, bad_spawn):
settings = app.tornado_application.settings
db = app.db
name = 'prim'
user = add_user(db, app=app, name=name)
r = yield api_request(app, 'users', name, 'server', method='post')
assert r.status_code == 500
assert settings['_spawn_pending_count'] == 0
@mark.gen_test
def test_slow_bad_spawn(app, no_patience, slow_bad_spawn):
settings = app.tornado_application.settings
db = app.db
name = 'zaphod'
user = add_user(db, app=app, name=name)
r = yield api_request(app, 'users', name, 'server', method='post')
r.raise_for_status()
while user.spawner._spawn_pending:
yield gen.sleep(0.1)
# spawn failed
assert not user.running('')
assert settings['_spawn_pending_count'] == 0
@mark.gen_test
def test_spawn_limit(app, no_patience, slow_spawn, request):
db = app.db
settings = app.tornado_application.settings
settings['concurrent_spawn_limit'] = 2
def _restore_limit():
settings['concurrent_spawn_limit'] = 100
request.addfinalizer(_restore_limit)
# start two pending spawns
names = ['ykka', 'hjarka']
users = [ add_user(db, app=app, name=name) for name in names ]
for name in names:
yield api_request(app, 'users', name, 'server', method='post')
yield gen.sleep(0.5)
assert settings['_spawn_pending_count'] == 2
# ykka and hjarka's spawns are pending. Essun should fail with 429
name = 'essun'
user = add_user(db, app=app, name=name)
r = yield api_request(app, 'users', name, 'server', method='post')
assert r.status_code == 429
# wait for ykka to finish
while not users[0].running(''):
yield gen.sleep(0.1)
# race? hjarka could finish in this time
# come back to this if we see intermittent failures here
assert settings['_spawn_pending_count'] == 1
r = yield api_request(app, 'users', name, 'server', method='post')
r.raise_for_status()
assert settings['_spawn_pending_count'] == 2
users.append(user)
while not all(u.running('') for u in users):
yield gen.sleep(0.1)
# everybody's running, pending count should be back to 0
assert settings['_spawn_pending_count'] == 0
for u in users:
r = yield api_request(app, 'users', u.name, 'server', method='delete')
yield r.raise_for_status()
while any(u.running('') for u in users):
yield gen.sleep(0.1)
@mark.gen_test