Replace failed spawners when starting new launch

Avoids leaving stale state when re-using a spawner that failed the last time it started

we keep failed spawners around to track their errors,
but we don't want to re-use them when it comes time to start a new launch.

adds User.get_spawner(server_name, replace_failed=True) to always get a non-failed Spawner
This commit is contained in:
Min RK
2022-02-22 10:42:45 +01:00
parent a3ea0f0449
commit 7861662e17
5 changed files with 64 additions and 13 deletions

View File

@@ -515,7 +515,7 @@ class UserServerAPIHandler(APIHandler):
user_name, self.named_server_limit_per_user
),
)
spawner = user.spawners[server_name]
spawner = user.get_spawner(server_name, replace_failed=True)
pending = spawner.pending
if pending == 'spawn':
self.set_header('Content-Type', 'text/plain')

View File

@@ -151,7 +151,7 @@ class SpawnHandler(BaseHandler):
self.redirect(url)
return
spawner = user.spawners[server_name]
spawner = user.get_spawner(server_name, replace_failed=True)
pending_url = self._get_pending_url(user, server_name)
@@ -237,7 +237,7 @@ class SpawnHandler(BaseHandler):
if user is None:
raise web.HTTPError(404, "No such user: %s" % for_user)
spawner = user.spawners[server_name]
spawner = user.get_spawner(server_name, replace_failed=True)
if spawner.ready:
raise web.HTTPError(400, "%s is already running" % (spawner._log_name))
@@ -369,13 +369,9 @@ class SpawnPendingHandler(BaseHandler):
auth_state = await user.get_auth_state()
# First, check for previous failure.
if (
not spawner.active
and spawner._spawn_future
and spawner._spawn_future.done()
and spawner._spawn_future.exception()
):
# Condition: spawner not active and _spawn_future exists and contains an Exception
if not spawner.active and spawner._failed:
# Condition: spawner not active and last spawn failed
# (failure is available as spawner._spawn_future.exception()).
# Implicit spawn on /user/:name is not allowed if the user's last spawn failed.
# We should point the user to Home if the most recent spawn failed.
exc = spawner._spawn_future.exception()

View File

@@ -1030,7 +1030,7 @@ async def test_never_spawn(app, no_patience, never_spawn):
assert not app_user.spawner._spawn_pending
status = await app_user.spawner.poll()
assert status is not None
# failed spawn should decrements pending count
# failed spawn should decrement pending count
assert app.users.count_active_users()['pending'] == 0
@@ -1039,9 +1039,16 @@ async def test_bad_spawn(app, bad_spawn):
name = 'prim'
user = add_user(db, app=app, name=name)
r = await api_request(app, 'users', name, 'server', method='post')
# check that we don't re-use spawners that failed
user.spawners[''].reused = True
assert r.status_code == 500
assert app.users.count_active_users()['pending'] == 0
r = await api_request(app, 'users', name, 'server', method='post')
# check that we don't re-use spawners that failed
spawner = user.spawners['']
assert not getattr(spawner, 'reused', False)
async def test_spawn_nosuch_user(app):
r = await api_request(app, 'users', "nosuchuser", 'server', method='post')

View File

@@ -128,11 +128,20 @@ async def test_admin_sort(app, sort):
assert r.status_code == 200
async def test_spawn_redirect(app):
@pytest.mark.parametrize("last_failed", [True, False])
async def test_spawn_redirect(app, last_failed):
name = 'wash'
cookies = await app.login_user(name)
u = app.users[orm.User.find(app.db, name)]
if last_failed:
# mock a failed spawn
last_spawner = u.spawners['']
last_spawner._spawn_future = asyncio.Future()
last_spawner._spawn_future.set_exception(RuntimeError("I failed!"))
else:
last_spawner = None
status = await u.spawner.poll()
assert status is not None
@@ -141,6 +150,10 @@ async def test_spawn_redirect(app):
r.raise_for_status()
print(urlparse(r.url))
path = urlparse(r.url).path
# ensure we got a new spawner
assert u.spawners[''] is not last_spawner
# make sure we visited hub/spawn-pending after spawn
# if spawn was really quick, we might get redirected all the way to the running server,
# so check history instead of r.url
@@ -258,6 +271,25 @@ async def test_spawn_page(app):
assert FormSpawner.options_form in r.text
async def test_spawn_page_after_failed(app, user):
cookies = await app.login_user(user.name)
# mock a failed spawn
last_spawner = user.spawners['']
last_spawner._spawn_future = asyncio.Future()
last_spawner._spawn_future.set_exception(RuntimeError("I failed!"))
with mock.patch.dict(app.users.settings, {'spawner_class': FormSpawner}):
r = await get_page('spawn', app, cookies=cookies)
spawner = user.spawners['']
# make sure we didn't reuse last spawner
assert isinstance(spawner, FormSpawner)
assert spawner is not last_spawner
assert r.url.endswith('/spawn')
spawner = user.spawners['']
assert FormSpawner.options_form in r.text
async def test_spawn_page_falsy_callable(app):
with mock.patch.dict(
app.users.settings, {'spawner_class': FalsyCallableFormSpawner}

View File

@@ -253,6 +253,22 @@ class User:
def spawner_class(self):
return self.settings.get('spawner_class', LocalProcessSpawner)
def get_spawner(self, server_name="", replace_failed=False):
"""Get a spawner by name
replace_failed governs whether a failed spawner should be replaced
or returned (default: returned).
.. versionadded:: 2.2
"""
spawner = self.spawners[server_name]
if replace_failed and spawner._failed:
self.log.debug(f"Discarding failed spawner {spawner._log_name}")
# remove failed spawner, create a new one
self.spawners.pop(server_name)
spawner = self.spawners[server_name]
return spawner
def sync_groups(self, group_names):
"""Synchronize groups with database"""
@@ -628,7 +644,7 @@ class User:
api_token = self.new_api_token(note=note, roles=['server'])
db.commit()
spawner = self.spawners[server_name]
spawner = self.get_spawner(server_name, replace_failed=True)
spawner.server = server = Server(orm_server=orm_server)
assert spawner.orm_spawner.server is orm_server