mirror of
https://github.com/jupyterhub/jupyterhub.git
synced 2025-10-13 13:03:01 +00:00
handle Spawners that are slow to stop
e.g. docker, which can take a long time to stop, especially if several docker actions are already queued. Use status `202: Accepted` for API replies sent with spawn/stop still pending
This commit is contained in:
@@ -17,7 +17,7 @@ class BaseUserHandler(APIHandler):
|
||||
return {
|
||||
'name': user.name,
|
||||
'admin': user.admin,
|
||||
'server': user.server.base_url if user.server else None,
|
||||
'server': user.server.base_url if user.server and not (user.spawn_pending or user.stop_pending) else None,
|
||||
'last_activity': user.last_activity.isoformat(),
|
||||
}
|
||||
|
||||
@@ -101,8 +101,12 @@ class UserAPIHandler(BaseUserHandler):
|
||||
raise web.HTTPError(404)
|
||||
if user.name == self.get_current_user().name:
|
||||
raise web.HTTPError(400, "Cannot delete yourself!")
|
||||
if user.stop_pending:
|
||||
raise web.HTTPError(400, "%s's server is in the process of stopping, please wait." % name)
|
||||
if user.spawner is not None:
|
||||
yield self.stop_single_user(user)
|
||||
if user.stop_pending:
|
||||
raise web.HTTPError(400, "%s's server is in the process of stopping, please wait." % name)
|
||||
|
||||
yield gen.maybe_future(self.authenticator.delete_user(user))
|
||||
|
||||
@@ -136,16 +140,24 @@ class UserServerAPIHandler(BaseUserHandler):
|
||||
raise web.HTTPError(400, "%s's server is already running" % name)
|
||||
|
||||
yield self.spawn_single_user(user)
|
||||
self.set_status(201)
|
||||
status = 202 if user.spawn_pending else 201
|
||||
self.set_status(status)
|
||||
|
||||
@gen.coroutine
|
||||
@admin_or_self
|
||||
def delete(self, name):
|
||||
user = self.find_user(name)
|
||||
if user.stop_pending:
|
||||
self.set_status(202)
|
||||
return
|
||||
if user.spawner is None:
|
||||
raise web.HTTPError(400, "%s's server is not running" % name)
|
||||
status = yield user.spawner.poll()
|
||||
if status is not None:
|
||||
raise web.HTTPError(400, "%s's server is not running" % name)
|
||||
yield self.stop_single_user(user)
|
||||
self.set_status(204)
|
||||
status = 202 if user.stop_pending else 204
|
||||
self.set_status(status)
|
||||
|
||||
default_handlers = [
|
||||
(r"/api/users", UserListAPIHandler),
|
||||
|
@@ -163,12 +163,20 @@ class BaseHandler(RequestHandler):
|
||||
def slow_spawn_timeout(self):
|
||||
return self.settings.get('slow_spawn_timeout', 10)
|
||||
|
||||
@property
|
||||
def slow_stop_timeout(self):
|
||||
return self.settings.get('slow_stop_timeout', 10)
|
||||
|
||||
@property
|
||||
def spawner_class(self):
|
||||
return self.settings.get('spawner_class', LocalProcessSpawner)
|
||||
|
||||
@gen.coroutine
|
||||
def spawn_single_user(self, user):
|
||||
if user.spawn_pending:
|
||||
raise RuntimeError("Spawn already pending for: %s" % user.name)
|
||||
tic = IOLoop.current().time()
|
||||
|
||||
f = user.spawn(
|
||||
spawner_class=self.spawner_class,
|
||||
base_url=self.base_url,
|
||||
@@ -185,6 +193,8 @@ class BaseHandler(RequestHandler):
|
||||
if f and f.exception() is not None:
|
||||
# failed, don't add to the proxy
|
||||
return
|
||||
toc = IOLoop.current().time()
|
||||
self.log.info("User %s server took %.3f seconds to start", user.name, toc-tic)
|
||||
yield self.proxy.add_user(user)
|
||||
user.spawner.add_poll_callback(self.user_stopped, user)
|
||||
|
||||
@@ -215,8 +225,36 @@ class BaseHandler(RequestHandler):
|
||||
|
||||
@gen.coroutine
|
||||
def stop_single_user(self, user):
|
||||
if user.stop_pending:
|
||||
raise RuntimeError("Stop already pending for: %s" % user.name)
|
||||
tic = IOLoop.current().time()
|
||||
f = user.stop()
|
||||
yield self.proxy.delete_user(user)
|
||||
yield user.stop()
|
||||
@gen.coroutine
|
||||
def finish_stop(f=None):
|
||||
"""Finish the stop action by noticing that the user is stopped.
|
||||
|
||||
If the spawner is slow to stop, this is passed as an async callback,
|
||||
otherwise it is called immediately.
|
||||
"""
|
||||
if f and f.exception() is not None:
|
||||
# failed, don't do anything
|
||||
return
|
||||
toc = IOLoop.current().time()
|
||||
self.log.info("User %s server took %.3f seconds to stop", user.name, toc-tic)
|
||||
|
||||
try:
|
||||
yield gen.with_timeout(timedelta(seconds=self.slow_stop_timeout), f)
|
||||
except gen.TimeoutError:
|
||||
if user.stop_pending:
|
||||
# hit timeout, but stop is still pending
|
||||
self.log.warn("User %s server is slow to stop", user.name)
|
||||
# schedule finish for when the server finishes stopping
|
||||
IOLoop.current().add_future(f, finish_stop)
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
yield finish_stop()
|
||||
|
||||
#---------------------------------------------------------------
|
||||
# template rendering
|
||||
|
@@ -251,6 +251,7 @@ class User(Base):
|
||||
state = Column(JSONDict)
|
||||
spawner = None
|
||||
spawn_pending = False
|
||||
stop_pending = False
|
||||
|
||||
def __repr__(self):
|
||||
if self.server:
|
||||
@@ -362,14 +363,18 @@ class User(Base):
|
||||
if self.spawner is None:
|
||||
return
|
||||
self.spawner.stop_polling()
|
||||
status = yield self.spawner.poll()
|
||||
if status is None:
|
||||
yield self.spawner.stop()
|
||||
self.spawner.clear_state()
|
||||
self.state = self.spawner.get_state()
|
||||
self.last_activity = datetime.utcnow()
|
||||
self.server = None
|
||||
inspect(self).session.commit()
|
||||
self.stop_pending = True
|
||||
try:
|
||||
status = yield self.spawner.poll()
|
||||
if status is None:
|
||||
yield self.spawner.stop()
|
||||
self.spawner.clear_state()
|
||||
self.state = self.spawner.get_state()
|
||||
self.last_activity = datetime.utcnow()
|
||||
self.server = None
|
||||
inspect(self).session.commit()
|
||||
finally:
|
||||
self.stop_pending = False
|
||||
|
||||
|
||||
class APIToken(Base):
|
||||
|
@@ -49,8 +49,13 @@ class SlowSpawner(MockSpawner):
|
||||
|
||||
@gen.coroutine
|
||||
def start(self):
|
||||
yield gen.Task(IOLoop.current().add_timeout, timedelta(seconds=5))
|
||||
yield gen.Task(IOLoop.current().add_timeout, timedelta(seconds=2))
|
||||
yield super().start()
|
||||
|
||||
@gen.coroutine
|
||||
def stop(self):
|
||||
yield gen.Task(IOLoop.current().add_timeout, timedelta(seconds=2))
|
||||
yield super().stop()
|
||||
|
||||
|
||||
class NeverSpawner(MockSpawner):
|
||||
|
@@ -207,25 +207,53 @@ def test_spawn(app, io_loop):
|
||||
def test_slow_spawn(app, io_loop):
|
||||
app.tornado_application.settings['spawner_class'] = mocking.SlowSpawner
|
||||
app.tornado_application.settings['slow_spawn_timeout'] = 0
|
||||
app.tornado_application.settings['slow_stop_timeout'] = 0
|
||||
|
||||
db = app.db
|
||||
name = 'zoe'
|
||||
user = add_user(db, name=name)
|
||||
r = api_request(app, 'users', name, 'server', method='post')
|
||||
r.raise_for_status()
|
||||
assert r.status_code == 202
|
||||
assert user.spawner is not None
|
||||
assert user.spawn_pending
|
||||
assert not user.stop_pending
|
||||
|
||||
dt = timedelta(seconds=0.1)
|
||||
@gen.coroutine
|
||||
def wait_pending():
|
||||
def wait_spawn():
|
||||
while user.spawn_pending:
|
||||
yield gen.Task(io_loop.add_timeout, dt)
|
||||
|
||||
io_loop.run_sync(wait_pending)
|
||||
io_loop.run_sync(wait_spawn)
|
||||
assert not user.spawn_pending
|
||||
status = io_loop.run_sync(user.spawner.poll)
|
||||
assert status is None
|
||||
|
||||
@gen.coroutine
|
||||
def wait_stop():
|
||||
while user.stop_pending:
|
||||
yield gen.Task(io_loop.add_timeout, dt)
|
||||
|
||||
r = api_request(app, 'users', name, 'server', method='delete')
|
||||
r.raise_for_status()
|
||||
assert r.status_code == 202
|
||||
assert user.spawner is not None
|
||||
assert user.stop_pending
|
||||
|
||||
r = api_request(app, 'users', name, 'server', method='delete')
|
||||
r.raise_for_status()
|
||||
assert r.status_code == 202
|
||||
assert user.spawner is not None
|
||||
assert user.stop_pending
|
||||
|
||||
io_loop.run_sync(wait_stop)
|
||||
assert not user.stop_pending
|
||||
assert user.spawner is not None
|
||||
r = api_request(app, 'users', name, 'server', method='delete')
|
||||
assert r.status_code == 400
|
||||
|
||||
|
||||
def test_never_spawn(app, io_loop):
|
||||
app.tornado_application.settings['spawner_class'] = mocking.NeverSpawner
|
||||
app.tornado_application.settings['slow_spawn_timeout'] = 0
|
||||
|
Reference in New Issue
Block a user