mirror of
https://github.com/jupyterhub/jupyterhub.git
synced 2025-10-13 13:03:01 +00:00
handle Spawners that are slow to stop
For example, Docker can take a long time to stop a server, especially if several Docker actions are already queued. Use status `202: Accepted` for API replies that are sent while a spawn or stop is still pending.
This commit is contained in:
@@ -17,7 +17,7 @@ class BaseUserHandler(APIHandler):
|
|||||||
return {
|
return {
|
||||||
'name': user.name,
|
'name': user.name,
|
||||||
'admin': user.admin,
|
'admin': user.admin,
|
||||||
'server': user.server.base_url if user.server else None,
|
'server': user.server.base_url if user.server and not (user.spawn_pending or user.stop_pending) else None,
|
||||||
'last_activity': user.last_activity.isoformat(),
|
'last_activity': user.last_activity.isoformat(),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -101,8 +101,12 @@ class UserAPIHandler(BaseUserHandler):
|
|||||||
raise web.HTTPError(404)
|
raise web.HTTPError(404)
|
||||||
if user.name == self.get_current_user().name:
|
if user.name == self.get_current_user().name:
|
||||||
raise web.HTTPError(400, "Cannot delete yourself!")
|
raise web.HTTPError(400, "Cannot delete yourself!")
|
||||||
|
if user.stop_pending:
|
||||||
|
raise web.HTTPError(400, "%s's server is in the process of stopping, please wait." % name)
|
||||||
if user.spawner is not None:
|
if user.spawner is not None:
|
||||||
yield self.stop_single_user(user)
|
yield self.stop_single_user(user)
|
||||||
|
if user.stop_pending:
|
||||||
|
raise web.HTTPError(400, "%s's server is in the process of stopping, please wait." % name)
|
||||||
|
|
||||||
yield gen.maybe_future(self.authenticator.delete_user(user))
|
yield gen.maybe_future(self.authenticator.delete_user(user))
|
||||||
|
|
||||||
@@ -136,16 +140,24 @@ class UserServerAPIHandler(BaseUserHandler):
|
|||||||
raise web.HTTPError(400, "%s's server is already running" % name)
|
raise web.HTTPError(400, "%s's server is already running" % name)
|
||||||
|
|
||||||
yield self.spawn_single_user(user)
|
yield self.spawn_single_user(user)
|
||||||
self.set_status(201)
|
status = 202 if user.spawn_pending else 201
|
||||||
|
self.set_status(status)
|
||||||
|
|
||||||
@gen.coroutine
|
@gen.coroutine
|
||||||
@admin_or_self
|
@admin_or_self
|
||||||
def delete(self, name):
|
def delete(self, name):
|
||||||
user = self.find_user(name)
|
user = self.find_user(name)
|
||||||
|
if user.stop_pending:
|
||||||
|
self.set_status(202)
|
||||||
|
return
|
||||||
if user.spawner is None:
|
if user.spawner is None:
|
||||||
raise web.HTTPError(400, "%s's server is not running" % name)
|
raise web.HTTPError(400, "%s's server is not running" % name)
|
||||||
|
status = yield user.spawner.poll()
|
||||||
|
if status is not None:
|
||||||
|
raise web.HTTPError(400, "%s's server is not running" % name)
|
||||||
yield self.stop_single_user(user)
|
yield self.stop_single_user(user)
|
||||||
self.set_status(204)
|
status = 202 if user.stop_pending else 204
|
||||||
|
self.set_status(status)
|
||||||
|
|
||||||
default_handlers = [
|
default_handlers = [
|
||||||
(r"/api/users", UserListAPIHandler),
|
(r"/api/users", UserListAPIHandler),
|
||||||
|
@@ -163,12 +163,20 @@ class BaseHandler(RequestHandler):
|
|||||||
def slow_spawn_timeout(self):
|
def slow_spawn_timeout(self):
|
||||||
return self.settings.get('slow_spawn_timeout', 10)
|
return self.settings.get('slow_spawn_timeout', 10)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def slow_stop_timeout(self):
|
||||||
|
return self.settings.get('slow_stop_timeout', 10)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def spawner_class(self):
|
def spawner_class(self):
|
||||||
return self.settings.get('spawner_class', LocalProcessSpawner)
|
return self.settings.get('spawner_class', LocalProcessSpawner)
|
||||||
|
|
||||||
@gen.coroutine
|
@gen.coroutine
|
||||||
def spawn_single_user(self, user):
|
def spawn_single_user(self, user):
|
||||||
|
if user.spawn_pending:
|
||||||
|
raise RuntimeError("Spawn already pending for: %s" % user.name)
|
||||||
|
tic = IOLoop.current().time()
|
||||||
|
|
||||||
f = user.spawn(
|
f = user.spawn(
|
||||||
spawner_class=self.spawner_class,
|
spawner_class=self.spawner_class,
|
||||||
base_url=self.base_url,
|
base_url=self.base_url,
|
||||||
@@ -185,6 +193,8 @@ class BaseHandler(RequestHandler):
|
|||||||
if f and f.exception() is not None:
|
if f and f.exception() is not None:
|
||||||
# failed, don't add to the proxy
|
# failed, don't add to the proxy
|
||||||
return
|
return
|
||||||
|
toc = IOLoop.current().time()
|
||||||
|
self.log.info("User %s server took %.3f seconds to start", user.name, toc-tic)
|
||||||
yield self.proxy.add_user(user)
|
yield self.proxy.add_user(user)
|
||||||
user.spawner.add_poll_callback(self.user_stopped, user)
|
user.spawner.add_poll_callback(self.user_stopped, user)
|
||||||
|
|
||||||
@@ -215,8 +225,36 @@ class BaseHandler(RequestHandler):
|
|||||||
|
|
||||||
@gen.coroutine
|
@gen.coroutine
|
||||||
def stop_single_user(self, user):
|
def stop_single_user(self, user):
|
||||||
|
if user.stop_pending:
|
||||||
|
raise RuntimeError("Stop already pending for: %s" % user.name)
|
||||||
|
tic = IOLoop.current().time()
|
||||||
|
f = user.stop()
|
||||||
yield self.proxy.delete_user(user)
|
yield self.proxy.delete_user(user)
|
||||||
yield user.stop()
|
@gen.coroutine
|
||||||
|
def finish_stop(f=None):
|
||||||
|
"""Finish the stop action by noticing that the user is stopped.
|
||||||
|
|
||||||
|
If the spawner is slow to stop, this is passed as an async callback,
|
||||||
|
otherwise it is called immediately.
|
||||||
|
"""
|
||||||
|
if f and f.exception() is not None:
|
||||||
|
# failed, don't do anything
|
||||||
|
return
|
||||||
|
toc = IOLoop.current().time()
|
||||||
|
self.log.info("User %s server took %.3f seconds to stop", user.name, toc-tic)
|
||||||
|
|
||||||
|
try:
|
||||||
|
yield gen.with_timeout(timedelta(seconds=self.slow_stop_timeout), f)
|
||||||
|
except gen.TimeoutError:
|
||||||
|
if user.stop_pending:
|
||||||
|
# hit timeout, but stop is still pending
|
||||||
|
self.log.warn("User %s server is slow to stop", user.name)
|
||||||
|
# schedule finish for when the server finishes stopping
|
||||||
|
IOLoop.current().add_future(f, finish_stop)
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
else:
|
||||||
|
yield finish_stop()
|
||||||
|
|
||||||
#---------------------------------------------------------------
|
#---------------------------------------------------------------
|
||||||
# template rendering
|
# template rendering
|
||||||
|
@@ -251,6 +251,7 @@ class User(Base):
|
|||||||
state = Column(JSONDict)
|
state = Column(JSONDict)
|
||||||
spawner = None
|
spawner = None
|
||||||
spawn_pending = False
|
spawn_pending = False
|
||||||
|
stop_pending = False
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
if self.server:
|
if self.server:
|
||||||
@@ -362,14 +363,18 @@ class User(Base):
|
|||||||
if self.spawner is None:
|
if self.spawner is None:
|
||||||
return
|
return
|
||||||
self.spawner.stop_polling()
|
self.spawner.stop_polling()
|
||||||
status = yield self.spawner.poll()
|
self.stop_pending = True
|
||||||
if status is None:
|
try:
|
||||||
yield self.spawner.stop()
|
status = yield self.spawner.poll()
|
||||||
self.spawner.clear_state()
|
if status is None:
|
||||||
self.state = self.spawner.get_state()
|
yield self.spawner.stop()
|
||||||
self.last_activity = datetime.utcnow()
|
self.spawner.clear_state()
|
||||||
self.server = None
|
self.state = self.spawner.get_state()
|
||||||
inspect(self).session.commit()
|
self.last_activity = datetime.utcnow()
|
||||||
|
self.server = None
|
||||||
|
inspect(self).session.commit()
|
||||||
|
finally:
|
||||||
|
self.stop_pending = False
|
||||||
|
|
||||||
|
|
||||||
class APIToken(Base):
|
class APIToken(Base):
|
||||||
|
@@ -49,9 +49,14 @@ class SlowSpawner(MockSpawner):
|
|||||||
|
|
||||||
@gen.coroutine
|
@gen.coroutine
|
||||||
def start(self):
|
def start(self):
|
||||||
yield gen.Task(IOLoop.current().add_timeout, timedelta(seconds=5))
|
yield gen.Task(IOLoop.current().add_timeout, timedelta(seconds=2))
|
||||||
yield super().start()
|
yield super().start()
|
||||||
|
|
||||||
|
@gen.coroutine
|
||||||
|
def stop(self):
|
||||||
|
yield gen.Task(IOLoop.current().add_timeout, timedelta(seconds=2))
|
||||||
|
yield super().stop()
|
||||||
|
|
||||||
|
|
||||||
class NeverSpawner(MockSpawner):
|
class NeverSpawner(MockSpawner):
|
||||||
"""A spawner that will never start"""
|
"""A spawner that will never start"""
|
||||||
|
@@ -207,25 +207,53 @@ def test_spawn(app, io_loop):
|
|||||||
def test_slow_spawn(app, io_loop):
|
def test_slow_spawn(app, io_loop):
|
||||||
app.tornado_application.settings['spawner_class'] = mocking.SlowSpawner
|
app.tornado_application.settings['spawner_class'] = mocking.SlowSpawner
|
||||||
app.tornado_application.settings['slow_spawn_timeout'] = 0
|
app.tornado_application.settings['slow_spawn_timeout'] = 0
|
||||||
|
app.tornado_application.settings['slow_stop_timeout'] = 0
|
||||||
|
|
||||||
db = app.db
|
db = app.db
|
||||||
name = 'zoe'
|
name = 'zoe'
|
||||||
user = add_user(db, name=name)
|
user = add_user(db, name=name)
|
||||||
r = api_request(app, 'users', name, 'server', method='post')
|
r = api_request(app, 'users', name, 'server', method='post')
|
||||||
|
r.raise_for_status()
|
||||||
|
assert r.status_code == 202
|
||||||
assert user.spawner is not None
|
assert user.spawner is not None
|
||||||
assert user.spawn_pending
|
assert user.spawn_pending
|
||||||
|
assert not user.stop_pending
|
||||||
|
|
||||||
dt = timedelta(seconds=0.1)
|
dt = timedelta(seconds=0.1)
|
||||||
@gen.coroutine
|
@gen.coroutine
|
||||||
def wait_pending():
|
def wait_spawn():
|
||||||
while user.spawn_pending:
|
while user.spawn_pending:
|
||||||
yield gen.Task(io_loop.add_timeout, dt)
|
yield gen.Task(io_loop.add_timeout, dt)
|
||||||
|
|
||||||
io_loop.run_sync(wait_pending)
|
io_loop.run_sync(wait_spawn)
|
||||||
assert not user.spawn_pending
|
assert not user.spawn_pending
|
||||||
status = io_loop.run_sync(user.spawner.poll)
|
status = io_loop.run_sync(user.spawner.poll)
|
||||||
assert status is None
|
assert status is None
|
||||||
|
|
||||||
|
@gen.coroutine
|
||||||
|
def wait_stop():
|
||||||
|
while user.stop_pending:
|
||||||
|
yield gen.Task(io_loop.add_timeout, dt)
|
||||||
|
|
||||||
|
r = api_request(app, 'users', name, 'server', method='delete')
|
||||||
|
r.raise_for_status()
|
||||||
|
assert r.status_code == 202
|
||||||
|
assert user.spawner is not None
|
||||||
|
assert user.stop_pending
|
||||||
|
|
||||||
|
r = api_request(app, 'users', name, 'server', method='delete')
|
||||||
|
r.raise_for_status()
|
||||||
|
assert r.status_code == 202
|
||||||
|
assert user.spawner is not None
|
||||||
|
assert user.stop_pending
|
||||||
|
|
||||||
|
io_loop.run_sync(wait_stop)
|
||||||
|
assert not user.stop_pending
|
||||||
|
assert user.spawner is not None
|
||||||
|
r = api_request(app, 'users', name, 'server', method='delete')
|
||||||
|
assert r.status_code == 400
|
||||||
|
|
||||||
|
|
||||||
def test_never_spawn(app, io_loop):
|
def test_never_spawn(app, io_loop):
|
||||||
app.tornado_application.settings['spawner_class'] = mocking.NeverSpawner
|
app.tornado_application.settings['spawner_class'] = mocking.NeverSpawner
|
||||||
app.tornado_application.settings['slow_spawn_timeout'] = 0
|
app.tornado_application.settings['slow_spawn_timeout'] = 0
|
||||||
|
Reference in New Issue
Block a user