From d8ef6d59c152f7630520fbc855b9435a1aa52f3c Mon Sep 17 00:00:00 2001 From: MinRK Date: Wed, 8 Oct 2014 13:18:31 -0700 Subject: [PATCH 1/3] adjustments to Spawner.stop - call start/stop_polling outside Spawner (avoids need for custom spawners to reimplement) - don't clear state when stopping Spawner (should enable spawners to resume) --- jupyterhub/orm.py | 11 +++-- jupyterhub/spawner.py | 95 ++++++++++++++++++++---------------- jupyterhub/tests/test_api.py | 5 +- 3 files changed, 66 insertions(+), 45 deletions(-) diff --git a/jupyterhub/orm.py b/jupyterhub/orm.py index 1354331a..9aa8a6af 100644 --- a/jupyterhub/orm.py +++ b/jupyterhub/orm.py @@ -313,6 +313,7 @@ class User(Base): api_token=api_token.token, ) yield spawner.start() + spawner.start_polling() # store state self.state = spawner.get_state() @@ -324,14 +325,18 @@ class User(Base): @gen.coroutine def stop(self): - """Stop the user's spawner""" + """Stop the user's spawner + + and cleanup after it. + """ if self.spawner is None: return + self.spawner.stop_polling() status = yield self.spawner.poll() if status is None: yield self.spawner.stop() - self.state = {} - self.spawner = None + self.state = self.spawner.get_state() + self.last_activity = datetime.utcnow() self.server = None inspect(self).session.commit() diff --git a/jupyterhub/spawner.py b/jupyterhub/spawner.py index 972b0b61..78ee31b4 100644 --- a/jupyterhub/spawner.py +++ b/jupyterhub/spawner.py @@ -14,7 +14,7 @@ from tornado.ioloop import IOLoop, PeriodicCallback from IPython.config import LoggingConfigurable from IPython.utils.traitlets import ( - Any, Bool, Dict, Enum, Instance, Integer, List, Unicode, + Any, Bool, Dict, Enum, Instance, Integer, Float, List, Unicode, ) from .utils import random_port @@ -93,9 +93,10 @@ class Spawner(LoggingConfigurable): def load_state(self, state): """load state from the database - This is the extensible part of state + This is the extensible part of state Override in a subclass if there is state to load. + Should call `super`. See Also -------- @@ -107,7 +108,8 @@ class Spawner(LoggingConfigurable): def get_state(self): """store the state necessary for load_state - A black box of extra state for custom spawners + A black box of extra state for custom spawners. + Should call `super`. Returns ------- @@ -200,6 +202,18 @@ class Spawner(LoggingConfigurable): add_callback = IOLoop.current().add_callback for callback in self._callbacks: add_callback(callback) + + death_interval = Float(0.1) + @gen.coroutine + def wait_for_death(self, timeout=10): + """wait for the process to die, up to timeout seconds""" + loop = IOLoop.current() + for i in range(int(timeout / self.poll_interval)): + status = yield self.poll() + if status is not None: + break + else: + yield gen.Task(loop.add_timeout, loop.time() + self.death_interval) def set_user_setuid(username): @@ -251,7 +265,7 @@ class LocalProcessSpawner(Spawner): ) proc = Instance(Popen) - pid = Integer() + pid = Integer(0) sudo_args = List(['-n'], config=True, help="""arguments to be passed to sudo (in addition to -u [username]) @@ -282,7 +296,8 @@ class LocalProcessSpawner(Spawner): def get_state(self): state = super(LocalProcessSpawner, self).get_state() - state['pid'] = self.pid + if self.pid: + state['pid'] = self.pid return state def sudo_cmd(self, user): @@ -311,17 +326,23 @@ class LocalProcessSpawner(Spawner): preexec_fn=self.make_preexec_fn(self.user.name), ) self.pid = self.proc.pid - self.start_polling() @gen.coroutine def poll(self): """Poll the process""" # if we started the process, poll with Popen if self.proc is not None: - raise gen.Return(self.proc.poll()) + status = self.proc.poll() + if status is not None: + # clear state if the process is done + self.pid = 0 + raise gen.Return(status) # if we resumed from stored state, # we don't have the Popen handle anymore + if not self.pid: + # no pid, not running + raise gen.Return(0) # this doesn't work on Windows, but that's okay because we don't support Windows. try: @@ -329,21 +350,23 @@ class LocalProcessSpawner(Spawner): except OSError as e: if e.errno == errno.ESRCH: # no such process, return exitcode == 0, since we don't know the exit status + self.pid = 0 raise gen.Return(0) + else: + raise else: # None indicates the process is running raise gen.Return(None) - @gen.coroutine - def _wait_for_death(self, timeout=10): - """wait for the process to die, up to timeout seconds""" - for i in range(int(timeout * 10)): - status = yield self.poll() - if status is not None: - break + def _signal(self, sig): + """send a signal, and ignore ERSCH because it just means it already died""" + try: + os.kill(self.pid, sig) + except OSError as e: + if e.errno == errno.ESRCH: + return else: - loop = IOLoop.current() - yield gen.Task(loop.add_timeout, loop.time() + 0.1) + raise @gen.coroutine def stop(self, now=False): @@ -351,39 +374,29 @@ class LocalProcessSpawner(Spawner): if `now`, skip waiting for clean shutdown """ - self.stop_polling() if not now: - # SIGINT to request clean shutdown + status = yield self.poll() + if status is not None: + return self.log.debug("Interrupting %i", self.pid) - try: - os.kill(self.pid, signal.SIGINT) - except OSError as e: - if e.errno == errno.ESRCH: - return - - yield self._wait_for_death(self.INTERRUPT_TIMEOUT) + self._signal(signal.SIGINT) + yield self.wait_for_death(self.INTERRUPT_TIMEOUT) # clean shutdown failed, use TERM status = yield self.poll() - if status is None: - self.log.debug("Terminating %i", self.pid) - try: - os.kill(self.pid, signal.SIGTERM) - except OSError as e: - if e.errno == errno.ESRCH: - return - yield self._wait_for_death(self.TERM_TIMEOUT) + if status is not None: + return + self.log.debug("Terminating %i", self.pid) + self._signal(signal.SIGTERM) + yield self.wait_for_death(self.TERM_TIMEOUT) # TERM failed, use KILL status = yield self.poll() - if status is None: - self.log.debug("Killing %i", self.pid) - try: - os.kill(self.pid, signal.SIGKILL) - except OSError as e: - if e.errno == errno.ESRCH: - return - yield self._wait_for_death(self.KILL_TIMEOUT) + if status is not None: + return + self.log.debug("Killing %i", self.pid) + self._signal(signal.SIGKILL) + yield self.wait_for_death(self.KILL_TIMEOUT) status = yield self.poll() if status is None: diff --git a/jupyterhub/tests/test_api.py b/jupyterhub/tests/test_api.py index b4c18404..90b0ab9b 100644 --- a/jupyterhub/tests/test_api.py +++ b/jupyterhub/tests/test_api.py @@ -155,6 +155,7 @@ def test_spawn(app, io_loop): user = add_user(db, name=name) r = api_request(app, 'users', name, 'server', method='post') assert r.status_code == 201 + assert 'pid' in user.state assert user.spawner is not None status = io_loop.run_sync(user.spawner.poll) assert status is None @@ -173,5 +174,7 @@ def test_spawn(app, io_loop): r = api_request(app, 'users', name, 'server', method='delete') assert r.status_code == 204 - assert user.spawner is None + assert 'pid' not in user.state + status = io_loop.run_sync(user.spawner.poll) + assert status == 0 \ No newline at end of file From a8548164cddb4836a0c6e54cd63f306ee92d5dfb Mon Sep 17 00:00:00 2001 From: MinRK Date: Fri, 10 Oct 2014 12:28:27 -0700 Subject: [PATCH 2/3] remove Spawner.fromJSON load state on `__init__` instead Makes more sense now that state can persist across server instances (e.g. docker container_id) --- jupyterhub/app.py | 20 +++++++----- jupyterhub/orm.py | 7 ++++- jupyterhub/spawner.py | 73 ++++++++++++++++++++++++++----------------- 3 files changed, 64 insertions(+), 36 deletions(-) diff --git a/jupyterhub/app.py b/jupyterhub/app.py index 657288bf..7e47a72f 100644 --- a/jupyterhub/app.py +++ b/jupyterhub/app.py @@ -448,20 +448,25 @@ class JupyterHubApp(Application): for user in db.query(orm.User): if not user.state: + # without spawner state, server isn't valid + user.server = None user_summaries.append(_user_summary(user)) continue self.log.debug("Loading state for %s from db", user.name) - spawner = self.spawner_class.fromJSON(user.state, user=user, hub=self.hub, config=self.config) + user.spawner = spawner = self.spawner_class( + user=user, hub=self.hub, config=self.config, + ) status = run_sync(spawner.poll) if status is None: - self.log.info("User %s still running", user.name) - user.spawner = spawner + self.log.info("%s still running", user.name) spawner.add_poll_callback(user_stopped, user) spawner.start_polling() else: - self.log.warn("Failed to load state for %s, assuming server is not running.", user.name) - # not running, state is invalid - user.state = {} + # user not running. This is expected if server is None, + # but indicates the user's server died while the Hub wasn't running + # if user.server is defined. + log = self.log.warn if user.server else self.log.debug + log("%s not running.", user.name) user.server = None user_summaries.append(_user_summary(user)) @@ -508,7 +513,8 @@ class JupyterHubApp(Application): '--api-port', str(self.proxy.api_server.port), '--default-target', self.hub.server.host, ] - if self.log_level == logging.DEBUG: + if False: + # if self.log_level == logging.DEBUG: cmd.extend(['--log-level', 'debug']) if self.ssl_key: cmd.extend(['--ssl-key', self.ssl_key]) diff --git a/jupyterhub/orm.py b/jupyterhub/orm.py index 9aa8a6af..b26bc6c1 100644 --- a/jupyterhub/orm.py +++ b/jupyterhub/orm.py @@ -305,13 +305,17 @@ class User(Base): api_token = self.new_api_token() db.add(api_token) db.commit() - + + spawner = self.spawner = spawner_class( config=config, user=self, hub=hub, api_token=api_token.token, ) + # we are starting a new server, make sure it doesn't restore state + spawner.clear_state() + yield spawner.start() spawner.start_polling() @@ -335,6 +339,7 @@ class User(Base): status = yield self.spawner.poll() if status is None: yield self.spawner.stop() + self.spawner.clear_state() self.state = self.spawner.get_state() self.last_activity = datetime.utcnow() self.server = None diff --git a/jupyterhub/spawner.py b/jupyterhub/spawner.py index 78ee31b4..0be6177a 100644 --- a/jupyterhub/spawner.py +++ b/jupyterhub/spawner.py @@ -80,15 +80,10 @@ class Spawner(LoggingConfigurable): help="""The command used for starting notebooks.""" ) - @classmethod - def fromJSON(cls, state, **kwargs): - """Create a new instance, and load its JSON state - - state will be a dict, loaded from JSON in the database. - """ - inst = cls(**kwargs) - inst.load_state(state) - return inst + def __init__(self, **kwargs): + super(Spawner, self).__init__(**kwargs) + if self.user.state: + self.load_state(self.user.state) def load_state(self, state): """load state from the database @@ -101,9 +96,10 @@ class Spawner(LoggingConfigurable): See Also -------- - get_state + get_state, clear_state """ - pass + if 'api_token' in state: + self.api_token = state['api_token'] def get_state(self): """store the state necessary for load_state @@ -117,7 +113,19 @@ class Spawner(LoggingConfigurable): state: dict a JSONable dict of state """ - return dict(api_token=self.api_token) + state = {} + if self.api_token: + state['api_token'] = self.api_token + return state + + def clear_state(self): + """clear any state that should be cleared when the process stops + + State that should be preserved across server instances should not be cleared. + + Subclasses should call super, to ensure that state is properly cleared. + """ + self.api_token = '' def get_args(self): """Return the arguments to be passed after self.cmd""" @@ -208,7 +216,7 @@ class Spawner(LoggingConfigurable): def wait_for_death(self, timeout=10): """wait for the process to die, up to timeout seconds""" loop = IOLoop.current() - for i in range(int(timeout / self.poll_interval)): + for i in range(int(timeout / self.death_interval)): status = yield self.poll() if status is not None: break @@ -291,15 +299,23 @@ class LocalProcessSpawner(Spawner): raise ValueError("This should be impossible") def load_state(self, state): + """load pid from state""" super(LocalProcessSpawner, self).load_state(state) - self.pid = state['pid'] + if 'pid' in state: + self.pid = state['pid'] def get_state(self): + """add pid to state""" state = super(LocalProcessSpawner, self).get_state() if self.pid: state['pid'] = self.pid return state + def clear_state(self): + """clear pid state""" + super(LocalProcessSpawner, self).clear_state() + self.pid = 0 + def sudo_cmd(self, user): return ['sudo', '-u', user.name] + self.sudo_args @@ -335,38 +351,39 @@ class LocalProcessSpawner(Spawner): status = self.proc.poll() if status is not None: # clear state if the process is done - self.pid = 0 + self.clear_state() raise gen.Return(status) # if we resumed from stored state, - # we don't have the Popen handle anymore + # we don't have the Popen handle anymore, so rely on self.pid + if not self.pid: # no pid, not running + self.clear_state() raise gen.Return(0) + # send signal 0 to check if PID exists # this doesn't work on Windows, but that's okay because we don't support Windows. - try: - os.kill(self.pid, 0) - except OSError as e: - if e.errno == errno.ESRCH: - # no such process, return exitcode == 0, since we don't know the exit status - self.pid = 0 - raise gen.Return(0) - else: - raise + alive = self._signal(0) + if not alive: + self.clear_state() + raise gen.Return(0) else: - # None indicates the process is running raise gen.Return(None) def _signal(self, sig): - """send a signal, and ignore ERSCH because it just means it already died""" + """send a signal, and ignore ERSCH because it just means it already died + + returns bool for whether the process existed to receive the signal. + """ try: os.kill(self.pid, sig) except OSError as e: if e.errno == errno.ESRCH: - return + return False # process is gone else: raise + return True # process exists @gen.coroutine def stop(self, now=False): From 58f4d39372840316be9bdadfa896034001d9070d Mon Sep 17 00:00:00 2001 From: MinRK Date: Tue, 14 Oct 2014 11:44:57 -0700 Subject: [PATCH 3/3] use debug logging during testing --- jupyterhub/tests/conftest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/jupyterhub/tests/conftest.py b/jupyterhub/tests/conftest.py index afe73763..d8a4a5a5 100644 --- a/jupyterhub/tests/conftest.py +++ b/jupyterhub/tests/conftest.py @@ -4,6 +4,7 @@ # Distributed under the terms of the Modified BSD License. import getpass +import logging from pytest import fixture from tornado import ioloop @@ -45,7 +46,7 @@ def io_loop(): @fixture(scope='module') def app(request): - app = MockHubApp() + app = MockHubApp.instance(log_level=logging.DEBUG) app.start([]) request.addfinalizer(app.stop) return app