Merge pull request #62 from minrk/shutdown-state

adjustments to Spawner.stop
This commit is contained in:
Min RK
2014-10-14 11:56:24 -07:00
5 changed files with 125 additions and 75 deletions

View File

@@ -448,20 +448,25 @@ class JupyterHubApp(Application):
for user in db.query(orm.User):
if not user.state:
# without spawner state, server isn't valid
user.server = None
user_summaries.append(_user_summary(user))
continue
self.log.debug("Loading state for %s from db", user.name)
spawner = self.spawner_class.fromJSON(user.state, user=user, hub=self.hub, config=self.config)
user.spawner = spawner = self.spawner_class(
user=user, hub=self.hub, config=self.config,
)
status = run_sync(spawner.poll)
if status is None:
self.log.info("User %s still running", user.name)
user.spawner = spawner
self.log.info("%s still running", user.name)
spawner.add_poll_callback(user_stopped, user)
spawner.start_polling()
else:
self.log.warn("Failed to load state for %s, assuming server is not running.", user.name)
# not running, state is invalid
user.state = {}
# user not running. This is expected if server is None,
# but indicates the user's server died while the Hub wasn't running
# if user.server is defined.
log = self.log.warn if user.server else self.log.debug
log("%s not running.", user.name)
user.server = None
user_summaries.append(_user_summary(user))
@@ -508,7 +513,8 @@ class JupyterHubApp(Application):
'--api-port', str(self.proxy.api_server.port),
'--default-target', self.hub.server.host,
]
if self.log_level == logging.DEBUG:
if False:
# if self.log_level == logging.DEBUG:
cmd.extend(['--log-level', 'debug'])
if self.ssl_key:
cmd.extend(['--ssl-key', self.ssl_key])

View File

@@ -306,13 +306,18 @@ class User(Base):
db.add(api_token)
db.commit()
spawner = self.spawner = spawner_class(
config=config,
user=self,
hub=hub,
api_token=api_token.token,
)
# we are starting a new server, make sure it doesn't restore state
spawner.clear_state()
yield spawner.start()
spawner.start_polling()
# store state
self.state = spawner.get_state()
@@ -324,14 +329,19 @@ class User(Base):
@gen.coroutine
def stop(self):
"""Stop the user's spawner"""
"""Stop the user's spawner
and cleanup after it.
"""
if self.spawner is None:
return
self.spawner.stop_polling()
status = yield self.spawner.poll()
if status is None:
yield self.spawner.stop()
self.state = {}
self.spawner = None
self.spawner.clear_state()
self.state = self.spawner.get_state()
self.last_activity = datetime.utcnow()
self.server = None
inspect(self).session.commit()

View File

@@ -14,7 +14,7 @@ from tornado.ioloop import IOLoop, PeriodicCallback
from IPython.config import LoggingConfigurable
from IPython.utils.traitlets import (
Any, Bool, Dict, Enum, Instance, Integer, List, Unicode,
Any, Bool, Dict, Enum, Instance, Integer, Float, List, Unicode,
)
from .utils import random_port
@@ -80,15 +80,10 @@ class Spawner(LoggingConfigurable):
help="""The command used for starting notebooks."""
)
@classmethod
def fromJSON(cls, state, **kwargs):
    """Alternate constructor: build an instance and restore saved state.

    `state` is the dict that was loaded from JSON in the database; it is
    handed to `load_state` on the freshly built instance. All keyword
    arguments are forwarded unchanged to the class constructor.
    """
    spawner = cls(**kwargs)
    spawner.load_state(state)
    return spawner
def __init__(self, **kwargs):
    """Construct the spawner, restoring stored state when the user has any.

    Keyword arguments are passed straight to the parent constructor.
    If `self.user.state` is non-empty afterwards, it is fed to
    `load_state`.
    """
    super(Spawner, self).__init__(**kwargs)
    saved_state = self.user.state
    if saved_state:
        self.load_state(saved_state)
def load_state(self, state):
"""load state from the database
@@ -96,18 +91,21 @@ class Spawner(LoggingConfigurable):
This is the extensible part of state
Override in a subclass if there is state to load.
Should call `super`.
See Also
--------
get_state
get_state, clear_state
"""
pass
if 'api_token' in state:
self.api_token = state['api_token']
def get_state(self):
"""store the state necessary for load_state
A black box of extra state for custom spawners
A black box of extra state for custom spawners.
Should call `super`.
Returns
-------
@@ -115,7 +113,19 @@ class Spawner(LoggingConfigurable):
state: dict
a JSONable dict of state
"""
return dict(api_token=self.api_token)
state = {}
if self.api_token:
state['api_token'] = self.api_token
return state
def clear_state(self):
    """Reset state that should not outlive a stopped process.

    Here that is only the API token, which is reset to an empty string.
    State meant to be preserved across server instances should be left
    alone. Subclasses that override this should call super so the reset
    done here still happens.
    """
    self.api_token = ''
def get_args(self):
"""Return the arguments to be passed after self.cmd"""
@@ -201,6 +211,18 @@ class Spawner(LoggingConfigurable):
for callback in self._callbacks:
add_callback(callback)
# Seconds to wait between successive polls in wait_for_death.
death_interval = Float(0.1)
@gen.coroutine
def wait_for_death(self, timeout=10):
    """Poll until the process reports an exit status or `timeout` elapses.

    The process is polled up to `int(timeout / self.death_interval)`
    times, pausing `self.death_interval` seconds on the IOLoop after
    every poll that still returns None. Nothing is returned; callers
    poll again afterwards to learn the outcome.
    """
    ioloop = IOLoop.current()
    attempts = int(timeout / self.death_interval)
    for _ in range(attempts):
        exit_status = yield self.poll()
        if exit_status is not None:
            # the process is gone, no need to keep waiting
            return
        wake_at = ioloop.time() + self.death_interval
        yield gen.Task(ioloop.add_timeout, wake_at)
def set_user_setuid(username):
"""return a preexec_fn for setting the user (via setuid) of a spawned process"""
@@ -251,7 +273,7 @@ class LocalProcessSpawner(Spawner):
)
proc = Instance(Popen)
pid = Integer()
pid = Integer(0)
sudo_args = List(['-n'], config=True,
help="""arguments to be passed to sudo (in addition to -u [username])
@@ -277,14 +299,23 @@ class LocalProcessSpawner(Spawner):
raise ValueError("This should be impossible")
def load_state(self, state):
"""load pid from state"""
super(LocalProcessSpawner, self).load_state(state)
self.pid = state['pid']
if 'pid' in state:
self.pid = state['pid']
def get_state(self):
"""add pid to state"""
state = super(LocalProcessSpawner, self).get_state()
state['pid'] = self.pid
if self.pid:
state['pid'] = self.pid
return state
def clear_state(self):
    """Clear the parent spawner's state, then forget the stored pid.

    The pid is reset to 0.
    """
    super(LocalProcessSpawner, self).clear_state()
    self.pid = 0
def sudo_cmd(self, user):
    """Return the sudo command prefix for running as `user`.

    The result is `sudo -u <user.name>` followed by the configured
    `self.sudo_args`, as a new list.
    """
    prefix = ['sudo', '-u', user.name]
    return prefix + self.sudo_args
@@ -311,39 +342,48 @@ class LocalProcessSpawner(Spawner):
preexec_fn=self.make_preexec_fn(self.user.name),
)
self.pid = self.proc.pid
self.start_polling()
@gen.coroutine
def poll(self):
"""Poll the process"""
# if we started the process, poll with Popen
if self.proc is not None:
raise gen.Return(self.proc.poll())
status = self.proc.poll()
if status is not None:
# clear state if the process is done
self.clear_state()
raise gen.Return(status)
# if we resumed from stored state,
# we don't have the Popen handle anymore
# we don't have the Popen handle anymore, so rely on self.pid
if not self.pid:
# no pid, not running
self.clear_state()
raise gen.Return(0)
# send signal 0 to check if PID exists
# this doesn't work on Windows, but that's okay because we don't support Windows.
try:
os.kill(self.pid, 0)
except OSError as e:
if e.errno == errno.ESRCH:
# no such process, return exitcode == 0, since we don't know the exit status
raise gen.Return(0)
alive = self._signal(0)
if not alive:
self.clear_state()
raise gen.Return(0)
else:
# None indicates the process is running
raise gen.Return(None)
@gen.coroutine
def _wait_for_death(self, timeout=10):
"""wait for the process to die, up to timeout seconds"""
for i in range(int(timeout * 10)):
status = yield self.poll()
if status is not None:
break
def _signal(self, sig):
"""send a signal, and ignore ESRCH because it just means it already died
returns bool for whether the process existed to receive the signal.
"""
try:
os.kill(self.pid, sig)
except OSError as e:
if e.errno == errno.ESRCH:
return False # process is gone
else:
loop = IOLoop.current()
yield gen.Task(loop.add_timeout, loop.time() + 0.1)
raise
return True # process exists
@gen.coroutine
def stop(self, now=False):
@@ -351,39 +391,29 @@ class LocalProcessSpawner(Spawner):
if `now`, skip waiting for clean shutdown
"""
self.stop_polling()
if not now:
# SIGINT to request clean shutdown
status = yield self.poll()
if status is not None:
return
self.log.debug("Interrupting %i", self.pid)
try:
os.kill(self.pid, signal.SIGINT)
except OSError as e:
if e.errno == errno.ESRCH:
return
yield self._wait_for_death(self.INTERRUPT_TIMEOUT)
self._signal(signal.SIGINT)
yield self.wait_for_death(self.INTERRUPT_TIMEOUT)
# clean shutdown failed, use TERM
status = yield self.poll()
if status is None:
self.log.debug("Terminating %i", self.pid)
try:
os.kill(self.pid, signal.SIGTERM)
except OSError as e:
if e.errno == errno.ESRCH:
return
yield self._wait_for_death(self.TERM_TIMEOUT)
if status is not None:
return
self.log.debug("Terminating %i", self.pid)
self._signal(signal.SIGTERM)
yield self.wait_for_death(self.TERM_TIMEOUT)
# TERM failed, use KILL
status = yield self.poll()
if status is None:
self.log.debug("Killing %i", self.pid)
try:
os.kill(self.pid, signal.SIGKILL)
except OSError as e:
if e.errno == errno.ESRCH:
return
yield self._wait_for_death(self.KILL_TIMEOUT)
if status is not None:
return
self.log.debug("Killing %i", self.pid)
self._signal(signal.SIGKILL)
yield self.wait_for_death(self.KILL_TIMEOUT)
status = yield self.poll()
if status is None:

View File

@@ -4,6 +4,7 @@
# Distributed under the terms of the Modified BSD License.
import getpass
import logging
from pytest import fixture
from tornado import ioloop
@@ -45,7 +46,7 @@ def io_loop():
@fixture(scope='module')
def app(request):
app = MockHubApp()
app = MockHubApp.instance(log_level=logging.DEBUG)
app.start([])
request.addfinalizer(app.stop)
return app

View File

@@ -155,6 +155,7 @@ def test_spawn(app, io_loop):
user = add_user(db, name=name)
r = api_request(app, 'users', name, 'server', method='post')
assert r.status_code == 201
assert 'pid' in user.state
assert user.spawner is not None
status = io_loop.run_sync(user.spawner.poll)
assert status is None
@@ -173,5 +174,7 @@ def test_spawn(app, io_loop):
r = api_request(app, 'users', name, 'server', method='delete')
assert r.status_code == 204
assert user.spawner is None
assert 'pid' not in user.state
status = io_loop.run_sync(user.spawner.poll)
assert status == 0