parallelize startup poll

Puts each check for a running Spawner in its own coroutine and runs them all concurrently.

Note: this will only improve performance when a large number of Spawners is running and `yield spawner.poll()` takes a nontrivial amount of time.
This is because these are coroutines, not threads: the checks only overlap while they are waiting on `poll()`. If instantiating the Spawners themselves is what takes the time, startup will not get any faster.
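To illustrate the pattern outside of JupyterHub, here is a minimal Tornado sketch. It is not the Hub code itself: the `fake_poll` and `check_all` names, the user names, and the one-second delay are invented for the example. Every coroutine is started first, then the futures are awaited, so the waits overlap instead of running back to back.

```python
from tornado import gen, ioloop


@gen.coroutine
def fake_poll(name, delay):
    # stand-in for `yield spawner.poll()`: all of the "work" is waiting
    yield gen.sleep(delay)
    return name


@gen.coroutine
def check_all():
    # submit every check first; each coroutine starts running on the event loop
    futures = [fake_poll(name, 1.0) for name in ('alice', 'bob', 'carol')]
    # then await them all; total wall time is ~1s instead of ~3s,
    # because the sleeps overlap while each coroutine is suspended
    results = []
    for f in futures:
        results.append((yield f))
    return results


if __name__ == '__main__':
    print(ioloop.IOLoop.current().run_sync(check_all))
```

Tornado also accepts a list of futures directly (`results = yield futures`), which waits on all of them at once; the explicit loop is used here because it mirrors the change below.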
Author: Min RK
Date:   2018-01-31 11:23:04 +01:00
Parent: dde7b5ea68
Commit: 6d6041a3c1


```diff
@@ -1204,8 +1204,6 @@ class JupyterHub(Application):
     def init_spawners(self):
         db = self.db
 
-        user_summaries = ['']
-
         def _user_summary(user):
             parts = ['{0: >8}'.format(user.name)]
             if user.admin:
@@ -1225,10 +1223,8 @@ class JupyterHub(Application):
             yield self.proxy.delete_user(user, server_name)
             yield user.stop(server_name)
 
-        for orm_user in db.query(orm.User):
-            self.users[orm_user.id] = user = User(orm_user, self.tornado_settings)
-            self.log.debug("Loading state for %s from db", user.name)
-            for name, spawner in user.spawners.items():
+        @gen.coroutine
+        def check_spawner(user, name, spawner):
                 status = 0
                 if spawner.server:
                     try:
@@ -1253,11 +1249,23 @@ class JupyterHub(Application):
                         spawner.server = None
                 else:
                     self.log.debug("%s not running", spawner._log_name)
-            db.commit()
-
-            user_summaries.append(_user_summary(user))
-
-        self.log.debug("Loaded users: %s", '\n'.join(user_summaries))
+
+        # parallelize checks for running Spawners
+        check_futures = []
+        for orm_user in db.query(orm.User):
+            self.users[orm_user.id] = user = User(orm_user, self.tornado_settings)
+            self.log.debug("Loading state for %s from db", user.name)
+            for name, spawner in user.spawners.items():
+                f = check_spawner(user, name, spawner)
+                check_futures.append(f)
+
+        # await checks after submitting them all
+        for f in check_futures:
+            yield f
+
+        user_summaries = map(_user_summary, self.users.values())
+        self.log.debug("Loaded users:\n%s", '\n'.join(user_summaries))
         db.commit()
 
     def init_oauth(self):
```