make init_spawners check O(running servers) not O(total users)

query on Server objects instead of User objects

This avoids a lot of ORM work on startup, since there are typically only a small number
of running servers relative to the total number of users.

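This also means that the users dict is no longer fully populated at startup: only users who own a running server are loaded into it by this check, so TOTAL_USERS is now counted while iterating over the users in the database instead of being taken from len(self.users). Is that okay? I hope so.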
This commit is contained in:
Min RK
2020-02-18 17:10:19 +01:00
parent bc7bb5076f
commit 79a51dfdce
2 changed files with 26 additions and 17 deletions

View File

@@ -1670,9 +1670,12 @@ class JupyterHub(Application):
# This lets whitelist be used to set up initial list, # This lets whitelist be used to set up initial list,
# but changes to the whitelist can occur in the database, # but changes to the whitelist can occur in the database,
# and persist across sessions. # and persist across sessions.
total_users = 0
for user in db.query(orm.User): for user in db.query(orm.User):
try: try:
await maybe_future(self.authenticator.add_user(user)) f = self.authenticator.add_user(user)
if f:
await maybe_future()
except Exception: except Exception:
self.log.exception("Error adding user %s already in db", user.name) self.log.exception("Error adding user %s already in db", user.name)
if self.authenticator.delete_invalid_users: if self.authenticator.delete_invalid_users:
@@ -1694,6 +1697,7 @@ class JupyterHub(Application):
) )
) )
else: else:
total_users += 1
# handle database upgrades where user.created is undefined. # handle database upgrades where user.created is undefined.
# we don't want to allow user.created to be undefined, # we don't want to allow user.created to be undefined,
# so initialize it to last_activity (if defined) or now. # so initialize it to last_activity (if defined) or now.
@@ -1705,6 +1709,8 @@ class JupyterHub(Application):
# From this point on, any user changes should be done simultaneously # From this point on, any user changes should be done simultaneously
# to the whitelist set and user db, unless the whitelist is empty (all users allowed). # to the whitelist set and user db, unless the whitelist is empty (all users allowed).
TOTAL_USERS.set(total_users)
async def init_groups(self): async def init_groups(self):
"""Load predefined groups into the database""" """Load predefined groups into the database"""
db = self.db db = self.db
@@ -2005,21 +2011,24 @@ class JupyterHub(Application):
spawner._check_pending = False spawner._check_pending = False
# parallelize checks for running Spawners # parallelize checks for running Spawners
# run query on extant Server objects
# so this is O(running servers) not O(total users)
check_futures = [] check_futures = []
for orm_user in db.query(orm.User): for orm_server in db.query(orm.Server):
user = self.users[orm_user] orm_spawners = orm_server.spawner
self.log.debug("Loading state for %s from db", user.name) if not orm_spawners:
for name, orm_spawner in user.orm_spawners.items(): continue
if orm_spawner.server is not None: orm_spawner = orm_spawners[0]
# spawner should be running orm_user = orm_spawner.user
# instantiate Spawner wrapper and check if it's still alive # instantiate Spawner wrapper and check if it's still alive
spawner = user.spawners[name] # spawner should be running
# signal that check is pending to avoid race conditions user = self.users[orm_spawner.user]
spawner._check_pending = True spawner = user.spawners[orm_spawner.name]
f = asyncio.ensure_future(check_spawner(user, name, spawner)) self.log.debug("Loading state for %s from db", spawner._log_name)
check_futures.append(f) # signal that check is pending to avoid race conditions
spawner._check_pending = True
TOTAL_USERS.set(len(self.users)) f = asyncio.ensure_future(check_spawner(user, spawner.name, spawner))
check_futures.append(f)
# it's important that we get here before the first await # it's important that we get here before the first await
# so that we know all spawners are instantiated and in the check-pending state # so that we know all spawners are instantiated and in the check-pending state

View File

@@ -230,7 +230,7 @@ class Spawner(Base):
user_id = Column(Integer, ForeignKey('users.id', ondelete='CASCADE')) user_id = Column(Integer, ForeignKey('users.id', ondelete='CASCADE'))
server_id = Column(Integer, ForeignKey('servers.id', ondelete='SET NULL')) server_id = Column(Integer, ForeignKey('servers.id', ondelete='SET NULL'))
server = relationship(Server, cascade="all") server = relationship(Server, backref='spawner', cascade="all")
state = Column(JSONDict) state = Column(JSONDict)
name = Column(Unicode(255)) name = Column(Unicode(255))
@@ -282,7 +282,7 @@ class Service(Base):
# service-specific interface # service-specific interface
_server_id = Column(Integer, ForeignKey('servers.id', ondelete='SET NULL')) _server_id = Column(Integer, ForeignKey('servers.id', ondelete='SET NULL'))
server = relationship(Server, cascade='all') server = relationship(Server, backref='service', cascade='all')
pid = Column(Integer) pid = Column(Integer)
def new_api_token(self, token=None, **kwargs): def new_api_token(self, token=None, **kwargs):