make init_spawners check O(running servers) not O(total users)

query on Server objects instead of User objects

avoids lots of ORM work on startup since there are typically a small number of running servers
relative to the total number of users

this also means that the users dict is not fully populated. Is that okay? I hope so.
This commit is contained in:
Min RK
2020-02-18 17:10:19 +01:00
parent bc7bb5076f
commit 79a51dfdce
2 changed files with 26 additions and 17 deletions

View File

@@ -1670,9 +1670,12 @@ class JupyterHub(Application):
# This lets whitelist be used to set up initial list,
# but changes to the whitelist can occur in the database,
# and persist across sessions.
total_users = 0
for user in db.query(orm.User):
try:
await maybe_future(self.authenticator.add_user(user))
f = self.authenticator.add_user(user)
if f:
await maybe_future()
except Exception:
self.log.exception("Error adding user %s already in db", user.name)
if self.authenticator.delete_invalid_users:
@@ -1694,6 +1697,7 @@ class JupyterHub(Application):
)
)
else:
total_users += 1
# handle database upgrades where user.created is undefined.
# we don't want to allow user.created to be undefined,
# so initialize it to last_activity (if defined) or now.
@@ -1705,6 +1709,8 @@ class JupyterHub(Application):
# From this point on, any user changes should be done simultaneously
# to the whitelist set and user db, unless the whitelist is empty (all users allowed).
TOTAL_USERS.set(total_users)
async def init_groups(self):
"""Load predefined groups into the database"""
db = self.db
@@ -2005,21 +2011,24 @@ class JupyterHub(Application):
spawner._check_pending = False
# parallelize checks for running Spawners
# run query on extant Server objects
# so this is O(running servers) not O(total users)
check_futures = []
for orm_user in db.query(orm.User):
user = self.users[orm_user]
self.log.debug("Loading state for %s from db", user.name)
for name, orm_spawner in user.orm_spawners.items():
if orm_spawner.server is not None:
# spawner should be running
# instantiate Spawner wrapper and check if it's still alive
spawner = user.spawners[name]
# signal that check is pending to avoid race conditions
spawner._check_pending = True
f = asyncio.ensure_future(check_spawner(user, name, spawner))
check_futures.append(f)
TOTAL_USERS.set(len(self.users))
for orm_server in db.query(orm.Server):
orm_spawners = orm_server.spawner
if not orm_spawners:
continue
orm_spawner = orm_spawners[0]
orm_user = orm_spawner.user
# instantiate Spawner wrapper and check if it's still alive
# spawner should be running
user = self.users[orm_spawner.user]
spawner = user.spawners[orm_spawner.name]
self.log.debug("Loading state for %s from db", spawner._log_name)
# signal that check is pending to avoid race conditions
spawner._check_pending = True
f = asyncio.ensure_future(check_spawner(user, spawner.name, spawner))
check_futures.append(f)
# it's important that we get here before the first await
# so that we know all spawners are instantiated and in the check-pending state

View File

@@ -230,7 +230,7 @@ class Spawner(Base):
user_id = Column(Integer, ForeignKey('users.id', ondelete='CASCADE'))
server_id = Column(Integer, ForeignKey('servers.id', ondelete='SET NULL'))
server = relationship(Server, cascade="all")
server = relationship(Server, backref='spawner', cascade="all")
state = Column(JSONDict)
name = Column(Unicode(255))
@@ -282,7 +282,7 @@ class Service(Base):
# service-specific interface
_server_id = Column(Integer, ForeignKey('servers.id', ondelete='SET NULL'))
server = relationship(Server, cascade='all')
server = relationship(Server, backref='service', cascade='all')
pid = Column(Integer)
def new_api_token(self, token=None, **kwargs):