update cull_idle_servers for 0.9

- adds max-age - handle named servers - refactor culling a bit
2025-10-13 04:53:01 +00:00 · 2018-03-22 17:21:18 +01:00
parent 4fa9535fd4
commit 7bf93cb7e6
1 changed files with 176 additions and 51 deletions
--- a/examples/cull-idle/cull_idle_servers.py
+++ b/examples/cull-idle/cull_idle_servers.py
@@ -26,7 +26,8 @@ Or run it manually by generating an API token and storing it in `JUPYTERHUB_API_
    python cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api]
 """

-import datetime
+from datetime import datetime, timedelta, timezone
+from functools import partial
 import json
 import os

@@ -35,92 +36,210 @@ try:
 except ImportError:
    from urllib import quote

-from dateutil.parser import parse as parse_date
+import dateutil.parser

-from tornado.gen import coroutine
+from tornado.gen import coroutine, multi
 from tornado.log import app_log
 from tornado.httpclient import AsyncHTTPClient, HTTPRequest
 from tornado.ioloop import IOLoop, PeriodicCallback
 from tornado.options import define, options, parse_command_line

+def parse_date(date_string):
+    """Parse a timestamp
+
+    If it doesn't have a timezone, assume utc
+
+    Returned datetime object will always be timezone-aware
+    """
+    dt = dateutil.parser.parse(date_string)
+    if not dt.tzinfo:
+        # assume naïve timestamps are UTC
+        dt = dt.replace(tzinfo=timezone.utc)
+    return dt

@coroutine
-def cull_idle(url, api_token, timeout, cull_users=False):
+def cull_idle(url, api_token, timeout, cull_users=False, max_age=0):
    """Shutdown idle single-user servers

    If cull_users, inactive *users* will be deleted as well.
    """
    auth_header = {
-            'Authorization': 'token %s' % api_token
-        }
+        'Authorization': 'token %s' % api_token,
+    }
    req = HTTPRequest(url=url + '/users',
        headers=auth_header,
    )
-    now = datetime.datetime.utcnow()
-    cull_limit = now - datetime.timedelta(seconds=timeout)
+    now = datetime.now(timezone.utc)
+    inactive_limit = now - timedelta(seconds=timeout)
+    max_age_limit = now - timedelta(seconds=max_age)
    client = AsyncHTTPClient()
    resp = yield client.fetch(req)
    users = json.loads(resp.body.decode('utf8', 'replace'))
    futures = []

    @coroutine
-    def cull_one(user, last_activity):
-        """cull one user"""
+    def handle_server(user, server_name, server):
+        """Handle (maybe) culling a single server

-        # shutdown server first. Hub doesn't allow deleting users with running servers.
-        if user['server']:
-            app_log.info("Culling server for %s (inactive since %s)", user['name'], last_activity)
-            req = HTTPRequest(url=url + '/users/%s/server' % quote(user['name']),
-                method='DELETE',
-                headers=auth_header,
+        Returns True if server was culled,
+        False otherwise.
+        """
+        log_name = user['name']
+        if server_name:
+            log_name = '%s/%s' % (user['name'], server_name)
+        if server.get('pending'):
+            app_log.warning(
+                "Not culling server %s with pending %s",
+                log_name, server['pending'])
+            return False
+
+        if server.get('started'):
+            started = parse_date(server['started'])
+        else:
+            # started may be undefined on jupyterhub < 0.9
+            started = 'unknown'
+
+        # check last activity
+        # last_activity can be None in 0.9
+        if server['last_activity']:
+            last_activity = parse_date(server['last_activity'])
+        else:
+            # no activity yet, use start date
+            # last_activity may be None with jupyterhub 0.9,
+            # which introduces the 'started' field which is never None
+            # for running servers
+            last_activity = started
+
+        should_cull = last_activity < inactive_limit
+        if should_cull:
+            app_log.info(
+                "Culling server %s (inactive since %s)",
+                log_name, last_activity)
+
+        if max_age and not should_cull:
+            # only check started if max_age is specified
+            # so that we can still be compatible with jupyterhub 0.8
+            # which doesn't define the 'started' field
+            print(started, max_age_limit)
+            if started < max_age_limit:
+                app_log.info(
+                    "Culling server %s (started: %s, last active: %s)",
+                    log_name, started, last_activity)
+                should_cull = True
+
+        if not should_cull:
+            app_log.debug(
+                "Not culling server %s (started: %s, last active: %s)",
+                log_name, started, last_activity)
+            return False
+
+        req = HTTPRequest(url=url + '/users/%s/server' % quote(user['name']),
+            method='DELETE',
+            headers=auth_header,
+        )
+        resp = yield client.fetch(req)
+        if resp.code == 202:
+            app_log.warning(
+                "Server %s is slow to stop",
+                log_name,
            )
-            resp = yield client.fetch(req)
-            if resp.code == 202:
-                msg = "Server for {} is slow to stop.".format(user['name'])
-                if cull_users:
-                    app_log.warning(msg + " Not culling user yet.")
-                    # return here so we don't continue to cull the user
-                    # which will fail if the server is still trying to shutdown
-                    return
-                app_log.warning(msg)
-        if cull_users:
-            app_log.info("Culling user %s (inactive since %s)", user['name'], last_activity)
-            req = HTTPRequest(url=url + '/users/%s' % user['name'],
-                method='DELETE',
-                headers=auth_header,
-            )
-            yield client.fetch(req)
+            # return False to prevent culling user with pending shutdowns
+            return False
+        return True
+
+    @coroutine
+    def handle_user(user):
+        """Handle one user"""
+        # shutdown servers first.
+        # Hub doesn't allow deleting users with running servers.
+        servers = user.get('servers',
+            {'': {
+                    'started': user.get('started'),
+                    'last_activity': user['last_activity'],
+                    'pending': user['pending'],
+                }
+            }
+        )
+        server_futures = [
+            handle_server(user, server_name, server)
+            for server_name, server in servers.items()
+        ]
+        results = yield multi(server_futures)
+        print(results)
+        if not cull_users:
+            return
+        # some servers are still running, cannot cull users
+        still_alive = len(results) - sum(results)
+        if still_alive:
+            app_log.debug(
+                "Not culling user %s with %i servers still alive",
+                user['name'], still_alive)
+            return False
+
+        should_cull = False
+        if user.get('created'):
+            created = parse_date(user['created'])
+        else:
+            # created may be undefined on jupyterhub < 0.9
+            created = 'unknown'
+
+        # check last activity
+        # last_activity can be None in 0.9
+        if user['last_activity']:
+            last_activity = parse_date(user['last_activity'])
+        else:
+            # no activity yet, use start date
+            # last_activity may be None with jupyterhub 0.9,
+            # which introduces the 'created' field which is never None
+            last_activity = created
+
+        should_cull = last_activity < inactive_limit
+        if should_cull:
+            app_log.info(
+                "Culling user %s (inactive since %s)",
+                user['name'], last_activity)
+
+        if max_age and not should_cull:
+            # only check created if max_age is specified
+            # so that we can still be compatible with jupyterhub 0.8
+            # which doesn't define the 'started' field
+            if created < max_age_limit:
+                app_log.info(
+                    "Culling user %s (created: %s, last active: %s)",
+                    user['name'], created, last_activity)
+                should_cull = True
+
+        if not should_cull:
+            app_log.debug(
+                "Not culling user %s (created: %s, last active: %s)",
+                user['name'], created, last_activity)
+            return False
+
+        req = HTTPRequest(url=url + '/users/%s' % user['name'],
+            method='DELETE',
+            headers=auth_header,
+        )
+        yield client.fetch(req)
+        return True

    for user in users:
-        if not user['server'] and not cull_users:
-            # server not running and not culling users, nothing to do
-            continue
-        if not user['last_activity']:
-            continue
-        last_activity = parse_date(user['last_activity'])
-        if last_activity < cull_limit:
-            # user might be in a transition (e.g. starting or stopping)
-            # don't try to cull if this is happening
-            if user['pending']:
-                app_log.warning("Not culling user %s with pending %s", user['name'], user['pending'])
-                continue
-            futures.append((user['name'], cull_one(user, last_activity)))
-        else:
-            app_log.debug("Not culling %s (active since %s)", user['name'], last_activity)
+        futures.append((user['name'], handle_user(user)))

    for (name, f) in futures:
        try:
-            yield f
+            result = yield f
        except Exception:
-            app_log.exception("Error culling %s", name)
+            app_log.exception("Error processing %s", name)
        else:
-            app_log.debug("Finished culling %s", name)
+            if result:
+                app_log.debug("Finished culling %s", name)


 if __name__ == '__main__':
    define('url', default=os.environ.get('JUPYTERHUB_API_URL'), help="The JupyterHub API URL")
    define('timeout', default=600, help="The idle timeout (in seconds)")
    define('cull_every', default=0, help="The interval (in seconds) for checking for idle servers to cull")
+    define('max_age', default=0, help="The maximum age (in seconds) of servers that should be culled even if they are active")
    define('cull_users', default=False,
        help="""Cull users in addition to servers.
                This is for use in temporary-user cases such as tmpnb.""",
@@ -137,7 +256,13 @@ if __name__ == '__main__':
        app_log.warning("Could not load pycurl: %s\npycurl is recommended if you have a large number of users.", e)

    loop = IOLoop.current()
-    cull = lambda : cull_idle(options.url, api_token, options.timeout, options.cull_users)
+    cull = partial(cull_idle,
+        url=options.url,
+        api_token=api_token,
+        timeout=options.timeout,
+        cull_users=options.cull_users,
+        max_age=options.max_age,
+    )
    # schedule first cull immediately
    # because PeriodicCallback doesn't start until the end of the first interval
    loop.add_callback(cull)