Remove idle culler example

Has been moved to its own repo. See https://github.com/jupyterhub/the-littlest-jupyterhub/pull/559 for more info
2025-10-07 10:04:07 +00:00 · 2020-07-12 17:14:14 +05:30
parent 42adb44153
commit 1beea06ce5
3 changed files with 3 additions and 452 deletions
--- a/examples/cull-idle/README.md
+++ b/examples/cull-idle/README.md
@@ -1,41 +1,4 @@
-# `cull-idle` Example
+# idle-culler example

-The `cull_idle_servers.py` file provides a script to cull and shut down idle
-single-user notebook servers. This script is used when `cull-idle` is run as
-a Service or when it is run manually as a standalone script.
-
-
-## Configure `cull-idle` to run as a Hub-Managed Service
-
-In `jupyterhub_config.py`, add the following dictionary for the `cull-idle`
-Service to the `c.JupyterHub.services` list:
-
-```python
-c.JupyterHub.services = [
-    {
-        'name': 'cull-idle',
-        'admin': True,
-        'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'],
-    }
-]
-```
-
-where:
-
- `'admin': True` indicates that the Service has 'admin' permissions, and
- `'command'` indicates that the Service will be managed by the Hub.
-
-## Run `cull-idle` manually as a standalone script
-
-This will run `cull-idle` manually. `cull-idle` can be run as a standalone
-script anywhere with access to the Hub, and will periodically check for idle
-servers and shut them down via the Hub's REST API. In order to shutdown the
-servers, the token given to cull-idle must have admin privileges.
-
-Generate an API token and store it in the `JUPYTERHUB_API_TOKEN` environment
-variable. Run `cull_idle_servers.py` manually. 
-
-```bash
-    export JUPYTERHUB_API_TOKEN=$(jupyterhub token)
-    python3 cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api]
-```
+The idle culler has been moved to its own repository at
+[jupyterhub/jupyterhub-idle-culler](https://github.com/jupyterhub/jupyterhub-idle-culler).
--- a/examples/cull-idle/cull_idle_servers.py
+++ b/examples/cull-idle/cull_idle_servers.py
@@ -1,401 +0,0 @@
-#!/usr/bin/env python3
-"""script to monitor and cull idle single-user servers
-
-Caveats:
-
-last_activity is not updated with high frequency,
-so cull timeout should be greater than the sum of:
-
- single-user websocket ping interval (default: 30s)
- JupyterHub.last_activity_interval (default: 5 minutes)
-
-You can run this as a service managed by JupyterHub with this in your config::
-
-
-    c.JupyterHub.services = [
-        {
-            'name': 'cull-idle',
-            'admin': True,
-            'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'],
-        }
-    ]
-
-Or run it manually by generating an API token and storing it in `JUPYTERHUB_API_TOKEN`:
-
-    export JUPYTERHUB_API_TOKEN=$(jupyterhub token)
-    python3 cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api]
-
-This script uses the same ``--timeout`` and ``--max-age`` values for
-culling users and users' servers.  If you want a different value for
-users and servers, you should add this script to the services list
-twice, just with different ``name``s, different values, and one with
-the ``--cull-users`` option.
-"""
-import json
-import os
-from datetime import datetime
-from datetime import timezone
-from functools import partial
-
-try:
-    from urllib.parse import quote
-except ImportError:
-    from urllib import quote
-
-import dateutil.parser
-
-from tornado.gen import coroutine, multi
-from tornado.locks import Semaphore
-from tornado.log import app_log
-from tornado.httpclient import AsyncHTTPClient, HTTPRequest
-from tornado.ioloop import IOLoop, PeriodicCallback
-from tornado.options import define, options, parse_command_line
-
-
-def parse_date(date_string):
-    """Parse a timestamp
-
-    If it doesn't have a timezone, assume utc
-
-    Returned datetime object will always be timezone-aware
-    """
-    dt = dateutil.parser.parse(date_string)
-    if not dt.tzinfo:
-        # assume naive timestamps are UTC
-        dt = dt.replace(tzinfo=timezone.utc)
-    return dt
-
-
-def format_td(td):
-    """
-    Nicely format a timedelta object
-
-    as HH:MM:SS
-    """
-    if td is None:
-        return "unknown"
-    if isinstance(td, str):
-        return td
-    seconds = int(td.total_seconds())
-    h = seconds // 3600
-    seconds = seconds % 3600
-    m = seconds // 60
-    seconds = seconds % 60
-    return "{h:02}:{m:02}:{seconds:02}".format(h=h, m=m, seconds=seconds)
-
-
-@coroutine
-def cull_idle(
-    url, api_token, inactive_limit, cull_users=False, max_age=0, concurrency=10
-):
-    """Shutdown idle single-user servers
-
-    If cull_users, inactive *users* will be deleted as well.
-    """
-    auth_header = {'Authorization': 'token %s' % api_token}
-    req = HTTPRequest(url=url + '/users', headers=auth_header)
-    now = datetime.now(timezone.utc)
-    client = AsyncHTTPClient()
-
-    if concurrency:
-        semaphore = Semaphore(concurrency)
-
-        @coroutine
-        def fetch(req):
-            """client.fetch wrapped in a semaphore to limit concurrency"""
-            yield semaphore.acquire()
-            try:
-                return (yield client.fetch(req))
-            finally:
-                yield semaphore.release()
-
-    else:
-        fetch = client.fetch
-
-    resp = yield fetch(req)
-    users = json.loads(resp.body.decode('utf8', 'replace'))
-    futures = []
-
-    @coroutine
-    def handle_server(user, server_name, server, max_age, inactive_limit):
-        """Handle (maybe) culling a single server
-
-        "server" is the entire server model from the API.
-
-        Returns True if server is now stopped (user removable),
-        False otherwise.
-        """
-        log_name = user['name']
-        if server_name:
-            log_name = '%s/%s' % (user['name'], server_name)
-        if server.get('pending'):
-            app_log.warning(
-                "Not culling server %s with pending %s", log_name, server['pending']
-            )
-            return False
-
-        # jupyterhub < 0.9 defined 'server.url' once the server was ready
-        # as an *implicit* signal that the server was ready.
-        # 0.9 adds a dedicated, explicit 'ready' field.
-        # By current (0.9) definitions, servers that have no pending
-        # events and are not ready shouldn't be in the model,
-        # but let's check just to be safe.
-
-        if not server.get('ready', bool(server['url'])):
-            app_log.warning(
-                "Not culling not-ready not-pending server %s: %s", log_name, server
-            )
-            return False
-
-        if server.get('started'):
-            age = now - parse_date(server['started'])
-        else:
-            # started may be undefined on jupyterhub < 0.9
-            age = None
-
-        # check last activity
-        # last_activity can be None in 0.9
-        if server['last_activity']:
-            inactive = now - parse_date(server['last_activity'])
-        else:
-            # no activity yet, use start date
-            # last_activity may be None with jupyterhub 0.9,
-            # which introduces the 'started' field which is never None
-            # for running servers
-            inactive = age
-
-        # CUSTOM CULLING TEST CODE HERE
-        # Add in additional server tests here.  Return False to mean "don't
-        # cull", True means "cull immediately", or, for example, update some
-        # other variables like inactive_limit.
-        #
-        # Here, server['state'] is the result of the get_state method
-        # on the spawner.  This does *not* contain the below by
-        # default, you may have to modify your spawner to make this
-        # work.  The `user` variable is the user model from the API.
-        #
-        # if server['state']['profile_name'] == 'unlimited'
-        #     return False
-        # inactive_limit = server['state']['culltime']
-
-        should_cull = (
-            inactive is not None and inactive.total_seconds() >= inactive_limit
-        )
-        if should_cull:
-            app_log.info(
-                "Culling server %s (inactive for %s)", log_name, format_td(inactive)
-            )
-
-        if max_age and not should_cull:
-            # only check started if max_age is specified
-            # so that we can still be compatible with jupyterhub 0.8
-            # which doesn't define the 'started' field
-            if age is not None and age.total_seconds() >= max_age:
-                app_log.info(
-                    "Culling server %s (age: %s, inactive for %s)",
-                    log_name,
-                    format_td(age),
-                    format_td(inactive),
-                )
-                should_cull = True
-
-        if not should_cull:
-            app_log.debug(
-                "Not culling server %s (age: %s, inactive for %s)",
-                log_name,
-                format_td(age),
-                format_td(inactive),
-            )
-            return False
-
-        if server_name:
-            # culling a named server
-            delete_url = url + "/users/%s/servers/%s" % (
-                quote(user['name']),
-                quote(server['name']),
-            )
-        else:
-            delete_url = url + '/users/%s/server' % quote(user['name'])
-
-        req = HTTPRequest(url=delete_url, method='DELETE', headers=auth_header)
-        resp = yield fetch(req)
-        if resp.code == 202:
-            app_log.warning("Server %s is slow to stop", log_name)
-            # return False to prevent culling user with pending shutdowns
-            return False
-        return True
-
-    @coroutine
-    def handle_user(user):
-        """Handle one user.
-
-        Create a list of their servers, and async exec them.  Wait for
-        that to be done, and if all servers are stopped, possibly cull
-        the user.
-        """
-        # shutdown servers first.
-        # Hub doesn't allow deleting users with running servers.
-        # jupyterhub 0.9 always provides a 'servers' model.
-        # 0.8 only does this when named servers are enabled.
-        if 'servers' in user:
-            servers = user['servers']
-        else:
-            # jupyterhub < 0.9 without named servers enabled.
-            # create servers dict with one entry for the default server
-            # from the user model.
-            # only if the server is running.
-            servers = {}
-            if user['server']:
-                servers[''] = {
-                    'last_activity': user['last_activity'],
-                    'pending': user['pending'],
-                    'url': user['server'],
-                }
-        server_futures = [
-            handle_server(user, server_name, server, max_age, inactive_limit)
-            for server_name, server in servers.items()
-        ]
-        results = yield multi(server_futures)
-        if not cull_users:
-            return
-        # some servers are still running, cannot cull users
-        still_alive = len(results) - sum(results)
-        if still_alive:
-            app_log.debug(
-                "Not culling user %s with %i servers still alive",
-                user['name'],
-                still_alive,
-            )
-            return False
-
-        should_cull = False
-        if user.get('created'):
-            age = now - parse_date(user['created'])
-        else:
-            # created may be undefined on jupyterhub < 0.9
-            age = None
-
-        # check last activity
-        # last_activity can be None in 0.9
-        if user['last_activity']:
-            inactive = now - parse_date(user['last_activity'])
-        else:
-            # no activity yet, use start date
-            # last_activity may be None with jupyterhub 0.9,
-            # which introduces the 'created' field which is never None
-            inactive = age
-
-        should_cull = (
-            inactive is not None and inactive.total_seconds() >= inactive_limit
-        )
-        if should_cull:
-            app_log.info("Culling user %s (inactive for %s)", user['name'], inactive)
-
-        if max_age and not should_cull:
-            # only check created if max_age is specified
-            # so that we can still be compatible with jupyterhub 0.8
-            # which doesn't define the 'started' field
-            if age is not None and age.total_seconds() >= max_age:
-                app_log.info(
-                    "Culling user %s (age: %s, inactive for %s)",
-                    user['name'],
-                    format_td(age),
-                    format_td(inactive),
-                )
-                should_cull = True
-
-        if not should_cull:
-            app_log.debug(
-                "Not culling user %s (created: %s, last active: %s)",
-                user['name'],
-                format_td(age),
-                format_td(inactive),
-            )
-            return False
-
-        req = HTTPRequest(
-            url=url + '/users/%s' % user['name'], method='DELETE', headers=auth_header
-        )
-        yield fetch(req)
-        return True
-
-    for user in users:
-        futures.append((user['name'], handle_user(user)))
-
-    for (name, f) in futures:
-        try:
-            result = yield f
-        except Exception:
-            app_log.exception("Error processing %s", name)
-        else:
-            if result:
-                app_log.debug("Finished culling %s", name)
-
-
-if __name__ == '__main__':
-    define(
-        'url',
-        default=os.environ.get('JUPYTERHUB_API_URL'),
-        help="The JupyterHub API URL",
-    )
-    define('timeout', default=600, help="The idle timeout (in seconds)")
-    define(
-        'cull_every',
-        default=0,
-        help="The interval (in seconds) for checking for idle servers to cull",
-    )
-    define(
-        'max_age',
-        default=0,
-        help="The maximum age (in seconds) of servers that should be culled even if they are active",
-    )
-    define(
-        'cull_users',
-        default=False,
-        help="""Cull users in addition to servers.
-                This is for use in temporary-user cases such as tmpnb.""",
-    )
-    define(
-        'concurrency',
-        default=10,
-        help="""Limit the number of concurrent requests made to the Hub.
-
-                Deleting a lot of users at the same time can slow down the Hub,
-                so limit the number of API requests we have outstanding at any given time.
-                """,
-    )
-
-    parse_command_line()
-    if not options.cull_every:
-        options.cull_every = options.timeout // 2
-    api_token = os.environ['JUPYTERHUB_API_TOKEN']
-
-    try:
-        AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
-    except ImportError as e:
-        app_log.warning(
-            "Could not load pycurl: %s\n"
-            "pycurl is recommended if you have a large number of users.",
-            e,
-        )
-
-    loop = IOLoop.current()
-    cull = partial(
-        cull_idle,
-        url=options.url,
-        api_token=api_token,
-        inactive_limit=options.timeout,
-        cull_users=options.cull_users,
-        max_age=options.max_age,
-        concurrency=options.concurrency,
-    )
-    # schedule first cull immediately
-    # because PeriodicCallback doesn't start until the end of the first interval
-    loop.add_callback(cull)
-    # schedule periodic cull
-    pc = PeriodicCallback(cull, 1e3 * options.cull_every)
-    pc.start()
-    try:
-        loop.start()
-    except KeyboardInterrupt:
-        pass
--- a/examples/cull-idle/jupyterhub_config.py
+++ b/examples/cull-idle/jupyterhub_config.py
@@ -1,11 +0,0 @@
-import sys
-
-# run cull-idle as a service
-
-c.JupyterHub.services = [
-    {
-        'name': 'cull-idle',
-        'admin': True,
-        'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'],
-    }
-]