mirror of
https://github.com/jupyterhub/jupyterhub.git
synced 2025-10-07 10:04:07 +00:00
Remove idle culler example
Has been moved to its own repo. See https://github.com/jupyterhub/the-littlest-jupyterhub/pull/559 for more info
This commit is contained in:
@@ -1,41 +1,4 @@
|
||||
# `cull-idle` Example
|
||||
# idle-culler example
|
||||
|
||||
The `cull_idle_servers.py` file provides a script to cull and shut down idle
|
||||
single-user notebook servers. This script is used when `cull-idle` is run as
|
||||
a Service or when it is run manually as a standalone script.
|
||||
|
||||
|
||||
## Configure `cull-idle` to run as a Hub-Managed Service
|
||||
|
||||
In `jupyterhub_config.py`, add the following dictionary for the `cull-idle`
|
||||
Service to the `c.JupyterHub.services` list:
|
||||
|
||||
```python
|
||||
c.JupyterHub.services = [
|
||||
{
|
||||
'name': 'cull-idle',
|
||||
'admin': True,
|
||||
'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'],
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
where:
|
||||
|
||||
- `'admin': True` indicates that the Service has 'admin' permissions, and
|
||||
- `'command'` indicates that the Service will be managed by the Hub.
|
||||
|
||||
## Run `cull-idle` manually as a standalone script
|
||||
|
||||
This will run `cull-idle` manually. `cull-idle` can be run as a standalone
|
||||
script anywhere with access to the Hub, and will periodically check for idle
|
||||
servers and shut them down via the Hub's REST API. In order to shut down the
|
||||
servers, the token given to cull-idle must have admin privileges.
|
||||
|
||||
Generate an API token and store it in the `JUPYTERHUB_API_TOKEN` environment
|
||||
variable. Run `cull_idle_servers.py` manually.
|
||||
|
||||
```bash
|
||||
export JUPYTERHUB_API_TOKEN=$(jupyterhub token)
|
||||
python3 cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api]
|
||||
```
|
||||
The idle culler has been moved to its own repository at
|
||||
[jupyterhub/jupyterhub-idle-culler](https://github.com/jupyterhub/jupyterhub-idle-culler).
|
@@ -1,401 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""script to monitor and cull idle single-user servers
|
||||
|
||||
Caveats:
|
||||
|
||||
last_activity is not updated with high frequency,
|
||||
so cull timeout should be greater than the sum of:
|
||||
|
||||
- single-user websocket ping interval (default: 30s)
|
||||
- JupyterHub.last_activity_interval (default: 5 minutes)
|
||||
|
||||
You can run this as a service managed by JupyterHub with this in your config::
|
||||
|
||||
|
||||
c.JupyterHub.services = [
|
||||
{
|
||||
'name': 'cull-idle',
|
||||
'admin': True,
|
||||
'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'],
|
||||
}
|
||||
]
|
||||
|
||||
Or run it manually by generating an API token and storing it in `JUPYTERHUB_API_TOKEN`:
|
||||
|
||||
export JUPYTERHUB_API_TOKEN=$(jupyterhub token)
|
||||
python3 cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api]
|
||||
|
||||
This script uses the same ``--timeout`` and ``--max-age`` values for
|
||||
culling users and users' servers. If you want a different value for
|
||||
users and servers, you should add this script to the services list
|
||||
twice, just with different ``name``s, different values, and one with
|
||||
the ``--cull-users`` option.
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from functools import partial
|
||||
|
||||
try:
|
||||
from urllib.parse import quote
|
||||
except ImportError:
|
||||
from urllib import quote
|
||||
|
||||
import dateutil.parser
|
||||
|
||||
from tornado.gen import coroutine, multi
|
||||
from tornado.locks import Semaphore
|
||||
from tornado.log import app_log
|
||||
from tornado.httpclient import AsyncHTTPClient, HTTPRequest
|
||||
from tornado.ioloop import IOLoop, PeriodicCallback
|
||||
from tornado.options import define, options, parse_command_line
|
||||
|
||||
|
||||
def parse_date(date_string):
    """Convert a timestamp string into a timezone-aware datetime.

    The string is parsed with ``dateutil.parser.parse``. A result that
    carries no tzinfo is treated as UTC, so the returned datetime is
    always timezone-aware.
    """
    parsed = dateutil.parser.parse(date_string)
    if parsed.tzinfo:
        return parsed
    # naive timestamps are taken to be UTC
    return parsed.replace(tzinfo=timezone.utc)
|
||||
|
||||
|
||||
def format_td(td):
    """Nicely format a timedelta object as HH:MM:SS.

    Args:
        td: A ``datetime.timedelta``, a string, or None.

    Returns:
        ``"unknown"`` when ``td`` is None, ``td`` itself when it is already
        a string, otherwise the duration rendered as ``HH:MM:SS``. Hours are
        not wrapped at 24, so two days render as ``48:00:00``. Negative
        durations are rendered as the magnitude prefixed with ``-``.
    """
    if td is None:
        return "unknown"
    if isinstance(td, str):
        return td
    total = int(td.total_seconds())
    # Format the magnitude and add the sign separately: floor division on a
    # negative number of seconds would otherwise give e.g. "-1:59:30" for -30s.
    sign = "-" if total < 0 else ""
    minutes, seconds = divmod(abs(total), 60)
    hours, minutes = divmod(minutes, 60)
    return "{sign}{h:02}:{m:02}:{s:02}".format(
        sign=sign, h=hours, m=minutes, s=seconds
    )
|
||||
|
||||
|
||||
@coroutine
def cull_idle(
    url, api_token, inactive_limit, cull_users=False, max_age=0, concurrency=10
):
    """Shutdown idle single-user servers

    If cull_users, inactive *users* will be deleted as well.

    Args:
        url: Base URL of the Hub API; ``/users`` and the per-user paths
            are appended to it.
        api_token: Token sent in the ``Authorization`` header of every request.
        inactive_limit: Seconds of inactivity after which a server
            (and, with ``cull_users``, a user) is culled.
        cull_users: When true, users whose servers are all stopped are
            deleted too, subject to the same limits.
        max_age: When nonzero, servers/users at least this many seconds old
            are culled even if they are still active.
        concurrency: Maximum number of requests in flight at once;
            a falsy value disables the limit.
    """
    auth_header = {'Authorization': 'token %s' % api_token}
    req = HTTPRequest(url=url + '/users', headers=auth_header)
    now = datetime.now(timezone.utc)
    client = AsyncHTTPClient()

    if concurrency:
        semaphore = Semaphore(concurrency)

        @coroutine
        def fetch(req):
            """client.fetch wrapped in a semaphore to limit concurrency"""
            yield semaphore.acquire()
            try:
                return (yield client.fetch(req))
            finally:
                # release() is a plain synchronous call; there is nothing
                # to wait for, so it is not yielded
                semaphore.release()

    else:
        fetch = client.fetch

    resp = yield fetch(req)
    users = json.loads(resp.body.decode('utf8', 'replace'))
    futures = []

    @coroutine
    def handle_server(user, server_name, server, max_age, inactive_limit):
        """Handle (maybe) culling a single server

        "server" is the entire server model from the API.

        Returns True if server is now stopped (user removable),
        False otherwise.
        """
        log_name = user['name']
        if server_name:
            log_name = '%s/%s' % (user['name'], server_name)
        if server.get('pending'):
            app_log.warning(
                "Not culling server %s with pending %s", log_name, server['pending']
            )
            return False

        # jupyterhub < 0.9 defined 'server.url' once the server was ready
        # as an *implicit* signal that the server was ready.
        # 0.9 adds a dedicated, explicit 'ready' field.
        # By current (0.9) definitions, servers that have no pending
        # events and are not ready shouldn't be in the model,
        # but let's check just to be safe.

        if not server.get('ready', bool(server['url'])):
            app_log.warning(
                "Not culling not-ready not-pending server %s: %s", log_name, server
            )
            return False

        if server.get('started'):
            age = now - parse_date(server['started'])
        else:
            # started may be undefined on jupyterhub < 0.9
            age = None

        # check last activity
        # last_activity can be None in 0.9
        if server['last_activity']:
            inactive = now - parse_date(server['last_activity'])
        else:
            # no activity yet, use start date
            # last_activity may be None with jupyterhub 0.9,
            # which introduces the 'started' field which is never None
            # for running servers
            inactive = age

        # CUSTOM CULLING TEST CODE HERE
        # Add in additional server tests here.  Return False to mean "don't
        # cull", True means "cull immediately", or, for example, update some
        # other variables like inactive_limit.
        #
        # Here, server['state'] is the result of the get_state method
        # on the spawner.  This does *not* contain the below by
        # default, you may have to modify your spawner to make this
        # work.  The `user` variable is the user model from the API.
        #
        # if server['state']['profile_name'] == 'unlimited'
        #     return False
        # inactive_limit = server['state']['culltime']

        should_cull = (
            inactive is not None and inactive.total_seconds() >= inactive_limit
        )
        if should_cull:
            app_log.info(
                "Culling server %s (inactive for %s)", log_name, format_td(inactive)
            )

        if max_age and not should_cull:
            # only check started if max_age is specified
            # so that we can still be compatible with jupyterhub 0.8
            # which doesn't define the 'started' field
            if age is not None and age.total_seconds() >= max_age:
                app_log.info(
                    "Culling server %s (age: %s, inactive for %s)",
                    log_name,
                    format_td(age),
                    format_td(inactive),
                )
                should_cull = True

        if not should_cull:
            app_log.debug(
                "Not culling server %s (age: %s, inactive for %s)",
                log_name,
                format_td(age),
                format_td(inactive),
            )
            return False

        if server_name:
            # culling a named server
            delete_url = url + "/users/%s/servers/%s" % (
                quote(user['name']),
                quote(server['name']),
            )
        else:
            delete_url = url + '/users/%s/server' % quote(user['name'])

        req = HTTPRequest(url=delete_url, method='DELETE', headers=auth_header)
        resp = yield fetch(req)
        if resp.code == 202:
            app_log.warning("Server %s is slow to stop", log_name)
            # return False to prevent culling user with pending shutdowns
            return False
        return True

    @coroutine
    def handle_user(user):
        """Handle one user.

        Create a list of their servers, and async exec them.  Wait for
        that to be done, and if all servers are stopped, possibly cull
        the user.

        Returns True if the user was deleted, False if the user was kept,
        and None when user culling is disabled.
        """
        # shutdown servers first.
        # Hub doesn't allow deleting users with running servers.
        # jupyterhub 0.9 always provides a 'servers' model.
        # 0.8 only does this when named servers are enabled.
        if 'servers' in user:
            servers = user['servers']
        else:
            # jupyterhub < 0.9 without named servers enabled.
            # create servers dict with one entry for the default server
            # from the user model.
            # only if the server is running.
            servers = {}
            if user['server']:
                servers[''] = {
                    'last_activity': user['last_activity'],
                    'pending': user['pending'],
                    'url': user['server'],
                }
        server_futures = [
            handle_server(user, server_name, server, max_age, inactive_limit)
            for server_name, server in servers.items()
        ]
        results = yield multi(server_futures)
        if not cull_users:
            return
        # some servers are still running, cannot cull users
        still_alive = len(results) - sum(results)
        if still_alive:
            app_log.debug(
                "Not culling user %s with %i servers still alive",
                user['name'],
                still_alive,
            )
            return False

        should_cull = False
        if user.get('created'):
            age = now - parse_date(user['created'])
        else:
            # created may be undefined on jupyterhub < 0.9
            age = None

        # check last activity
        # last_activity can be None in 0.9
        if user['last_activity']:
            inactive = now - parse_date(user['last_activity'])
        else:
            # no activity yet, use start date
            # last_activity may be None with jupyterhub 0.9,
            # which introduces the 'created' field which is never None
            inactive = age

        should_cull = (
            inactive is not None and inactive.total_seconds() >= inactive_limit
        )
        if should_cull:
            # format the duration the same way as every other log message
            app_log.info(
                "Culling user %s (inactive for %s)", user['name'], format_td(inactive)
            )

        if max_age and not should_cull:
            # only check created if max_age is specified
            # so that we can still be compatible with jupyterhub 0.8
            # which doesn't define the 'created' field
            if age is not None and age.total_seconds() >= max_age:
                app_log.info(
                    "Culling user %s (age: %s, inactive for %s)",
                    user['name'],
                    format_td(age),
                    format_td(inactive),
                )
                should_cull = True

        if not should_cull:
            app_log.debug(
                "Not culling user %s (created: %s, last active: %s)",
                user['name'],
                format_td(age),
                format_td(inactive),
            )
            return False

        # quote the username, as the server DELETE URLs do, so names
        # containing reserved characters still form a valid path
        req = HTTPRequest(
            url=url + '/users/%s' % quote(user['name']),
            method='DELETE',
            headers=auth_header,
        )
        yield fetch(req)
        return True

    for user in users:
        futures.append((user['name'], handle_user(user)))

    # wait on each user separately so one failure does not abort the rest
    for (name, f) in futures:
        try:
            result = yield f
        except Exception:
            app_log.exception("Error processing %s", name)
        else:
            if result:
                app_log.debug("Finished culling %s", name)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line options; values are read back from `options`
    # after parse_command_line().
    define(
        'url',
        default=os.environ.get('JUPYTERHUB_API_URL'),
        help="The JupyterHub API URL",
    )
    define('timeout', default=600, help="The idle timeout (in seconds)")
    define(
        'cull_every',
        default=0,
        help="The interval (in seconds) for checking for idle servers to cull",
    )
    define(
        'max_age',
        default=0,
        help="The maximum age (in seconds) of servers that should be culled even if they are active",
    )
    define(
        'cull_users',
        default=False,
        help="""Cull users in addition to servers.
                This is for use in temporary-user cases such as tmpnb.""",
    )
    define(
        'concurrency',
        default=10,
        help="""Limit the number of concurrent requests made to the Hub.

                Deleting a lot of users at the same time can slow down the Hub,
                so limit the number of API requests we have outstanding at any given time.
                """,
    )

    parse_command_line()
    if not options.url:
        # The default is None when JUPYTERHUB_API_URL is unset; without a URL
        # every cull pass would fail, so stop here with a clear message.
        raise SystemExit(
            "No JupyterHub API URL given: pass --url or set JUPYTERHUB_API_URL"
        )
    if not options.cull_every:
        # Default to checking twice per timeout period. Keep the interval
        # at least one second: a zero interval cannot be scheduled.
        options.cull_every = max(1, options.timeout // 2)
    api_token = os.environ['JUPYTERHUB_API_TOKEN']

    try:
        AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
    except ImportError as e:
        app_log.warning(
            "Could not load pycurl: %s\n"
            "pycurl is recommended if you have a large number of users.",
            e,
        )

    loop = IOLoop.current()
    cull = partial(
        cull_idle,
        url=options.url,
        api_token=api_token,
        inactive_limit=options.timeout,
        cull_users=options.cull_users,
        max_age=options.max_age,
        concurrency=options.concurrency,
    )
    # schedule first cull immediately
    # because PeriodicCallback doesn't start until the end of the first interval
    loop.add_callback(cull)
    # schedule periodic cull
    # (cull_every is in seconds; 1e3 converts it to milliseconds)
    pc = PeriodicCallback(cull, 1e3 * options.cull_every)
    pc.start()
    try:
        loop.start()
    except KeyboardInterrupt:
        pass
|
@@ -1,11 +0,0 @@
|
||||
import sys
|
||||
|
||||
# Register the idle culler as a service.
# NOTE(review): `c` is not defined in this file; presumably it is the config
# object supplied by whatever loads this file — confirm.
c.JupyterHub.services = [
    dict(
        name='cull-idle',
        admin=True,
        command=[sys.executable, 'cull_idle_servers.py', '--timeout=3600'],
    )
]
|
Reference in New Issue
Block a user