Merge pull request #4525 from danilopeixoto/metrics-prefix

Add `JUPYTERHUB_METRICS_PREFIX` environment variable to customize metrics prefix
Min RK authored on 2023-08-10 12:48:14 +02:00, committed by GitHub
4 changed files with 76 additions and 25 deletions


@@ -18,3 +18,17 @@ tool like [Grafana](https://grafana.com).
 /reference/metrics
 ```
+
+## Customizing the metrics prefix
+
+JupyterHub metrics all have a `jupyterhub_` prefix.
+As of JupyterHub 5.0, this can be overridden with the `$JUPYTERHUB_METRICS_PREFIX` environment variable
+in the Hub's environment.
+
+For example,
+
+```bash
+export JUPYTERHUB_METRICS_PREFIX=jupyterhub_prod
+```
+
+would result in the metric `jupyterhub_prod_active_users`, etc.
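
As a standalone illustration of how that override propagates (a minimal sketch using only `prometheus_client`, not code from this diff), the `namespace` argument is joined to the metric name with an underscore, so whatever `$JUPYTERHUB_METRICS_PREFIX` contains becomes the leading segment of every exported sample name:

```python
# Standalone sketch (not JupyterHub code): prometheus_client joins the
# namespace and the metric name with an underscore, which is how the
# JUPYTERHUB_METRICS_PREFIX override reaches the exported sample names.
import os

from prometheus_client import CollectorRegistry, Gauge, generate_latest

registry = CollectorRegistry()
prefix = os.getenv('JUPYTERHUB_METRICS_PREFIX', 'jupyterhub')

active_users = Gauge(
    'active_users',
    'Number of users who were active in the given time period',
    ['period'],
    namespace=prefix,
    registry=registry,
)
active_users.labels(period='24h').set(3)

# With JUPYTERHUB_METRICS_PREFIX=jupyterhub_prod the output includes a sample
# named jupyterhub_prod_active_users{period="24h"}; with the default prefix
# it is jupyterhub_active_users{period="24h"}.
print(generate_latest(registry).decode())
```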


@@ -3,9 +3,11 @@ Prometheus metrics exported by JupyterHub
 Read https://prometheus.io/docs/practices/naming/ for naming
 conventions for metrics & labels. We generally prefer naming them
-`jupyterhub_<noun>_<verb>_<type_suffix>`. So a histogram that's tracking
+`<noun>_<verb>_<type_suffix>`. So a histogram that's tracking
 the duration (in seconds) of servers spawning would be called
-jupyterhub_server_spawn_duration_seconds.
+server_spawn_duration_seconds.
+A namespace prefix is always added, so this metric is accessed as
+`jupyterhub_server_spawn_duration_seconds` by default.
 
 We also create an Enum for each 'status' type label in every metric
 we collect. This is to make sure that the metrics exist regardless
@@ -19,6 +21,8 @@ them manually here.
 added ``jupyterhub_`` prefix to metric names.
 """
+import os
 from datetime import timedelta
 from enum import Enum
@@ -30,49 +34,66 @@ from traitlets.config import LoggingConfigurable
 from . import orm
 from .utils import utcnow
 
+metrics_prefix = os.getenv('JUPYTERHUB_METRICS_PREFIX', 'jupyterhub')
+
 REQUEST_DURATION_SECONDS = Histogram(
-    'jupyterhub_request_duration_seconds',
-    'request duration for all HTTP requests',
+    'request_duration_seconds',
+    'Request duration for all HTTP requests',
     ['method', 'handler', 'code'],
+    namespace=metrics_prefix,
 )
 
 SERVER_SPAWN_DURATION_SECONDS = Histogram(
-    'jupyterhub_server_spawn_duration_seconds',
-    'time taken for server spawning operation',
+    'server_spawn_duration_seconds',
+    'Time taken for server spawning operation',
     ['status'],
     # Use custom bucket sizes, since the default bucket ranges
     # are meant for quick running processes. Spawns can take a while!
     buckets=[0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, 180, 300, 600, float("inf")],
+    namespace=metrics_prefix,
 )
 
 RUNNING_SERVERS = Gauge(
-    'jupyterhub_running_servers', 'the number of user servers currently running'
+    'running_servers',
+    'The number of user servers currently running',
+    namespace=metrics_prefix,
 )
 
-TOTAL_USERS = Gauge('jupyterhub_total_users', 'total number of users')
+TOTAL_USERS = Gauge(
+    'total_users',
+    'Total number of users',
+    namespace=metrics_prefix,
+)
 
 ACTIVE_USERS = Gauge(
-    'jupyterhub_active_users',
-    'number of users who were active in the given time period',
+    'active_users',
+    'Number of users who were active in the given time period',
    ['period'],
+    namespace=metrics_prefix,
 )
 
 CHECK_ROUTES_DURATION_SECONDS = Histogram(
-    'jupyterhub_check_routes_duration_seconds',
+    'check_routes_duration_seconds',
     'Time taken to validate all routes in proxy',
+    namespace=metrics_prefix,
 )
 
 HUB_STARTUP_DURATION_SECONDS = Histogram(
-    'jupyterhub_hub_startup_duration_seconds', 'Time taken for Hub to start'
+    'hub_startup_duration_seconds',
+    'Time taken for Hub to start',
+    namespace=metrics_prefix,
 )
 
 INIT_SPAWNERS_DURATION_SECONDS = Histogram(
-    'jupyterhub_init_spawners_duration_seconds', 'Time taken for spawners to initialize'
+    'init_spawners_duration_seconds',
+    'Time taken for spawners to initialize',
+    namespace=metrics_prefix,
 )
 
 PROXY_POLL_DURATION_SECONDS = Histogram(
-    'jupyterhub_proxy_poll_duration_seconds',
-    'duration for polling all routes from proxy',
+    'proxy_poll_duration_seconds',
+    'Duration for polling all routes from proxy',
+    namespace=metrics_prefix,
 )
@@ -97,9 +118,10 @@ for s in ServerSpawnStatus:
 PROXY_ADD_DURATION_SECONDS = Histogram(
-    'jupyterhub_proxy_add_duration_seconds',
-    'duration for adding user routes to proxy',
+    'proxy_add_duration_seconds',
+    'Duration for adding user routes to proxy',
     ['status'],
+    namespace=metrics_prefix,
 )
@@ -120,9 +142,10 @@ for s in ProxyAddStatus:
 SERVER_POLL_DURATION_SECONDS = Histogram(
-    'jupyterhub_server_poll_duration_seconds',
-    'time taken to poll if server is running',
+    'server_poll_duration_seconds',
+    'Time taken to poll if server is running',
     ['status'],
+    namespace=metrics_prefix,
 )
@@ -147,9 +170,10 @@ for s in ServerPollStatus:
 SERVER_STOP_DURATION_SECONDS = Histogram(
-    'jupyterhub_server_stop_seconds',
-    'time taken for server stopping operation',
+    'server_stop_seconds',
+    'Time taken for server stopping operation',
     ['status'],
+    namespace=metrics_prefix,
 )
@@ -170,9 +194,10 @@ for s in ServerStopStatus:
 PROXY_DELETE_DURATION_SECONDS = Histogram(
-    'jupyterhub_proxy_delete_duration_seconds',
-    'duration for deleting user routes from proxy',
+    'proxy_delete_duration_seconds',
+    'Duration for deleting user routes from proxy',
     ['status'],
+    namespace=metrics_prefix,
 )
@@ -239,7 +264,7 @@ class PeriodicMetricsCollector(LoggingConfigurable):
         help="""
         Enable active_users prometheus metric.
 
-        Populates a `jupyterhub_active_users` prometheus metric, with a label `period` that counts the time period
+        Populates an `active_users` prometheus metric, with a label `period` that counts the time period
         over which these many users were active. Periods are 24h (24 hours), 7d (7 days) and 30d (30 days).
         """,
         config=True,
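
To see what the `namespace=metrics_prefix` argument does for the histogram definitions above, here is a minimal standalone sketch (only `prometheus_client` assumed, not JupyterHub code) of the spawn-duration histogram with its custom buckets; the prefix ends up on every generated series:

```python
# Standalone sketch: a namespaced Histogram exports _bucket, _sum and _count
# series that all carry the same prefix, so overriding the prefix renames
# every series of the metric at once.
from prometheus_client import CollectorRegistry, Histogram, generate_latest

registry = CollectorRegistry()

server_spawn_duration_seconds = Histogram(
    'server_spawn_duration_seconds',
    'Time taken for server spawning operation',
    ['status'],
    buckets=[0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, 180, 300, 600, float("inf")],
    namespace='jupyterhub',
    registry=registry,
)
server_spawn_duration_seconds.labels(status='success').observe(12.3)

# The exposition now contains, among others:
#   jupyterhub_server_spawn_duration_seconds_bucket
#   jupyterhub_server_spawn_duration_seconds_sum
#   jupyterhub_server_spawn_duration_seconds_count
print(generate_latest(registry).decode())
```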


@@ -10,6 +10,18 @@ from ..utils import utcnow
 from .utils import add_user, api_request, get_page
 
 
+@pytest.mark.parametrize(
+    "metric_object, expected_names",
+    [
+        (metrics.TOTAL_USERS, ['jupyterhub_total_users']),
+        (metrics.REQUEST_DURATION_SECONDS, ['jupyterhub_request_duration_seconds']),
+    ],
+)
+def test_metric_names(metric_object, expected_names):
+    for metric, expected_name in zip(metric_object.describe(), expected_names):
+        assert metric.name == expected_name
+
+
 async def test_total_users(app):
     num_users = app.db.query(orm.User).count()
     sample = metrics.TOTAL_USERS.collect()[0].samples[0]
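
The new `test_metric_names` relies on `describe()` returning metric families whose `name` already carries the namespace; a small standalone check of that behavior (hypothetical, not part of the test suite) looks like this:

```python
# Hypothetical standalone check (not part of the diff): .describe() returns
# metric families whose .name already includes the namespace prefix, which
# is exactly what test_metric_names asserts against.
from prometheus_client import CollectorRegistry, Gauge

registry = CollectorRegistry()
total_users = Gauge(
    'total_users',
    'Total number of users',
    namespace='jupyterhub',
    registry=registry,
)

assert [m.name for m in total_users.describe()] == ['jupyterhub_total_users']
```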


@@ -7,7 +7,7 @@ jupyter_telemetry>=0.1.0
 oauthlib>=3.0
 packaging
 pamela>=1.1.0; sys_platform != 'win32'
-prometheus_client>=0.4.0
+prometheus_client>=0.5.0
 psutil>=5.6.5; sys_platform == 'win32'
 python-dateutil
 requests