Merge pull request #4525 from danilopeixoto/metrics-prefix

Add `JUPYTERHUB_METRICS_PREFIX` environment variable to customize metrics prefix
Min RK
2023-08-10 12:48:14 +02:00
committed by GitHub
4 changed files with 76 additions and 25 deletions


@@ -18,3 +18,17 @@ tool like [Grafana](https://grafana.com).
 /reference/metrics
 ```
+
+## Customizing the metrics prefix
+
+JupyterHub metrics all have a `jupyterhub_` prefix.
+As of JupyterHub 5.0, this can be overridden with the `$JUPYTERHUB_METRICS_PREFIX` environment variable
+in the Hub's environment.
+For example,
+
+```bash
+export JUPYTERHUB_METRICS_PREFIX=jupyterhub_prod
+```
+
+would result in the metric `jupyterhub_prod_active_users`, etc.
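
For reference, here is a minimal standalone sketch (not part of this diff) of how `prometheus_client`'s `namespace` argument produces the prefixed names described above. The metric name and label mirror `active_users` from the changes below; the prefix value and the throwaway registry are just illustrative assumptions:

```python
import os

from prometheus_client import CollectorRegistry, Gauge, generate_latest

# Read the prefix the same way the Hub does, defaulting to "jupyterhub".
prefix = os.getenv("JUPYTERHUB_METRICS_PREFIX", "jupyterhub")

# Use a throwaway registry so this sketch doesn't touch the global one.
registry = CollectorRegistry()
ACTIVE_USERS = Gauge(
    "active_users",
    "Number of users who were active in the given time period",
    ["period"],
    namespace=prefix,
    registry=registry,
)
ACTIVE_USERS.labels(period="24h").set(3)

# With JUPYTERHUB_METRICS_PREFIX=jupyterhub_prod this prints, among the
# HELP/TYPE comment lines, `jupyterhub_prod_active_users{period="24h"} 3.0`.
print(generate_latest(registry).decode())
```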


@@ -3,9 +3,11 @@ Prometheus metrics exported by JupyterHub
 Read https://prometheus.io/docs/practices/naming/ for naming
 conventions for metrics & labels. We generally prefer naming them
-`jupyterhub_<noun>_<verb>_<type_suffix>`. So a histogram that's tracking
+`<noun>_<verb>_<type_suffix>`. So a histogram that's tracking
 the duration (in seconds) of servers spawning would be called
-jupyterhub_server_spawn_duration_seconds.
+server_spawn_duration_seconds.
+A namespace prefix is always added, so this metric is accessed as
+`jupyterhub_server_spawn_duration_seconds` by default.
 
 We also create an Enum for each 'status' type label in every metric
 we collect. This is to make sure that the metrics exist regardless
@@ -19,6 +21,8 @@ them manually here.
 added ``jupyterhub_`` prefix to metric names.
 """
+
+import os
 from datetime import timedelta
 from enum import Enum
@@ -30,49 +34,66 @@ from traitlets.config import LoggingConfigurable
 from . import orm
 from .utils import utcnow
 
+metrics_prefix = os.getenv('JUPYTERHUB_METRICS_PREFIX', 'jupyterhub')
+
 REQUEST_DURATION_SECONDS = Histogram(
-    'jupyterhub_request_duration_seconds',
-    'request duration for all HTTP requests',
+    'request_duration_seconds',
+    'Request duration for all HTTP requests',
     ['method', 'handler', 'code'],
+    namespace=metrics_prefix,
 )
 
 SERVER_SPAWN_DURATION_SECONDS = Histogram(
-    'jupyterhub_server_spawn_duration_seconds',
-    'time taken for server spawning operation',
+    'server_spawn_duration_seconds',
+    'Time taken for server spawning operation',
     ['status'],
     # Use custom bucket sizes, since the default bucket ranges
     # are meant for quick running processes. Spawns can take a while!
     buckets=[0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, 180, 300, 600, float("inf")],
+    namespace=metrics_prefix,
 )
 
 RUNNING_SERVERS = Gauge(
-    'jupyterhub_running_servers', 'the number of user servers currently running'
+    'running_servers',
+    'The number of user servers currently running',
+    namespace=metrics_prefix,
 )
 
-TOTAL_USERS = Gauge('jupyterhub_total_users', 'total number of users')
+TOTAL_USERS = Gauge(
+    'total_users',
+    'Total number of users',
+    namespace=metrics_prefix,
+)
 
 ACTIVE_USERS = Gauge(
-    'jupyterhub_active_users',
-    'number of users who were active in the given time period',
+    'active_users',
+    'Number of users who were active in the given time period',
     ['period'],
+    namespace=metrics_prefix,
 )
 
 CHECK_ROUTES_DURATION_SECONDS = Histogram(
-    'jupyterhub_check_routes_duration_seconds',
     'Time taken to validate all routes in proxy',
+    'check_routes_duration_seconds',
+    'Time taken to validate all routes in proxy',
+    namespace=metrics_prefix,
 )
 
 HUB_STARTUP_DURATION_SECONDS = Histogram(
-    'jupyterhub_hub_startup_duration_seconds', 'Time taken for Hub to start'
+    'hub_startup_duration_seconds',
+    'Time taken for Hub to start',
+    namespace=metrics_prefix,
 )
 
 INIT_SPAWNERS_DURATION_SECONDS = Histogram(
-    'jupyterhub_init_spawners_duration_seconds', 'Time taken for spawners to initialize'
+    'init_spawners_duration_seconds',
+    'Time taken for spawners to initialize',
+    namespace=metrics_prefix,
 )
 
 PROXY_POLL_DURATION_SECONDS = Histogram(
-    'jupyterhub_proxy_poll_duration_seconds',
-    'duration for polling all routes from proxy',
+    'proxy_poll_duration_seconds',
+    'Duration for polling all routes from proxy',
+    namespace=metrics_prefix,
 )
@@ -97,9 +118,10 @@ for s in ServerSpawnStatus:
 PROXY_ADD_DURATION_SECONDS = Histogram(
-    'jupyterhub_proxy_add_duration_seconds',
-    'duration for adding user routes to proxy',
+    'proxy_add_duration_seconds',
+    'Duration for adding user routes to proxy',
     ['status'],
+    namespace=metrics_prefix,
 )
@@ -120,9 +142,10 @@ for s in ProxyAddStatus:
 SERVER_POLL_DURATION_SECONDS = Histogram(
-    'jupyterhub_server_poll_duration_seconds',
-    'time taken to poll if server is running',
+    'server_poll_duration_seconds',
+    'Time taken to poll if server is running',
     ['status'],
+    namespace=metrics_prefix,
 )
@@ -147,9 +170,10 @@ for s in ServerPollStatus:
 SERVER_STOP_DURATION_SECONDS = Histogram(
-    'jupyterhub_server_stop_seconds',
-    'time taken for server stopping operation',
+    'server_stop_seconds',
+    'Time taken for server stopping operation',
     ['status'],
+    namespace=metrics_prefix,
 )
@@ -170,9 +194,10 @@ for s in ServerStopStatus:
 PROXY_DELETE_DURATION_SECONDS = Histogram(
-    'jupyterhub_proxy_delete_duration_seconds',
-    'duration for deleting user routes from proxy',
+    'proxy_delete_duration_seconds',
+    'Duration for deleting user routes from proxy',
     ['status'],
+    namespace=metrics_prefix,
 )
@@ -239,7 +264,7 @@ class PeriodicMetricsCollector(LoggingConfigurable):
help=""" help="""
Enable active_users prometheus metric. Enable active_users prometheus metric.
Populates a `jupyterhub_active_users` prometheus metric, with a label `period` that counts the time period Populates a `active_users` prometheus metric, with a label `period` that counts the time period
over which these many users were active. Periods are 24h (24 hours), 7d (7 days) and 30d (30 days). over which these many users were active. Periods are 24h (24 hours), 7d (7 days) and 30d (30 days).
""", """,
config=True, config=True,
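
As a usage note (not part of this diff): once the Hub is running, the `active_users` samples can be checked by scraping the metrics endpoint. A hedged sketch, assuming a Hub reachable at `http://127.0.0.1:8000`, an API token allowed to read metrics, and the default `authenticate_prometheus = True`; the URL and token are placeholders for your deployment:

```python
import requests

# Hypothetical base URL and token; substitute your deployment's values.
hub_url = "http://127.0.0.1:8000"
api_token = "<api-token>"

resp = requests.get(
    f"{hub_url}/hub/metrics",
    headers={"Authorization": f"token {api_token}"},
)
resp.raise_for_status()

# Print only the active_users sample lines, skipping HELP/TYPE comments.
for line in resp.text.splitlines():
    if "active_users" in line and not line.startswith("#"):
        print(line)  # e.g. jupyterhub_active_users{period="24h"} 3.0
```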


@@ -10,6 +10,18 @@ from ..utils import utcnow
 from .utils import add_user, api_request, get_page
 
 
+@pytest.mark.parametrize(
+    "metric_object, expected_names",
+    [
+        (metrics.TOTAL_USERS, ['jupyterhub_total_users']),
+        (metrics.REQUEST_DURATION_SECONDS, ['jupyterhub_request_duration_seconds']),
+    ],
+)
+def test_metric_names(metric_object, expected_names):
+    for metric, expected_name in zip(metric_object.describe(), expected_names):
+        assert metric.name == expected_name
+
+
 async def test_total_users(app):
     num_users = app.db.query(orm.User).count()
     sample = metrics.TOTAL_USERS.collect()[0].samples[0]
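
For a quick interactive check outside pytest (a sketch assuming jupyterhub is installed), `describe()` reports the fully prefixed name that will be exported, which is exactly what the new test asserts:

```python
from jupyterhub import metrics

# With the default prefix this prints "jupyterhub_total_users"; if
# JUPYTERHUB_METRICS_PREFIX is set before the import, that prefix appears.
print(metrics.TOTAL_USERS.describe()[0].name)
```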


@@ -7,7 +7,7 @@ jupyter_telemetry>=0.1.0
 oauthlib>=3.0
 packaging
 pamela>=1.1.0; sys_platform != 'win32'
-prometheus_client>=0.4.0
+prometheus_client>=0.5.0
 psutil>=5.6.5; sys_platform == 'win32'
 python-dateutil
 requests