Mirror of https://github.com/jupyterhub/jupyterhub.git (synced 2025-10-08 02:24:08 +00:00)
Merge pull request #4525 from danilopeixoto/metrics-prefix
Add `JUPYTERHUB_METRICS_PREFIX` environment variable to customize metrics prefix
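In short: instead of hard-coding the `jupyterhub_` prefix into every metric name, the metrics module now passes a configurable `namespace` to `prometheus_client`, taken from `$JUPYTERHUB_METRICS_PREFIX`. A minimal standalone sketch of that mechanism (not the actual `jupyterhub/metrics.py`; it assumes `prometheus_client>=0.5`, the version this PR pins):

```python
# Minimal sketch: prometheus_client's `namespace` argument prepends the
# configurable prefix to the base metric name.
import os

from prometheus_client import Gauge

# Same default as the PR: fall back to the usual 'jupyterhub' prefix.
metrics_prefix = os.getenv('JUPYTERHUB_METRICS_PREFIX', 'jupyterhub')

TOTAL_USERS = Gauge(
    'total_users',               # base name, no hard-coded prefix
    'Total number of users',
    namespace=metrics_prefix,    # exported as <prefix>_total_users
)

# With the variable unset this prints 'jupyterhub_total_users'.
print(TOTAL_USERS.describe()[0].name)
```

A custom prefix can be useful, for example, when a single Prometheus server scrapes several Hubs (staging and production, say) and the metric names need to stay distinguishable.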
@@ -18,3 +18,17 @@ tool like [Grafana](https://grafana.com).

 /reference/metrics
 ```
+
+## Customizing the metrics prefix
+
+JupyterHub metrics all have a `jupyterhub_` prefix.
+As of JupyterHub 5.0, this can be overridden with the `$JUPYTERHUB_METRICS_PREFIX` environment variable
+in the Hub's environment.
+
+For example,
+
+```bash
+export JUPYTERHUB_METRICS_PREFIX=jupyterhub_prod
+```
+
+would result in the metric `jupyterhub_prod_active_users`, etc.
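To confirm the rename end to end against a running Hub, something like the following can be used. This is a hypothetical check, not part of this PR: it assumes a Hub at `http://localhost:8000`, an API token in `JUPYTERHUB_API_TOKEN`, and that the metrics endpoint requires authentication (which depends on the Hub's `authenticate_prometheus` setting):

```python
# Hypothetical verification sketch: list the metric names that carry the
# custom prefix. The URL, the token variable, and the need for a token are
# assumptions about the deployment, not part of the PR.
import os

import requests

resp = requests.get(
    "http://localhost:8000/hub/metrics",
    headers={"Authorization": f"token {os.environ['JUPYTERHUB_API_TOKEN']}"},
)
resp.raise_for_status()

prefixed = sorted(
    {
        line.split()[0].split("{")[0]   # strip the value and any labels
        for line in resp.text.splitlines()
        if line.startswith("jupyterhub_prod_")
    }
)
print("\n".join(prefixed))  # e.g. jupyterhub_prod_active_users, ...
```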
@@ -3,9 +3,11 @@ Prometheus metrics exported by JupyterHub

 Read https://prometheus.io/docs/practices/naming/ for naming
 conventions for metrics & labels. We generally prefer naming them
-`jupyterhub_<noun>_<verb>_<type_suffix>`. So a histogram that's tracking
+`<noun>_<verb>_<type_suffix>`. So a histogram that's tracking
 the duration (in seconds) of servers spawning would be called
-jupyterhub_server_spawn_duration_seconds.
+server_spawn_duration_seconds.
+A namespace prefix is always added, so this metric is accessed as
+`jupyterhub_server_spawn_duration_seconds` by default.

 We also create an Enum for each 'status' type label in every metric
 we collect. This is to make sure that the metrics exist regardless
@@ -19,6 +21,8 @@ them manually here.

 added ``jupyterhub_`` prefix to metric names.
 """

+import os
 from datetime import timedelta
 from enum import Enum
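For context, the "Enum for each 'status' label" practice the docstring describes looks roughly like this. This is a paraphrased, self-contained sketch with illustrative status values; the real definitions live in `jupyterhub/metrics.py` (the spawn histogram itself appears in the next hunk):

```python
import os
from enum import Enum

from prometheus_client import Histogram

metrics_prefix = os.getenv('JUPYTERHUB_METRICS_PREFIX', 'jupyterhub')

SERVER_SPAWN_DURATION_SECONDS = Histogram(
    'server_spawn_duration_seconds',
    'Time taken for server spawning operation',
    ['status'],
    namespace=metrics_prefix,
)


class ServerSpawnStatus(Enum):
    """Illustrative values for the 'status' label."""

    success = 'success'
    failure = 'failure'

    def __str__(self):
        return self.value


# Pre-create one labelled child per status so every series is exported
# (with zero observations) even before the first spawn is recorded.
for s in ServerSpawnStatus:
    SERVER_SPAWN_DURATION_SECONDS.labels(status=s)
```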
@@ -30,49 +34,66 @@ from traitlets.config import LoggingConfigurable

 from . import orm
 from .utils import utcnow

+metrics_prefix = os.getenv('JUPYTERHUB_METRICS_PREFIX', 'jupyterhub')
+
 REQUEST_DURATION_SECONDS = Histogram(
-    'jupyterhub_request_duration_seconds',
-    'request duration for all HTTP requests',
+    'request_duration_seconds',
+    'Request duration for all HTTP requests',
     ['method', 'handler', 'code'],
+    namespace=metrics_prefix,
 )

 SERVER_SPAWN_DURATION_SECONDS = Histogram(
-    'jupyterhub_server_spawn_duration_seconds',
-    'time taken for server spawning operation',
+    'server_spawn_duration_seconds',
+    'Time taken for server spawning operation',
     ['status'],
     # Use custom bucket sizes, since the default bucket ranges
     # are meant for quick running processes. Spawns can take a while!
     buckets=[0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, 180, 300, 600, float("inf")],
+    namespace=metrics_prefix,
 )

 RUNNING_SERVERS = Gauge(
-    'jupyterhub_running_servers', 'the number of user servers currently running'
+    'running_servers',
+    'The number of user servers currently running',
+    namespace=metrics_prefix,
 )

-TOTAL_USERS = Gauge('jupyterhub_total_users', 'total number of users')
+TOTAL_USERS = Gauge(
+    'total_users',
+    'Total number of users',
+    namespace=metrics_prefix,
+)

 ACTIVE_USERS = Gauge(
-    'jupyterhub_active_users',
-    'number of users who were active in the given time period',
+    'active_users',
+    'Number of users who were active in the given time period',
     ['period'],
+    namespace=metrics_prefix,
 )

 CHECK_ROUTES_DURATION_SECONDS = Histogram(
-    'jupyterhub_check_routes_duration_seconds',
+    'check_routes_duration_seconds',
     'Time taken to validate all routes in proxy',
+    namespace=metrics_prefix,
 )

 HUB_STARTUP_DURATION_SECONDS = Histogram(
-    'jupyterhub_hub_startup_duration_seconds', 'Time taken for Hub to start'
+    'hub_startup_duration_seconds',
+    'Time taken for Hub to start',
+    namespace=metrics_prefix,
 )

 INIT_SPAWNERS_DURATION_SECONDS = Histogram(
-    'jupyterhub_init_spawners_duration_seconds', 'Time taken for spawners to initialize'
+    'init_spawners_duration_seconds',
+    'Time taken for spawners to initialize',
+    namespace=metrics_prefix,
 )

 PROXY_POLL_DURATION_SECONDS = Histogram(
-    'jupyterhub_proxy_poll_duration_seconds',
-    'duration for polling all routes from proxy',
+    'proxy_poll_duration_seconds',
+    'Duration for polling all routes from proxy',
+    namespace=metrics_prefix,
 )
@@ -97,9 +118,10 @@ for s in ServerSpawnStatus:


 PROXY_ADD_DURATION_SECONDS = Histogram(
-    'jupyterhub_proxy_add_duration_seconds',
-    'duration for adding user routes to proxy',
+    'proxy_add_duration_seconds',
+    'Duration for adding user routes to proxy',
     ['status'],
+    namespace=metrics_prefix,
 )
@@ -120,9 +142,10 @@ for s in ProxyAddStatus:


 SERVER_POLL_DURATION_SECONDS = Histogram(
-    'jupyterhub_server_poll_duration_seconds',
-    'time taken to poll if server is running',
+    'server_poll_duration_seconds',
+    'Time taken to poll if server is running',
     ['status'],
+    namespace=metrics_prefix,
 )
@@ -147,9 +170,10 @@ for s in ServerPollStatus:


 SERVER_STOP_DURATION_SECONDS = Histogram(
-    'jupyterhub_server_stop_seconds',
-    'time taken for server stopping operation',
+    'server_stop_seconds',
+    'Time taken for server stopping operation',
     ['status'],
+    namespace=metrics_prefix,
 )
@@ -170,9 +194,10 @@ for s in ServerStopStatus:


 PROXY_DELETE_DURATION_SECONDS = Histogram(
-    'jupyterhub_proxy_delete_duration_seconds',
-    'duration for deleting user routes from proxy',
+    'proxy_delete_duration_seconds',
+    'Duration for deleting user routes from proxy',
     ['status'],
+    namespace=metrics_prefix,
 )
@@ -239,7 +264,7 @@ class PeriodicMetricsCollector(LoggingConfigurable):
        help="""
        Enable active_users prometheus metric.

-        Populates a `jupyterhub_active_users` prometheus metric, with a label `period` that counts the time period
+        Populates an `active_users` prometheus metric, with a label `period` that counts the time period
        over which these many users were active. Periods are 24h (24 hours), 7d (7 days) and 30d (30 days).
        """,
        config=True,
@@ -10,6 +10,18 @@ from ..utils import utcnow

 from .utils import add_user, api_request, get_page


+@pytest.mark.parametrize(
+    "metric_object, expected_names",
+    [
+        (metrics.TOTAL_USERS, ['jupyterhub_total_users']),
+        (metrics.REQUEST_DURATION_SECONDS, ['jupyterhub_request_duration_seconds']),
+    ],
+)
+def test_metric_names(metric_object, expected_names):
+    for metric, expected_name in zip(metric_object.describe(), expected_names):
+        assert metric.name == expected_name
+
+
 async def test_total_users(app):
     num_users = app.db.query(orm.User).count()
     sample = metrics.TOTAL_USERS.collect()[0].samples[0]
@@ -7,7 +7,7 @@ jupyter_telemetry>=0.1.0

 oauthlib>=3.0
 packaging
 pamela>=1.1.0; sys_platform != 'win32'
-prometheus_client>=0.4.0
+prometheus_client>=0.5.0
 psutil>=5.6.5; sys_platform == 'win32'
 python-dateutil
 requests