mirror of
https://github.com/jupyterhub/jupyterhub.git
synced 2025-10-08 10:34:10 +00:00
Merge pull request #4525 from danilopeixoto/metrics-prefix
Add `JUPYTERHUB_METRICS_PREFIX` environment variable to customize metrics prefix
This commit is contained in:
@@ -18,3 +18,17 @@ tool like [Grafana](https://grafana.com).
|
|||||||
|
|
||||||
/reference/metrics
|
/reference/metrics
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Customizing the metrics prefix
|
||||||
|
|
||||||
|
By default, all JupyterHub metrics have a `jupyterhub_` prefix.
|
||||||
|
As of JupyterHub 5.0, this can be overridden with the `$JUPYTERHUB_METRICS_PREFIX` environment variable
|
||||||
|
in the Hub's environment.
|
||||||
|
|
||||||
|
For example,
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export JUPYTERHUB_METRICS_PREFIX=jupyterhub_prod
|
||||||
|
```
|
||||||
|
|
||||||
|
would result in the metric `jupyterhub_prod_active_users`, etc.
|
||||||
|
@@ -3,9 +3,11 @@ Prometheus metrics exported by JupyterHub
|
|||||||
|
|
||||||
Read https://prometheus.io/docs/practices/naming/ for naming
|
Read https://prometheus.io/docs/practices/naming/ for naming
|
||||||
conventions for metrics & labels. We generally prefer naming them
|
conventions for metrics & labels. We generally prefer naming them
|
||||||
`jupyterhub_<noun>_<verb>_<type_suffix>`. So a histogram that's tracking
|
`<noun>_<verb>_<type_suffix>`. So a histogram that's tracking
|
||||||
the duration (in seconds) of servers spawning would be called
|
the duration (in seconds) of servers spawning would be called
|
||||||
jupyterhub_server_spawn_duration_seconds.
|
server_spawn_duration_seconds.
|
||||||
|
A namespace prefix is always added, so this metric is accessed as
|
||||||
|
`jupyterhub_server_spawn_duration_seconds` by default.
|
||||||
|
|
||||||
We also create an Enum for each 'status' type label in every metric
|
We also create an Enum for each 'status' type label in every metric
|
||||||
we collect. This is to make sure that the metrics exist regardless
|
we collect. This is to make sure that the metrics exist regardless
|
||||||
@@ -19,6 +21,8 @@ them manually here.
|
|||||||
|
|
||||||
added ``jupyterhub_`` prefix to metric names.
|
added ``jupyterhub_`` prefix to metric names.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
@@ -30,49 +34,66 @@ from traitlets.config import LoggingConfigurable
|
|||||||
from . import orm
|
from . import orm
|
||||||
from .utils import utcnow
|
from .utils import utcnow
|
||||||
|
|
||||||
|
metrics_prefix = os.getenv('JUPYTERHUB_METRICS_PREFIX', 'jupyterhub')
|
||||||
|
|
||||||
REQUEST_DURATION_SECONDS = Histogram(
|
REQUEST_DURATION_SECONDS = Histogram(
|
||||||
'jupyterhub_request_duration_seconds',
|
'request_duration_seconds',
|
||||||
'request duration for all HTTP requests',
|
'Request duration for all HTTP requests',
|
||||||
['method', 'handler', 'code'],
|
['method', 'handler', 'code'],
|
||||||
|
namespace=metrics_prefix,
|
||||||
)
|
)
|
||||||
|
|
||||||
SERVER_SPAWN_DURATION_SECONDS = Histogram(
|
SERVER_SPAWN_DURATION_SECONDS = Histogram(
|
||||||
'jupyterhub_server_spawn_duration_seconds',
|
'server_spawn_duration_seconds',
|
||||||
'time taken for server spawning operation',
|
'Time taken for server spawning operation',
|
||||||
['status'],
|
['status'],
|
||||||
# Use custom bucket sizes, since the default bucket ranges
|
# Use custom bucket sizes, since the default bucket ranges
|
||||||
# are meant for quick running processes. Spawns can take a while!
|
# are meant for quick running processes. Spawns can take a while!
|
||||||
buckets=[0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, 180, 300, 600, float("inf")],
|
buckets=[0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, 180, 300, 600, float("inf")],
|
||||||
|
namespace=metrics_prefix,
|
||||||
)
|
)
|
||||||
|
|
||||||
RUNNING_SERVERS = Gauge(
|
RUNNING_SERVERS = Gauge(
|
||||||
'jupyterhub_running_servers', 'the number of user servers currently running'
|
'running_servers',
|
||||||
|
'The number of user servers currently running',
|
||||||
|
namespace=metrics_prefix,
|
||||||
)
|
)
|
||||||
|
|
||||||
TOTAL_USERS = Gauge('jupyterhub_total_users', 'total number of users')
|
TOTAL_USERS = Gauge(
|
||||||
|
'total_users',
|
||||||
|
'Total number of users',
|
||||||
|
namespace=metrics_prefix,
|
||||||
|
)
|
||||||
|
|
||||||
ACTIVE_USERS = Gauge(
|
ACTIVE_USERS = Gauge(
|
||||||
'jupyterhub_active_users',
|
'active_users',
|
||||||
'number of users who were active in the given time period',
|
'Number of users who were active in the given time period',
|
||||||
['period'],
|
['period'],
|
||||||
|
namespace=metrics_prefix,
|
||||||
)
|
)
|
||||||
|
|
||||||
CHECK_ROUTES_DURATION_SECONDS = Histogram(
|
CHECK_ROUTES_DURATION_SECONDS = Histogram(
|
||||||
'jupyterhub_check_routes_duration_seconds',
|
'check_routes_duration_seconds',
|
||||||
'Time taken to validate all routes in proxy',
|
'Time taken to validate all routes in proxy',
|
||||||
|
namespace=metrics_prefix,
|
||||||
)
|
)
|
||||||
|
|
||||||
HUB_STARTUP_DURATION_SECONDS = Histogram(
|
HUB_STARTUP_DURATION_SECONDS = Histogram(
|
||||||
'jupyterhub_hub_startup_duration_seconds', 'Time taken for Hub to start'
|
'hub_startup_duration_seconds',
|
||||||
|
'Time taken for Hub to start',
|
||||||
|
namespace=metrics_prefix,
|
||||||
)
|
)
|
||||||
|
|
||||||
INIT_SPAWNERS_DURATION_SECONDS = Histogram(
|
INIT_SPAWNERS_DURATION_SECONDS = Histogram(
|
||||||
'jupyterhub_init_spawners_duration_seconds', 'Time taken for spawners to initialize'
|
'init_spawners_duration_seconds',
|
||||||
|
'Time taken for spawners to initialize',
|
||||||
|
namespace=metrics_prefix,
|
||||||
)
|
)
|
||||||
|
|
||||||
PROXY_POLL_DURATION_SECONDS = Histogram(
|
PROXY_POLL_DURATION_SECONDS = Histogram(
|
||||||
'jupyterhub_proxy_poll_duration_seconds',
|
'proxy_poll_duration_seconds',
|
||||||
'duration for polling all routes from proxy',
|
'Duration for polling all routes from proxy',
|
||||||
|
namespace=metrics_prefix,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -97,9 +118,10 @@ for s in ServerSpawnStatus:
|
|||||||
|
|
||||||
|
|
||||||
PROXY_ADD_DURATION_SECONDS = Histogram(
|
PROXY_ADD_DURATION_SECONDS = Histogram(
|
||||||
'jupyterhub_proxy_add_duration_seconds',
|
'proxy_add_duration_seconds',
|
||||||
'duration for adding user routes to proxy',
|
'Duration for adding user routes to proxy',
|
||||||
['status'],
|
['status'],
|
||||||
|
namespace=metrics_prefix,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -120,9 +142,10 @@ for s in ProxyAddStatus:
|
|||||||
|
|
||||||
|
|
||||||
SERVER_POLL_DURATION_SECONDS = Histogram(
|
SERVER_POLL_DURATION_SECONDS = Histogram(
|
||||||
'jupyterhub_server_poll_duration_seconds',
|
'server_poll_duration_seconds',
|
||||||
'time taken to poll if server is running',
|
'Time taken to poll if server is running',
|
||||||
['status'],
|
['status'],
|
||||||
|
namespace=metrics_prefix,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -147,9 +170,10 @@ for s in ServerPollStatus:
|
|||||||
|
|
||||||
|
|
||||||
SERVER_STOP_DURATION_SECONDS = Histogram(
|
SERVER_STOP_DURATION_SECONDS = Histogram(
|
||||||
'jupyterhub_server_stop_seconds',
|
'server_stop_seconds',
|
||||||
'time taken for server stopping operation',
|
'Time taken for server stopping operation',
|
||||||
['status'],
|
['status'],
|
||||||
|
namespace=metrics_prefix,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -170,9 +194,10 @@ for s in ServerStopStatus:
|
|||||||
|
|
||||||
|
|
||||||
PROXY_DELETE_DURATION_SECONDS = Histogram(
|
PROXY_DELETE_DURATION_SECONDS = Histogram(
|
||||||
'jupyterhub_proxy_delete_duration_seconds',
|
'proxy_delete_duration_seconds',
|
||||||
'duration for deleting user routes from proxy',
|
'Duration for deleting user routes from proxy',
|
||||||
['status'],
|
['status'],
|
||||||
|
namespace=metrics_prefix,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -239,7 +264,7 @@ class PeriodicMetricsCollector(LoggingConfigurable):
|
|||||||
help="""
|
help="""
|
||||||
Enable active_users prometheus metric.
|
Enable active_users prometheus metric.
|
||||||
|
|
||||||
Populates a `jupyterhub_active_users` prometheus metric, with a label `period` that counts the time period
|
Populates a `active_users` prometheus metric, with a label `period` that counts the time period
|
||||||
over which these many users were active. Periods are 24h (24 hours), 7d (7 days) and 30d (30 days).
|
over which these many users were active. Periods are 24h (24 hours), 7d (7 days) and 30d (30 days).
|
||||||
""",
|
""",
|
||||||
config=True,
|
config=True,
|
||||||
|
@@ -10,6 +10,18 @@ from ..utils import utcnow
|
|||||||
from .utils import add_user, api_request, get_page
|
from .utils import add_user, api_request, get_page
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"metric_object, expected_names",
|
||||||
|
[
|
||||||
|
(metrics.TOTAL_USERS, ['jupyterhub_total_users']),
|
||||||
|
(metrics.REQUEST_DURATION_SECONDS, ['jupyterhub_request_duration_seconds']),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_metric_names(metric_object, expected_names):
|
||||||
|
for metric, expected_name in zip(metric_object.describe(), expected_names):
|
||||||
|
assert metric.name == expected_name
|
||||||
|
|
||||||
|
|
||||||
async def test_total_users(app):
|
async def test_total_users(app):
|
||||||
num_users = app.db.query(orm.User).count()
|
num_users = app.db.query(orm.User).count()
|
||||||
sample = metrics.TOTAL_USERS.collect()[0].samples[0]
|
sample = metrics.TOTAL_USERS.collect()[0].samples[0]
|
||||||
|
@@ -7,7 +7,7 @@ jupyter_telemetry>=0.1.0
|
|||||||
oauthlib>=3.0
|
oauthlib>=3.0
|
||||||
packaging
|
packaging
|
||||||
pamela>=1.1.0; sys_platform != 'win32'
|
pamela>=1.1.0; sys_platform != 'win32'
|
||||||
prometheus_client>=0.4.0
|
prometheus_client>=0.5.0
|
||||||
psutil>=5.6.5; sys_platform == 'win32'
|
psutil>=5.6.5; sys_platform == 'win32'
|
||||||
python-dateutil
|
python-dateutil
|
||||||
requests
|
requests
|
||||||
|
Reference in New Issue
Block a user