mirror of
https://github.com/jupyterhub/jupyterhub.git
synced 2025-10-18 15:33:02 +00:00
Merge pull request #4214 from yuvipanda/metricsss
Add active users prometheus metrics
This commit is contained in:
@@ -74,6 +74,7 @@ from .metrics import (
|
|||||||
INIT_SPAWNERS_DURATION_SECONDS,
|
INIT_SPAWNERS_DURATION_SECONDS,
|
||||||
RUNNING_SERVERS,
|
RUNNING_SERVERS,
|
||||||
TOTAL_USERS,
|
TOTAL_USERS,
|
||||||
|
PeriodicMetricsCollector,
|
||||||
)
|
)
|
||||||
from .oauth.provider import make_provider
|
from .oauth.provider import make_provider
|
||||||
from .objects import Hub, Server
|
from .objects import Hub, Server
|
||||||
@@ -2914,6 +2915,8 @@ class JupyterHub(Application):
|
|||||||
await self.proxy.check_routes(self.users, self._service_map)
|
await self.proxy.check_routes(self.users, self._service_map)
|
||||||
|
|
||||||
asyncio.ensure_future(finish_init_spawners())
|
asyncio.ensure_future(finish_init_spawners())
|
||||||
|
metrics_updater = PeriodicMetricsCollector(parent=self, db=self.db)
|
||||||
|
metrics_updater.start()
|
||||||
|
|
||||||
async def cleanup(self):
|
async def cleanup(self):
|
||||||
"""Shutdown managed services and various subprocesses. Cleanup runtime files."""
|
"""Shutdown managed services and various subprocesses. Cleanup runtime files."""
|
||||||
|
@@ -19,9 +19,16 @@ them manually here.
|
|||||||
|
|
||||||
added ``jupyterhub_`` prefix to metric names.
|
added ``jupyterhub_`` prefix to metric names.
|
||||||
"""
|
"""
|
||||||
|
from datetime import timedelta
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
from prometheus_client import Gauge, Histogram
|
from prometheus_client import Gauge, Histogram
|
||||||
|
from tornado.ioloop import PeriodicCallback
|
||||||
|
from traitlets import Any, Bool, Integer
|
||||||
|
from traitlets.config import LoggingConfigurable
|
||||||
|
|
||||||
|
from . import orm
|
||||||
|
from .utils import utcnow
|
||||||
|
|
||||||
REQUEST_DURATION_SECONDS = Histogram(
|
REQUEST_DURATION_SECONDS = Histogram(
|
||||||
'jupyterhub_request_duration_seconds',
|
'jupyterhub_request_duration_seconds',
|
||||||
@@ -44,6 +51,12 @@ RUNNING_SERVERS = Gauge(
|
|||||||
|
|
||||||
TOTAL_USERS = Gauge('jupyterhub_total_users', 'total number of users')
|
TOTAL_USERS = Gauge('jupyterhub_total_users', 'total number of users')
|
||||||
|
|
||||||
|
# Gauge with one child per look-back window; the window is carried in the
# ``period`` label (see ActiveUserPeriods for the label values in use).
ACTIVE_USERS = Gauge(
    'jupyterhub_active_users',
    'number of users who were active in the given time period',
    labelnames=['period'],
)
|
||||||
|
|
||||||
CHECK_ROUTES_DURATION_SECONDS = Histogram(
|
CHECK_ROUTES_DURATION_SECONDS = Histogram(
|
||||||
'jupyterhub_check_routes_duration_seconds',
|
'jupyterhub_check_routes_duration_seconds',
|
||||||
'Time taken to validate all routes in proxy',
|
'Time taken to validate all routes in proxy',
|
||||||
@@ -179,6 +192,20 @@ for s in ProxyDeleteStatus:
|
|||||||
PROXY_DELETE_DURATION_SECONDS.labels(status=s)
|
PROXY_DELETE_DURATION_SECONDS.labels(status=s)
|
||||||
|
|
||||||
|
|
||||||
|
class ActiveUserPeriods(Enum):
    """
    Look-back windows reported through the 'period' label of ACTIVE_USERS.

    Each member's value is the literal string used as the label value.
    """

    # last 24 hours
    twenty_four_hours = '24h'
    # last 7 days
    seven_days = '7d'
    # last 30 days
    thirty_days = '30d'
|
||||||
|
|
||||||
|
|
||||||
|
# Create the labelled child of ACTIVE_USERS for every known period up front,
# mirroring how the other labelled metrics in this module are initialised.
for s in ActiveUserPeriods:
    ACTIVE_USERS.labels(period=s.value)
|
||||||
|
|
||||||
|
|
||||||
def prometheus_log_method(handler):
|
def prometheus_log_method(handler):
|
||||||
"""
|
"""
|
||||||
Tornado log handler for recording RED metrics.
|
Tornado log handler for recording RED metrics.
|
||||||
@@ -200,3 +227,69 @@ def prometheus_log_method(handler):
|
|||||||
handler=f'{handler.__class__.__module__}.{type(handler).__name__}',
|
handler=f'{handler.__class__.__module__}.{type(handler).__name__}',
|
||||||
code=handler.get_status(),
|
code=handler.get_status(),
|
||||||
).observe(handler.request.request_time())
|
).observe(handler.request.request_time())
|
||||||
|
|
||||||
|
|
||||||
|
class PeriodicMetricsCollector(LoggingConfigurable):
    """
    Collect metrics to be calculated periodically

    Currently this maintains the ``jupyterhub_active_users`` gauge by
    counting users whose ``last_activity`` falls inside each of the windows
    listed in ``ActiveUserPeriods``. Call ``start()`` once an event loop is
    available to schedule the refresh, and ``stop()`` to cancel it.
    """

    active_users_enabled = Bool(
        True,
        help="""
        Enable active_users prometheus metric.

        Populates a `jupyterhub_active_users` prometheus metric, with a label `period` that counts the time period
        over which these many users were active. Periods are 24h (24 hours), 7d (7 days) and 30d (30 days).
        """,
        config=True,
    )

    active_users_update_interval = Integer(
        60 * 60,
        help="""
        Number of seconds between updating active_users metrics.

        To avoid extra load on the database, this is only calculated periodically rather than
        at per-minute intervals. Defaults to once an hour.
        """,
        config=True,
    )

    db = Any(help="SQLAlchemy db session to use for performing queries")

    # Handle of the running PeriodicCallback, or None when nothing is scheduled.
    # Kept so the refresh can be cancelled and is never scheduled twice.
    _active_users_callback = None

    def update_active_users(self):
        """Update active users metrics.

        Runs one count query per period against ``self.db`` and stores the
        result in the matching ``ACTIVE_USERS`` child.
        """

        # All the metrics should be based off a cutoff from a *fixed* point, so we calculate
        # the fixed point here - and then calculate the individual cutoffs in relation to this
        # fixed point.
        now = utcnow()
        cutoffs = {
            ActiveUserPeriods.twenty_four_hours: now - timedelta(hours=24),
            ActiveUserPeriods.seven_days: now - timedelta(days=7),
            ActiveUserPeriods.thirty_days: now - timedelta(days=30),
        }
        for period, cutoff in cutoffs.items():
            value = (
                self.db.query(orm.User).filter(orm.User.last_activity >= cutoff).count()
            )

            # Log the label value ('24h', '7d', '30d'); formatting the enum
            # member itself would print 'ActiveUserPeriods.<name>'.
            self.log.info(f'Found {value} active users in the last {period.value}')
            ACTIVE_USERS.labels(period=period.value).set(value)

    def start(self):
        """
        Start the periodic update process

        Does nothing when ``active_users_enabled`` is false. Otherwise it
        schedules ``update_active_users`` every
        ``active_users_update_interval`` seconds and runs it once immediately.
        Calling ``start()`` again replaces the existing schedule instead of
        adding a second one.
        """
        if not self.active_users_enabled:
            return

        # Drop any callback left over from an earlier start() so that only a
        # single timer is ever active.
        self.stop()

        # Setup periodic refresh of the metric.
        # PeriodicCallback takes its interval in milliseconds.
        self._active_users_callback = PeriodicCallback(
            self.update_active_users,
            self.active_users_update_interval * 1000,
            jitter=0.01,
        )
        self._active_users_callback.start()

        # Update the metrics once on startup too
        self.update_active_users()

    def stop(self):
        """
        Stop the periodic update process

        Safe to call when nothing has been started.
        """
        callback = self._active_users_callback
        if callback is not None:
            callback.stop()
            self._active_users_callback = None
|
||||||
|
@@ -1,11 +1,13 @@
|
|||||||
import json
|
import json
|
||||||
|
from datetime import timedelta
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from jupyterhub import metrics, orm, roles
|
from jupyterhub import metrics, orm, roles
|
||||||
|
|
||||||
from .utils import api_request, get_page
|
from ..utils import utcnow
|
||||||
|
from .utils import add_user, api_request, get_page
|
||||||
|
|
||||||
|
|
||||||
async def test_total_users(app):
|
async def test_total_users(app):
|
||||||
@@ -73,3 +75,65 @@ async def test_metrics_auth(
|
|||||||
else:
|
else:
|
||||||
assert r.status_code == 403
|
assert r.status_code == 403
|
||||||
assert 'read:metrics' in r.text
|
assert 'read:metrics' in r.text
|
||||||
|
|
||||||
|
|
||||||
|
async def test_active_users(app):
    """Check that ACTIVE_USERS only changes on update and counts each window correctly."""
    db = app.db
    collector = metrics.PeriodicMetricsCollector(db=db)
    collector.update_active_users()
    now = utcnow()

    def collect():
        # Map each ActiveUserPeriods member to the gauge's current sample value.
        by_period = {}
        for sample in metrics.ACTIVE_USERS.collect()[0].samples:
            key = metrics.ActiveUserPeriods(sample.labels["period"])
            by_period[key] = sample.value
        print(by_period)
        return by_period

    baseline = collect()

    # Age of each new user's last activity; None means "never active".
    offsets = [
        None,
        # in 24h
        timedelta(hours=23, minutes=30),
        # in 7d
        timedelta(hours=24, minutes=1),
        timedelta(days=6, hours=23, minutes=30),
        # in 30d
        timedelta(days=7, minutes=1),
        timedelta(days=29, hours=23, minutes=30),
        # not in any
        timedelta(days=30, minutes=1),
    ]
    for i, offset in enumerate(offsets):
        user = add_user(db, name=f"active-{i}")
        user.last_activity = (now - offset) if offset else None
        db.commit()

    # collect before update is called, don't include new users
    counts = collect()
    for period in metrics.ActiveUserPeriods:
        assert period in counts
        assert counts[period] == baseline[period]

    # collect after updates, check updated counts
    collector.update_active_users()
    counts = collect()

    # Windows are cumulative: 1 user within 24h, 3 within 7d, 5 within 30d.
    expected_new = [
        (metrics.ActiveUserPeriods.twenty_four_hours, 1),
        (metrics.ActiveUserPeriods.seven_days, 3),
        (metrics.ActiveUserPeriods.thirty_days, 5),
    ]
    for period, added in expected_new:
        assert counts[period] == baseline[period] + added
|
||||||
|
Reference in New Issue
Block a user