mirror of
https://github.com/jupyterhub/jupyterhub.git
synced 2025-10-18 07:23:00 +00:00
Merge pull request #4214 from yuvipanda/metricsss
Add active users prometheus metrics
This commit is contained in:
@@ -74,6 +74,7 @@ from .metrics import (
|
||||
INIT_SPAWNERS_DURATION_SECONDS,
|
||||
RUNNING_SERVERS,
|
||||
TOTAL_USERS,
|
||||
PeriodicMetricsCollector,
|
||||
)
|
||||
from .oauth.provider import make_provider
|
||||
from .objects import Hub, Server
|
||||
@@ -2914,6 +2915,8 @@ class JupyterHub(Application):
|
||||
await self.proxy.check_routes(self.users, self._service_map)
|
||||
|
||||
asyncio.ensure_future(finish_init_spawners())
|
||||
metrics_updater = PeriodicMetricsCollector(parent=self, db=self.db)
|
||||
metrics_updater.start()
|
||||
|
||||
async def cleanup(self):
|
||||
"""Shutdown managed services and various subprocesses. Cleanup runtime files."""
|
||||
|
@@ -19,9 +19,16 @@ them manually here.
|
||||
|
||||
added ``jupyterhub_`` prefix to metric names.
|
||||
"""
|
||||
from datetime import timedelta
|
||||
from enum import Enum
|
||||
|
||||
from prometheus_client import Gauge, Histogram
|
||||
from tornado.ioloop import PeriodicCallback
|
||||
from traitlets import Any, Bool, Integer
|
||||
from traitlets.config import LoggingConfigurable
|
||||
|
||||
from . import orm
|
||||
from .utils import utcnow
|
||||
|
||||
REQUEST_DURATION_SECONDS = Histogram(
|
||||
'jupyterhub_request_duration_seconds',
|
||||
@@ -44,6 +51,12 @@ RUNNING_SERVERS = Gauge(
|
||||
|
||||
TOTAL_USERS = Gauge('jupyterhub_total_users', 'total number of users')
|
||||
|
||||
# Gauge reporting how many users were active within a trailing time window.
# One time series is exported per value of the `period` label.
ACTIVE_USERS = Gauge(
    'jupyterhub_active_users',
    'number of users who were active in the given time period',
    labelnames=['period'],
)
|
||||
|
||||
CHECK_ROUTES_DURATION_SECONDS = Histogram(
|
||||
'jupyterhub_check_routes_duration_seconds',
|
||||
'Time taken to validate all routes in proxy',
|
||||
@@ -179,6 +192,20 @@ for s in ProxyDeleteStatus:
|
||||
PROXY_DELETE_DURATION_SECONDS.labels(status=s)
|
||||
|
||||
|
||||
class ActiveUserPeriods(Enum):
    """
    Possible values for 'period' label of ACTIVE_USERS

    Each member's value is the literal string used as the ``period`` label
    on the ACTIVE_USERS gauge.
    """

    # users active within the trailing 24 hours
    twenty_four_hours = '24h'
    # users active within the trailing 7 days
    seven_days = '7d'
    # users active within the trailing 30 days
    thirty_days = '30d'
|
||||
|
||||
|
||||
# Touch the ACTIVE_USERS gauge once for every known period label value.
# NOTE(review): presumably this pre-creates each labelled child so that all
# periods are exported before the first update runs - confirm against the
# prometheus_client `labels()` documentation.
for s in ActiveUserPeriods:
    ACTIVE_USERS.labels(period=s.value)
|
||||
|
||||
|
||||
def prometheus_log_method(handler):
|
||||
"""
|
||||
Tornado log handler for recording RED metrics.
|
||||
@@ -200,3 +227,69 @@ def prometheus_log_method(handler):
|
||||
handler=f'{handler.__class__.__module__}.{type(handler).__name__}',
|
||||
code=handler.get_status(),
|
||||
).observe(handler.request.request_time())
|
||||
|
||||
|
||||
class PeriodicMetricsCollector(LoggingConfigurable):
    """
    Collect metrics to be calculated periodically

    Currently maintains the ``jupyterhub_active_users`` gauge by counting,
    for each ActiveUserPeriods window, the users whose ``last_activity`` is
    at or after the window's cutoff.
    """

    active_users_enabled = Bool(
        True,
        help="""
        Enable active_users prometheus metric.

        Populates a `jupyterhub_active_users` prometheus metric, with a label `period` that counts the time period
        over which these many users were active. Periods are 24h (24 hours), 7d (7 days) and 30d (30 days).
        """,
        config=True,
    )

    active_users_update_interval = Integer(
        60 * 60,
        help="""
        Number of seconds between updating active_users metrics.

        To avoid extra load on the database, this is only calculated periodically rather than
        at per-minute intervals. Defaults to once an hour.
        """,
        config=True,
    )

    db = Any(help="SQLAlchemy db session to use for performing queries")

    # Handle on the running PeriodicCallback (None while not started), kept so
    # that the refresh can be cancelled by stop().
    _active_users_callback = None

    def update_active_users(self):
        """Update active users metrics.

        Runs one count query per period against ``self.db`` and stores the
        result in the matching ACTIVE_USERS label.
        """

        # All the metrics should be based off a cutoff from a *fixed* point, so we calculate
        # the fixed point here - and then calculate the individual cutoffs in relation to this
        # fixed point.
        now = utcnow()
        cutoffs = {
            ActiveUserPeriods.twenty_four_hours: now - timedelta(hours=24),
            ActiveUserPeriods.seven_days: now - timedelta(days=7),
            ActiveUserPeriods.thirty_days: now - timedelta(days=30),
        }
        for period, cutoff in cutoffs.items():
            value = (
                self.db.query(orm.User).filter(orm.User.last_activity >= cutoff).count()
            )

            # Log the label value ('24h', '7d', '30d'); formatting the Enum
            # member itself would print 'ActiveUserPeriods.<name>'.
            self.log.info(
                'Found %s active users in the last %s', value, period.value
            )
            ACTIVE_USERS.labels(period=period.value).set(value)

    def start(self):
        """
        Start the periodic update process

        Does nothing when ``active_users_enabled`` is false. Calling start()
        again replaces the previous periodic callback instead of stacking a
        second timer next to it.
        """
        if self.active_users_enabled:
            # Cancel any callback left over from an earlier start()
            self.stop()

            # Setup periodic refresh of the metric
            # (PeriodicCallback takes its interval in milliseconds)
            self._active_users_callback = PeriodicCallback(
                self.update_active_users,
                self.active_users_update_interval * 1000,
                jitter=0.01,
            )
            self._active_users_callback.start()

            # Update the metrics once on startup too
            self.update_active_users()

    def stop(self):
        """
        Stop the periodic update process

        Safe to call when the collector was never started.
        """
        if self._active_users_callback is not None:
            self._active_users_callback.stop()
            self._active_users_callback = None
|
||||
|
@@ -1,11 +1,13 @@
|
||||
import json
|
||||
from datetime import timedelta
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
|
||||
from jupyterhub import metrics, orm, roles
|
||||
|
||||
from .utils import api_request, get_page
|
||||
from ..utils import utcnow
|
||||
from .utils import add_user, api_request, get_page
|
||||
|
||||
|
||||
async def test_total_users(app):
|
||||
@@ -73,3 +75,65 @@ async def test_metrics_auth(
|
||||
else:
|
||||
assert r.status_code == 403
|
||||
assert 'read:metrics' in r.text
|
||||
|
||||
|
||||
async def test_active_users(app):
    """Exercise the active-users gauge through PeriodicMetricsCollector.

    Users are added with ``last_activity`` just inside and just outside each
    reporting window. The gauge must stay at its baseline until
    ``update_active_users()`` is called again, then grow by the number of
    users that fall inside each window.
    """
    db = app.db
    collector = metrics.PeriodicMetricsCollector(db=db)
    collector.update_active_users()
    now = utcnow()

    def collect():
        # Snapshot the gauge as {ActiveUserPeriods member: sample value}
        by_period = {}
        for sample in metrics.ACTIVE_USERS.collect()[0].samples:
            key = metrics.ActiveUserPeriods(sample.labels["period"])
            by_period[key] = sample.value
        print(by_period)
        return by_period

    baseline = collect()

    offsets = (
        # no recorded activity at all
        None,
        # in 24h
        timedelta(hours=23, minutes=30),
        # in 7d
        timedelta(hours=24, minutes=1),
        timedelta(days=6, hours=23, minutes=30),
        # in 30d
        timedelta(days=7, minutes=1),
        timedelta(days=29, hours=23, minutes=30),
        # not in any
        timedelta(days=30, minutes=1),
    )
    for index, offset in enumerate(offsets):
        user = add_user(db, name=f"active-{index}")
        user.last_activity = (now - offset) if offset else None
        db.commit()

    # the gauge is only refreshed by update_active_users(), so the new users
    # must not be visible yet
    before_update = collect()
    for period in metrics.ActiveUserPeriods:
        assert period in before_update
        assert before_update[period] == baseline[period]

    # after a refresh, every window grows by the users that fall inside it
    collector.update_active_users()
    after_update = collect()
    expected_growth = {
        metrics.ActiveUserPeriods.twenty_four_hours: 1,
        metrics.ActiveUserPeriods.seven_days: 3,
        metrics.ActiveUserPeriods.thirty_days: 5,
    }
    for period, added in expected_growth.items():
        assert after_update[period] == baseline[period] + added
|
||||
|
Reference in New Issue
Block a user