Merge pull request #4214 from yuvipanda/metricsss

Add active users prometheus metrics
This commit is contained in:
Erik Sundell
2022-12-01 15:45:59 +01:00
committed by GitHub
3 changed files with 161 additions and 1 deletions

View File

@@ -74,6 +74,7 @@ from .metrics import (
    INIT_SPAWNERS_DURATION_SECONDS,
    RUNNING_SERVERS,
    TOTAL_USERS,
    PeriodicMetricsCollector,
)
from .oauth.provider import make_provider
from .objects import Hub, Server
@@ -2914,6 +2915,8 @@ class JupyterHub(Application):
        await self.proxy.check_routes(self.users, self._service_map)
        asyncio.ensure_future(finish_init_spawners())

        metrics_updater = PeriodicMetricsCollector(parent=self, db=self.db)
        metrics_updater.start()

    async def cleanup(self):
        """Shutdown managed services and various subprocesses. Cleanup runtime files."""

View File

@@ -19,9 +19,16 @@ them manually here.
added ``jupyterhub_`` prefix to metric names.
"""
from datetime import timedelta
from enum import Enum

from prometheus_client import Gauge, Histogram
from tornado.ioloop import PeriodicCallback
from traitlets import Any, Bool, Integer
from traitlets.config import LoggingConfigurable
from . import orm
from .utils import utcnow
REQUEST_DURATION_SECONDS = Histogram(
    'jupyterhub_request_duration_seconds',
@@ -44,6 +51,12 @@ RUNNING_SERVERS = Gauge(
TOTAL_USERS = Gauge('jupyterhub_total_users', 'total number of users')
# Gauge of users active within each trailing time window; one time series
# per 'period' label value. Values are set by PeriodicMetricsCollector.
ACTIVE_USERS = Gauge(
'jupyterhub_active_users',
'number of users who were active in the given time period',
['period'],
)
CHECK_ROUTES_DURATION_SECONDS = Histogram(
    'jupyterhub_check_routes_duration_seconds',
    'Time taken to validate all routes in proxy',
@@ -179,6 +192,20 @@ for s in ProxyDeleteStatus:
    PROXY_DELETE_DURATION_SECONDS.labels(status=s)
class ActiveUserPeriods(Enum):
    """Allowed values for the ``period`` label of the ACTIVE_USERS gauge."""

    twenty_four_hours = '24h'
    seven_days = '7d'
    thirty_days = '30d'


# Pre-register a time series for every period so all labels exist (at 0)
# before the first periodic collection runs.
for period in ActiveUserPeriods:
    ACTIVE_USERS.labels(period=period.value)
def prometheus_log_method(handler):
    """
    Tornado log handler for recording RED metrics.
@@ -200,3 +227,69 @@ def prometheus_log_method(handler):
        handler=f'{handler.__class__.__module__}.{type(handler).__name__}',
        code=handler.get_status(),
    ).observe(handler.request.request_time())
class PeriodicMetricsCollector(LoggingConfigurable):
    """
    Periodically recompute metrics that must be derived from the database.
    """

    # Toggle for the jupyterhub_active_users gauge.
    active_users_enabled = Bool(
        True,
        help="""
        Enable active_users prometheus metric.

        Populates a `jupyterhub_active_users` prometheus metric, with a label `period` that counts the time period
        over which these many users were active. Periods are 24h (24 hours), 7d (7 days) and 30d (30 days).
        """,
        config=True,
    )

    # Refresh cadence for the active-user queries.
    active_users_update_interval = Integer(
        60 * 60,
        help="""
        Number of seconds between updating active_users metrics.

        To avoid extra load on the database, this is only calculated periodically rather than
        at per-minute intervals. Defaults to once an hour.
        """,
        config=True,
    )

    db = Any(help="SQLAlchemy db session to use for performing queries")

    def update_active_users(self):
        """Recompute and publish the per-period active-user gauges."""
        # Every window is measured back from one fixed instant so the
        # three counts are mutually consistent.
        reference = utcnow()
        windows = {
            ActiveUserPeriods.twenty_four_hours: timedelta(hours=24),
            ActiveUserPeriods.seven_days: timedelta(days=7),
            ActiveUserPeriods.thirty_days: timedelta(days=30),
        }
        for period, window in windows.items():
            cutoff = reference - window
            value = (
                self.db.query(orm.User).filter(orm.User.last_activity >= cutoff).count()
            )
            self.log.info(f'Found {value} active users in the last {period}')
            ACTIVE_USERS.labels(period=period.value).set(value)

    def start(self):
        """
        Start the periodic update process
        """
        if not self.active_users_enabled:
            return
        # Refresh on a timer; the jitter spreads load when several hubs
        # share one database.
        PeriodicCallback(
            self.update_active_users,
            self.active_users_update_interval * 1000,
            jitter=0.01,
        ).start()
        # Prime the gauges now instead of waiting a full interval.
        self.update_active_users()

View File

@@ -1,11 +1,13 @@
import json
from datetime import timedelta
from unittest import mock

import pytest

from jupyterhub import metrics, orm, roles

from ..utils import utcnow
from .utils import add_user, api_request, get_page


async def test_total_users(app):
@@ -73,3 +75,65 @@ async def test_metrics_auth(
    else:
        assert r.status_code == 403
        assert 'read:metrics' in r.text
async def test_active_users(app):
    db = app.db
    collector = metrics.PeriodicMetricsCollector(db=db)
    collector.update_active_users()
    now = utcnow()

    def snapshot():
        """Read the current ACTIVE_USERS samples, keyed by period enum."""
        samples = metrics.ACTIVE_USERS.collect()[0].samples
        by_period = {
            metrics.ActiveUserPeriods(sample.labels["period"]): sample.value
            for sample in samples
        }
        print(by_period)
        return by_period

    baseline = snapshot()

    # One user per bucket boundary: index 0 has no activity at all,
    # the rest straddle the 24h / 7d / 30d cutoffs on either side.
    offsets = [
        None,
        # in 24h
        timedelta(hours=23, minutes=30),
        # in 7d
        timedelta(hours=24, minutes=1),
        timedelta(days=6, hours=23, minutes=30),
        # in 30d
        timedelta(days=7, minutes=1),
        timedelta(days=29, hours=23, minutes=30),
        # not in any
        timedelta(days=30, minutes=1),
    ]
    for i, offset in enumerate(offsets):
        user = add_user(db, name=f"active-{i}")
        user.last_activity = None if offset is None else now - offset
        db.commit()

    # Gauges are only refreshed by the collector, so the new users must
    # not show up until update_active_users() runs again.
    counts = snapshot()
    for period in metrics.ActiveUserPeriods:
        assert period in counts
        assert counts[period] == baseline[period]

    # After an explicit refresh each window gains the expected users.
    collector.update_active_users()
    counts = snapshot()
    expected_delta = {
        metrics.ActiveUserPeriods.twenty_four_hours: 1,
        metrics.ActiveUserPeriods.seven_days: 3,
        metrics.ActiveUserPeriods.thirty_days: 5,
    }
    for period, delta in expected_delta.items():
        assert counts[period] == baseline[period] + delta