Merge pull request #4214 from yuvipanda/metricsss

Add active users Prometheus metrics
This commit is contained in:
Erik Sundell
2022-12-01 15:45:59 +01:00
committed by GitHub
3 changed files with 161 additions and 1 deletion

View File

@@ -74,6 +74,7 @@ from .metrics import (
INIT_SPAWNERS_DURATION_SECONDS,
RUNNING_SERVERS,
TOTAL_USERS,
PeriodicMetricsCollector,
)
from .oauth.provider import make_provider
from .objects import Hub, Server
@@ -2914,6 +2915,8 @@ class JupyterHub(Application):
await self.proxy.check_routes(self.users, self._service_map)
asyncio.ensure_future(finish_init_spawners())
metrics_updater = PeriodicMetricsCollector(parent=self, db=self.db)
metrics_updater.start()
async def cleanup(self):
"""Shutdown managed services and various subprocesses. Cleanup runtime files."""

View File

@@ -19,9 +19,16 @@ them manually here.
added ``jupyterhub_`` prefix to metric names.
"""
from datetime import timedelta
from enum import Enum
from prometheus_client import Gauge, Histogram
from tornado.ioloop import PeriodicCallback
from traitlets import Any, Bool, Integer
from traitlets.config import LoggingConfigurable
from . import orm
from .utils import utcnow
REQUEST_DURATION_SECONDS = Histogram(
'jupyterhub_request_duration_seconds',
@@ -44,6 +51,12 @@ RUNNING_SERVERS = Gauge(
# Gauge: total number of users known to the hub.
TOTAL_USERS = Gauge('jupyterhub_total_users', 'total number of users')
# Gauge: number of users who were active within a recent time window.
# The 'period' label selects the window ('24h', '7d', '30d' — see
# ActiveUserPeriods); values are refreshed by PeriodicMetricsCollector.
ACTIVE_USERS = Gauge(
    'jupyterhub_active_users',
    'number of users who were active in the given time period',
    ['period'],
)
CHECK_ROUTES_DURATION_SECONDS = Histogram(
'jupyterhub_check_routes_duration_seconds',
'Time taken to validate all routes in proxy',
@@ -179,6 +192,20 @@ for s in ProxyDeleteStatus:
PROXY_DELETE_DURATION_SECONDS.labels(status=s)
class ActiveUserPeriods(Enum):
    """Allowed values for the 'period' label of the ACTIVE_USERS gauge."""

    twenty_four_hours = '24h'
    seven_days = '7d'
    thirty_days = '30d'
# Touch each period's labelled child up front so every period is present
# in the exported metrics from startup (mirrors the ProxyDeleteStatus loop above).
for period in ActiveUserPeriods:
    ACTIVE_USERS.labels(period=period.value)
def prometheus_log_method(handler):
"""
Tornado log handler for recording RED metrics.
@@ -200,3 +227,69 @@ def prometheus_log_method(handler):
handler=f'{handler.__class__.__module__}.{type(handler).__name__}',
code=handler.get_status(),
).observe(handler.request.request_time())
class PeriodicMetricsCollector(LoggingConfigurable):
    """
    Collect metrics to be calculated periodically
    """

    active_users_enabled = Bool(
        True,
        help="""
        Enable active_users prometheus metric.

        Populates a `jupyterhub_active_users` prometheus metric, with a label `period` that counts the time period
        over which that many users were active. Periods are 24h (24 hours), 7d (7 days) and 30d (30 days).
        """,
        config=True,
    )

    active_users_update_interval = Integer(
        60 * 60,
        help="""
        Number of seconds between updating active_users metrics.

        To avoid extra load on the database, this is only calculated periodically rather than
        at per-minute intervals. Defaults to once an hour.
        """,
        config=True,
    )

    # SQLAlchemy session used for the active-user count queries
    db = Any(help="SQLAlchemy db session to use for performing queries")

    def update_active_users(self):
        """Count active users per period and publish to the ACTIVE_USERS gauge."""
        # All the metrics should be based off a cutoff from a *fixed* point, so we calculate
        # the fixed point here - and then calculate the individual cutoffs in relation to this
        # fixed point.
        now = utcnow()
        cutoffs = {
            ActiveUserPeriods.twenty_four_hours: now - timedelta(hours=24),
            ActiveUserPeriods.seven_days: now - timedelta(days=7),
            ActiveUserPeriods.thirty_days: now - timedelta(days=30),
        }
        for period, cutoff in cutoffs.items():
            value = (
                self.db.query(orm.User).filter(orm.User.last_activity >= cutoff).count()
            )
            # Log the human-readable label ('24h') rather than the enum repr
            # ('ActiveUserPeriods.twenty_four_hours'), matching the metric label.
            self.log.info(f'Found {value} active users in the last {period.value}')
            ACTIVE_USERS.labels(period=period.value).set(value)

    def start(self):
        """
        Start the periodic update process
        """
        if self.active_users_enabled:
            # Setup periodic refresh of the metric.
            # Keep a reference on self so the callback can be inspected or
            # stopped later instead of being an anonymous local.
            self._active_users_callback = PeriodicCallback(
                self.update_active_users,
                # PeriodicCallback takes its interval in milliseconds
                self.active_users_update_interval * 1000,
                jitter=0.01,
            )
            self._active_users_callback.start()

            # Update the metrics once on startup too
            self.update_active_users()

View File

@@ -1,11 +1,13 @@
import json
from datetime import timedelta
from unittest import mock
import pytest
from jupyterhub import metrics, orm, roles
from .utils import api_request, get_page
from ..utils import utcnow
from .utils import add_user, api_request, get_page
async def test_total_users(app):
@@ -73,3 +75,65 @@ async def test_metrics_auth(
else:
assert r.status_code == 403
assert 'read:metrics' in r.text
async def test_active_users(app):
    """ACTIVE_USERS should bucket users by how recent their last_activity is."""
    db = app.db
    collector = metrics.PeriodicMetricsCollector(db=db)
    collector.update_active_users()
    now = utcnow()

    def snapshot():
        # Read the current ACTIVE_USERS samples, keyed by the period enum
        samples = metrics.ACTIVE_USERS.collect()[0].samples
        per_period = {}
        for sample in samples:
            key = metrics.ActiveUserPeriods(sample.labels["period"])
            per_period[key] = sample.value
        print(per_period)
        return per_period

    before = snapshot()

    offsets = [
        None,
        # in 24h
        timedelta(hours=23, minutes=30),
        # in 7d
        timedelta(hours=24, minutes=1),
        timedelta(days=6, hours=23, minutes=30),
        # in 30d
        timedelta(days=7, minutes=1),
        timedelta(days=29, hours=23, minutes=30),
        # not in any
        timedelta(days=30, minutes=1),
    ]
    for idx, offset in enumerate(offsets):
        user = add_user(db, name=f"active-{idx}")
        user.last_activity = (now - offset) if offset else None
    db.commit()

    # collect before update is called, don't include new users
    observed = snapshot()
    for period in metrics.ActiveUserPeriods:
        assert period in observed
        assert observed[period] == before[period]

    # collect after updates, check updated counts
    collector.update_active_users()
    observed = snapshot()
    assert (
        observed[metrics.ActiveUserPeriods.twenty_four_hours]
        == before[metrics.ActiveUserPeriods.twenty_four_hours] + 1
    )
    assert (
        observed[metrics.ActiveUserPeriods.seven_days]
        == before[metrics.ActiveUserPeriods.seven_days] + 3
    )
    assert (
        observed[metrics.ActiveUserPeriods.thirty_days]
        == before[metrics.ActiveUserPeriods.thirty_days] + 5
    )