diff --git a/docs/requirements.txt b/docs/requirements.txt index 44c782f0..93b1e665 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -5,4 +5,5 @@ alabaster_jupyterhub autodoc-traits recommonmark==0.5.0 sphinx-copybutton +sphinx-jsonschema sphinx>=1.7 diff --git a/docs/source/conf.py b/docs/source/conf.py index 5f8d3632..b7d9f7c4 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -19,6 +19,7 @@ extensions = [ 'sphinx.ext.napoleon', 'autodoc_traits', 'sphinx_copybutton', + 'sphinx-jsonschema', ] templates_path = ['_templates'] diff --git a/docs/source/events/index.rst b/docs/source/events/index.rst new file mode 100644 index 00000000..90e30acb --- /dev/null +++ b/docs/source/events/index.rst @@ -0,0 +1,50 @@ +Eventlogging and Telemetry +========================== + +JupyterHub can be configured to record structured events from a running server using Jupyter's `Telemetry System`_. The types of events that JupyterHub emits are defined by `JSON schemas`_ listed below_ + + emitted as JSON data, defined and validated by the JSON schemas listed below. + + +.. _logging: https://docs.python.org/3/library/logging.html +.. _`Telemetry System`: https://github.com/jupyter/telemetry +.. _`JSON schemas`: https://json-schema.org/ + +How to emit events +------------------ + +Event logging is handled by its ``Eventlog`` object. This leverages Python's standing logging_ library to emit, filter, and collect event data. + + +To begin recording events, you'll need to set two configurations: + + 1. ``handlers``: tells the EventLog *where* to route your events. This trait is a list of Python logging handlers that route events to + 2. ``allows_schemas``: tells the EventLog *which* events should be recorded. No events are emitted by default; all recorded events must be listed here. + +Here's a basic example: + +.. code-block:: + + import logging + + c.EventLog.handlers = [ + logging.FileHandler('event.log'), + ] + + c.EventLog.allowed_schemas = [ + 'hub.jupyter.org/server-action' + ] + +The output is a file, ``"event.log"``, with events recorded as JSON data. + + + +.. _below: + +Event schemas +------------- + +.. toctree:: + :maxdepth: 2 + + server-actions.rst diff --git a/docs/source/events/server-actions.rst b/docs/source/events/server-actions.rst new file mode 100644 index 00000000..12018713 --- /dev/null +++ b/docs/source/events/server-actions.rst @@ -0,0 +1 @@ +.. jsonschema:: ../../../jupyterhub/event-schemas/server-actions/v1.yaml diff --git a/docs/source/index.rst b/docs/source/index.rst index db23641a..274905f5 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -134,6 +134,14 @@ helps keep our community welcoming to as many people as possible. contributing/roadmap contributing/security +Eventlogging and Telemetry +-------------------------- + +.. toctree:: + :maxdepth: 1 + + events/index + Upgrading JupyterHub -------------------- diff --git a/jupyterhub/alembic/env.py b/jupyterhub/alembic/env.py index 4846f4c1..584f82c1 100644 --- a/jupyterhub/alembic/env.py +++ b/jupyterhub/alembic/env.py @@ -28,7 +28,7 @@ if 'jupyterhub' in sys.modules: alembic_logger.propagate = True alembic_logger.parent = app.log else: - fileConfig(config.config_file_name) + fileConfig(config.config_file_name, disable_existing_loggers=False) else: fileConfig(config.config_file_name) diff --git a/jupyterhub/app.py b/jupyterhub/app.py index c0998028..95d25d3c 100644 --- a/jupyterhub/app.py +++ b/jupyterhub/app.py @@ -5,6 +5,7 @@ import asyncio import atexit import binascii +import json import logging import os import re @@ -18,6 +19,7 @@ from datetime import timedelta from datetime import timezone from functools import partial from getpass import getuser +from glob import glob from operator import itemgetter from textwrap import dedent from urllib.parse import unquote @@ -59,6 +61,8 @@ from traitlets import ( ) from traitlets.config import Application, Configurable, catch_config_error +from jupyter_telemetry.eventlog import EventLog + here = os.path.dirname(__file__) import jupyterhub @@ -2113,6 +2117,7 @@ class JupyterHub(Application): internal_ssl_ca=self.internal_ssl_ca, trusted_alt_names=self.trusted_alt_names, shutdown_on_logout=self.shutdown_on_logout, + eventlog=self.eventlog, ) # allow configured settings to have priority settings.update(self.tornado_settings) @@ -2138,6 +2143,16 @@ class JupyterHub(Application): e, ) + def init_eventlog(self): + """Set up the event logging system.""" + self.eventlog = EventLog(parent=self) + + for dirname, _, files in os.walk(os.path.join(here, 'event-schemas')): + for file in files: + if not file.endswith('.yaml'): + continue + self.eventlog.register_schema_file(os.path.join(dirname, file)) + def write_pid_file(self): pid = os.getpid() if self.pid_file: @@ -2189,6 +2204,7 @@ class JupyterHub(Application): _log_cls("Authenticator", self.authenticator_class) _log_cls("Spawner", self.spawner_class) + self.init_eventlog() self.init_pycurl() self.init_secrets() self.init_internal_ssl() diff --git a/jupyterhub/event-schemas/server-actions/v1.yaml b/jupyterhub/event-schemas/server-actions/v1.yaml new file mode 100644 index 00000000..35ffad67 --- /dev/null +++ b/jupyterhub/event-schemas/server-actions/v1.yaml @@ -0,0 +1,59 @@ +"$id": hub.jupyter.org/server-action +version: 1 +title: JupyterHub server events +description: | + Record actions on user servers made via JupyterHub. + + JupyterHub can perform various actions on user servers via + direct interaction from users, or via the API. This event is + recorded whenever either of those happen. + + Limitations: + + 1. This does not record all server starts / stops, only those + explicitly performed by JupyterHub. For example, a user's server + can go down because the node it was running on dies. That will + not cause an event to be recorded, since it was not initiated + by JupyterHub. In practice this happens often, so this is not + a complete record. + 2. Events are only recorded when an action succeeds. +type: object +required: +- action +- username +- servername +properties: + action: + enum: + - start + - stop + description: | + Action performed by JupyterHub. + + This is a required field. + + Possibl Values: + + 1. start + A user's server was successfully started + + 2. stop + A user's server was successfully stopped + username: + type: string + description: | + Name of the user whose server this action was performed on. + + This is the normalized name used by JupyterHub itself, + which is derived from the authentication provider used but + might not be the same as used in the authentication provider. + servername: + type: string + description: | + Name of the server this action was performed on. + + JupyterHub supports each user having multiple servers with + arbitrary names, and this field specifies the name of the + server. + + The 'default' server is denoted by the empty string diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py index d803f834..03d9cf3b 100644 --- a/jupyterhub/handlers/base.py +++ b/jupyterhub/handlers/base.py @@ -156,6 +156,10 @@ class BaseHandler(RequestHandler): def oauth_provider(self): return self.settings['oauth_provider'] + @property + def eventlog(self): + return self.settings['eventlog'] + def finish(self, *args, **kwargs): """Roll back any uncommitted transactions from the handler.""" if self.db.dirty: @@ -848,6 +852,11 @@ class BaseHandler(RequestHandler): SERVER_SPAWN_DURATION_SECONDS.labels( status=ServerSpawnStatus.success ).observe(time.perf_counter() - spawn_start_time) + self.eventlog.record_event( + 'hub.jupyter.org/server-action', + 1, + {'action': 'start', 'username': user.name, 'servername': server_name}, + ) proxy_add_start_time = time.perf_counter() spawner._proxy_pending = True try: @@ -1028,6 +1037,15 @@ class BaseHandler(RequestHandler): SERVER_STOP_DURATION_SECONDS.labels( status=ServerStopStatus.success ).observe(toc - tic) + self.eventlog.record_event( + 'hub.jupyter.org/server-action', + 1, + { + 'action': 'stop', + 'username': user.name, + 'servername': server_name, + }, + ) except: SERVER_STOP_DURATION_SECONDS.labels( status=ServerStopStatus.failure diff --git a/jupyterhub/tests/test_eventlog.py b/jupyterhub/tests/test_eventlog.py new file mode 100644 index 00000000..3cfbaec1 --- /dev/null +++ b/jupyterhub/tests/test_eventlog.py @@ -0,0 +1,80 @@ +"""Tests for Eventlogging in JupyterHub. + +To test a new schema or event, simply add it to the +`valid_events` and `invalid_events` variables below. + +You *shouldn't* need to write new tests. +""" +import io +import json +import logging +from unittest import mock + +import jsonschema +import pytest +from traitlets.config import Config + +from .mocking import MockHub + + +# To test new schemas, add them to the `valid_events` +# and `invalid_events` dictionary below. + +# To test valid events, add event item with the form: +# { ( '', ) : { } } +valid_events = [ + ( + 'hub.jupyter.org/server-action', + 1, + dict(action='start', username='test-username', servername='test-servername'), + ) +] + +# To test invalid events, add event item with the form: +# { ( '', ) : { } } +invalid_events = [ + # Missing required keys + ('hub.jupyter.org/server-action', 1, dict(action='start')) +] + + +@pytest.fixture +def eventlog_sink(app): + """Return eventlog and sink objects""" + sink = io.StringIO() + handler = logging.StreamHandler(sink) + # Update the EventLog config with handler + cfg = Config() + cfg.EventLog.handlers = [handler] + + with mock.patch.object(app.config, 'EventLog', cfg.EventLog): + # recreate the eventlog object with our config + app.init_eventlog() + # return the sink from the fixture + yield app.eventlog, sink + # reset eventlog with original config + app.init_eventlog() + + +@pytest.mark.parametrize('schema, version, event', valid_events) +def test_valid_events(eventlog_sink, schema, version, event): + eventlog, sink = eventlog_sink + eventlog.allowed_schemas = [schema] + # Record event + eventlog.record_event(schema, version, event) + # Inspect consumed event + output = sink.getvalue() + assert output + data = json.loads(output) + # Verify event data was recorded + assert data is not None + + +@pytest.mark.parametrize('schema, version, event', invalid_events) +def test_invalid_events(eventlog_sink, schema, version, event): + eventlog, sink = eventlog_sink + eventlog.allowed_schemas = [schema] + + # Make sure an error is thrown when bad events are recorded + with pytest.raises(jsonschema.ValidationError): + recorded_event = eventlog.record_event(schema, version, event) diff --git a/requirements.txt b/requirements.txt index ad40788f..7821b905 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ async_generator>=1.8 certipy>=0.1.2 entrypoints jinja2 +jupyter_telemetry oauthlib>=3.0 pamela prometheus_client>=0.0.21