mirror of
https://github.com/jupyterhub/jupyterhub.git
synced 2025-10-15 14:03:02 +00:00
Merge pull request #2698 from Zsailer/eventlog-tests
Instrument JupyterHub to record events with jupyter_telemetry [Part II]
This commit is contained in:
@@ -5,4 +5,5 @@ alabaster_jupyterhub
|
||||
autodoc-traits
|
||||
recommonmark==0.5.0
|
||||
sphinx-copybutton
|
||||
sphinx-jsonschema
|
||||
sphinx>=1.7
|
||||
|
@@ -19,6 +19,7 @@ extensions = [
|
||||
'sphinx.ext.napoleon',
|
||||
'autodoc_traits',
|
||||
'sphinx_copybutton',
|
||||
'sphinx-jsonschema',
|
||||
]
|
||||
|
||||
templates_path = ['_templates']
|
||||
|
50
docs/source/events/index.rst
Normal file
50
docs/source/events/index.rst
Normal file
@@ -0,0 +1,50 @@
|
||||
Eventlogging and Telemetry
|
||||
==========================
|
||||
|
||||
JupyterHub can be configured to record structured events from a running server using Jupyter's `Telemetry System`_. The types of events that JupyterHub emits are defined by `JSON schemas`_ listed below_
|
||||
|
||||
emitted as JSON data, defined and validated by the JSON schemas listed below.
|
||||
|
||||
|
||||
.. _logging: https://docs.python.org/3/library/logging.html
|
||||
.. _`Telemetry System`: https://github.com/jupyter/telemetry
|
||||
.. _`JSON schemas`: https://json-schema.org/
|
||||
|
||||
How to emit events
|
||||
------------------
|
||||
|
||||
Event logging is handled by its ``Eventlog`` object. This leverages Python's standing logging_ library to emit, filter, and collect event data.
|
||||
|
||||
|
||||
To begin recording events, you'll need to set two configurations:
|
||||
|
||||
1. ``handlers``: tells the EventLog *where* to route your events. This trait is a list of Python logging handlers that route events to
|
||||
2. ``allows_schemas``: tells the EventLog *which* events should be recorded. No events are emitted by default; all recorded events must be listed here.
|
||||
|
||||
Here's a basic example:
|
||||
|
||||
.. code-block::
|
||||
|
||||
import logging
|
||||
|
||||
c.EventLog.handlers = [
|
||||
logging.FileHandler('event.log'),
|
||||
]
|
||||
|
||||
c.EventLog.allowed_schemas = [
|
||||
'hub.jupyter.org/server-action'
|
||||
]
|
||||
|
||||
The output is a file, ``"event.log"``, with events recorded as JSON data.
|
||||
|
||||
|
||||
|
||||
.. _below:
|
||||
|
||||
Event schemas
|
||||
-------------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
server-actions.rst
|
1
docs/source/events/server-actions.rst
Normal file
1
docs/source/events/server-actions.rst
Normal file
@@ -0,0 +1 @@
|
||||
.. jsonschema:: ../../../jupyterhub/event-schemas/server-actions/v1.yaml
|
@@ -134,6 +134,14 @@ helps keep our community welcoming to as many people as possible.
|
||||
contributing/roadmap
|
||||
contributing/security
|
||||
|
||||
Eventlogging and Telemetry
|
||||
--------------------------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
events/index
|
||||
|
||||
Upgrading JupyterHub
|
||||
--------------------
|
||||
|
||||
|
@@ -28,7 +28,7 @@ if 'jupyterhub' in sys.modules:
|
||||
alembic_logger.propagate = True
|
||||
alembic_logger.parent = app.log
|
||||
else:
|
||||
fileConfig(config.config_file_name)
|
||||
fileConfig(config.config_file_name, disable_existing_loggers=False)
|
||||
else:
|
||||
fileConfig(config.config_file_name)
|
||||
|
||||
|
@@ -5,6 +5,7 @@
|
||||
import asyncio
|
||||
import atexit
|
||||
import binascii
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
@@ -18,6 +19,7 @@ from datetime import timedelta
|
||||
from datetime import timezone
|
||||
from functools import partial
|
||||
from getpass import getuser
|
||||
from glob import glob
|
||||
from operator import itemgetter
|
||||
from textwrap import dedent
|
||||
from urllib.parse import unquote
|
||||
@@ -59,6 +61,8 @@ from traitlets import (
|
||||
)
|
||||
from traitlets.config import Application, Configurable, catch_config_error
|
||||
|
||||
from jupyter_telemetry.eventlog import EventLog
|
||||
|
||||
here = os.path.dirname(__file__)
|
||||
|
||||
import jupyterhub
|
||||
@@ -2113,6 +2117,7 @@ class JupyterHub(Application):
|
||||
internal_ssl_ca=self.internal_ssl_ca,
|
||||
trusted_alt_names=self.trusted_alt_names,
|
||||
shutdown_on_logout=self.shutdown_on_logout,
|
||||
eventlog=self.eventlog,
|
||||
)
|
||||
# allow configured settings to have priority
|
||||
settings.update(self.tornado_settings)
|
||||
@@ -2138,6 +2143,16 @@ class JupyterHub(Application):
|
||||
e,
|
||||
)
|
||||
|
||||
def init_eventlog(self):
|
||||
"""Set up the event logging system."""
|
||||
self.eventlog = EventLog(parent=self)
|
||||
|
||||
for dirname, _, files in os.walk(os.path.join(here, 'event-schemas')):
|
||||
for file in files:
|
||||
if not file.endswith('.yaml'):
|
||||
continue
|
||||
self.eventlog.register_schema_file(os.path.join(dirname, file))
|
||||
|
||||
def write_pid_file(self):
|
||||
pid = os.getpid()
|
||||
if self.pid_file:
|
||||
@@ -2189,6 +2204,7 @@ class JupyterHub(Application):
|
||||
_log_cls("Authenticator", self.authenticator_class)
|
||||
_log_cls("Spawner", self.spawner_class)
|
||||
|
||||
self.init_eventlog()
|
||||
self.init_pycurl()
|
||||
self.init_secrets()
|
||||
self.init_internal_ssl()
|
||||
|
59
jupyterhub/event-schemas/server-actions/v1.yaml
Normal file
59
jupyterhub/event-schemas/server-actions/v1.yaml
Normal file
@@ -0,0 +1,59 @@
|
||||
"$id": hub.jupyter.org/server-action
|
||||
version: 1
|
||||
title: JupyterHub server events
|
||||
description: |
|
||||
Record actions on user servers made via JupyterHub.
|
||||
|
||||
JupyterHub can perform various actions on user servers via
|
||||
direct interaction from users, or via the API. This event is
|
||||
recorded whenever either of those happen.
|
||||
|
||||
Limitations:
|
||||
|
||||
1. This does not record all server starts / stops, only those
|
||||
explicitly performed by JupyterHub. For example, a user's server
|
||||
can go down because the node it was running on dies. That will
|
||||
not cause an event to be recorded, since it was not initiated
|
||||
by JupyterHub. In practice this happens often, so this is not
|
||||
a complete record.
|
||||
2. Events are only recorded when an action succeeds.
|
||||
type: object
|
||||
required:
|
||||
- action
|
||||
- username
|
||||
- servername
|
||||
properties:
|
||||
action:
|
||||
enum:
|
||||
- start
|
||||
- stop
|
||||
description: |
|
||||
Action performed by JupyterHub.
|
||||
|
||||
This is a required field.
|
||||
|
||||
Possibl Values:
|
||||
|
||||
1. start
|
||||
A user's server was successfully started
|
||||
|
||||
2. stop
|
||||
A user's server was successfully stopped
|
||||
username:
|
||||
type: string
|
||||
description: |
|
||||
Name of the user whose server this action was performed on.
|
||||
|
||||
This is the normalized name used by JupyterHub itself,
|
||||
which is derived from the authentication provider used but
|
||||
might not be the same as used in the authentication provider.
|
||||
servername:
|
||||
type: string
|
||||
description: |
|
||||
Name of the server this action was performed on.
|
||||
|
||||
JupyterHub supports each user having multiple servers with
|
||||
arbitrary names, and this field specifies the name of the
|
||||
server.
|
||||
|
||||
The 'default' server is denoted by the empty string
|
@@ -156,6 +156,10 @@ class BaseHandler(RequestHandler):
|
||||
def oauth_provider(self):
|
||||
return self.settings['oauth_provider']
|
||||
|
||||
@property
|
||||
def eventlog(self):
|
||||
return self.settings['eventlog']
|
||||
|
||||
def finish(self, *args, **kwargs):
|
||||
"""Roll back any uncommitted transactions from the handler."""
|
||||
if self.db.dirty:
|
||||
@@ -848,6 +852,11 @@ class BaseHandler(RequestHandler):
|
||||
SERVER_SPAWN_DURATION_SECONDS.labels(
|
||||
status=ServerSpawnStatus.success
|
||||
).observe(time.perf_counter() - spawn_start_time)
|
||||
self.eventlog.record_event(
|
||||
'hub.jupyter.org/server-action',
|
||||
1,
|
||||
{'action': 'start', 'username': user.name, 'servername': server_name},
|
||||
)
|
||||
proxy_add_start_time = time.perf_counter()
|
||||
spawner._proxy_pending = True
|
||||
try:
|
||||
@@ -1028,6 +1037,15 @@ class BaseHandler(RequestHandler):
|
||||
SERVER_STOP_DURATION_SECONDS.labels(
|
||||
status=ServerStopStatus.success
|
||||
).observe(toc - tic)
|
||||
self.eventlog.record_event(
|
||||
'hub.jupyter.org/server-action',
|
||||
1,
|
||||
{
|
||||
'action': 'stop',
|
||||
'username': user.name,
|
||||
'servername': server_name,
|
||||
},
|
||||
)
|
||||
except:
|
||||
SERVER_STOP_DURATION_SECONDS.labels(
|
||||
status=ServerStopStatus.failure
|
||||
|
80
jupyterhub/tests/test_eventlog.py
Normal file
80
jupyterhub/tests/test_eventlog.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""Tests for Eventlogging in JupyterHub.
|
||||
|
||||
To test a new schema or event, simply add it to the
|
||||
`valid_events` and `invalid_events` variables below.
|
||||
|
||||
You *shouldn't* need to write new tests.
|
||||
"""
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
from unittest import mock
|
||||
|
||||
import jsonschema
|
||||
import pytest
|
||||
from traitlets.config import Config
|
||||
|
||||
from .mocking import MockHub
|
||||
|
||||
|
||||
# To test new schemas, add them to the `valid_events`
|
||||
# and `invalid_events` dictionary below.
|
||||
|
||||
# To test valid events, add event item with the form:
|
||||
# { ( '<schema id>', <version> ) : { <event_data> } }
|
||||
valid_events = [
|
||||
(
|
||||
'hub.jupyter.org/server-action',
|
||||
1,
|
||||
dict(action='start', username='test-username', servername='test-servername'),
|
||||
)
|
||||
]
|
||||
|
||||
# To test invalid events, add event item with the form:
|
||||
# { ( '<schema id>', <version> ) : { <event_data> } }
|
||||
invalid_events = [
|
||||
# Missing required keys
|
||||
('hub.jupyter.org/server-action', 1, dict(action='start'))
|
||||
]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def eventlog_sink(app):
|
||||
"""Return eventlog and sink objects"""
|
||||
sink = io.StringIO()
|
||||
handler = logging.StreamHandler(sink)
|
||||
# Update the EventLog config with handler
|
||||
cfg = Config()
|
||||
cfg.EventLog.handlers = [handler]
|
||||
|
||||
with mock.patch.object(app.config, 'EventLog', cfg.EventLog):
|
||||
# recreate the eventlog object with our config
|
||||
app.init_eventlog()
|
||||
# return the sink from the fixture
|
||||
yield app.eventlog, sink
|
||||
# reset eventlog with original config
|
||||
app.init_eventlog()
|
||||
|
||||
|
||||
@pytest.mark.parametrize('schema, version, event', valid_events)
|
||||
def test_valid_events(eventlog_sink, schema, version, event):
|
||||
eventlog, sink = eventlog_sink
|
||||
eventlog.allowed_schemas = [schema]
|
||||
# Record event
|
||||
eventlog.record_event(schema, version, event)
|
||||
# Inspect consumed event
|
||||
output = sink.getvalue()
|
||||
assert output
|
||||
data = json.loads(output)
|
||||
# Verify event data was recorded
|
||||
assert data is not None
|
||||
|
||||
|
||||
@pytest.mark.parametrize('schema, version, event', invalid_events)
|
||||
def test_invalid_events(eventlog_sink, schema, version, event):
|
||||
eventlog, sink = eventlog_sink
|
||||
eventlog.allowed_schemas = [schema]
|
||||
|
||||
# Make sure an error is thrown when bad events are recorded
|
||||
with pytest.raises(jsonschema.ValidationError):
|
||||
recorded_event = eventlog.record_event(schema, version, event)
|
@@ -3,6 +3,7 @@ async_generator>=1.8
|
||||
certipy>=0.1.2
|
||||
entrypoints
|
||||
jinja2
|
||||
jupyter_telemetry
|
||||
oauthlib>=3.0
|
||||
pamela
|
||||
prometheus_client>=0.0.21
|
||||
|
Reference in New Issue
Block a user