mirror of
https://github.com/jupyterhub/jupyterhub.git
synced 2025-10-15 22:13:00 +00:00
Merge pull request #2698 from Zsailer/eventlog-tests
Instrument JupyterHub to record events with jupyter_telemetry [Part II]
This commit is contained in:
@@ -5,4 +5,5 @@ alabaster_jupyterhub
|
|||||||
autodoc-traits
|
autodoc-traits
|
||||||
recommonmark==0.5.0
|
recommonmark==0.5.0
|
||||||
sphinx-copybutton
|
sphinx-copybutton
|
||||||
|
sphinx-jsonschema
|
||||||
sphinx>=1.7
|
sphinx>=1.7
|
||||||
|
@@ -19,6 +19,7 @@ extensions = [
|
|||||||
'sphinx.ext.napoleon',
|
'sphinx.ext.napoleon',
|
||||||
'autodoc_traits',
|
'autodoc_traits',
|
||||||
'sphinx_copybutton',
|
'sphinx_copybutton',
|
||||||
|
'sphinx-jsonschema',
|
||||||
]
|
]
|
||||||
|
|
||||||
templates_path = ['_templates']
|
templates_path = ['_templates']
|
||||||
|
50
docs/source/events/index.rst
Normal file
50
docs/source/events/index.rst
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
Eventlogging and Telemetry
|
||||||
|
==========================
|
||||||
|
|
||||||
|
JupyterHub can be configured to record structured events from a running server using Jupyter's `Telemetry System`_. The types of events that JupyterHub emits are defined by `JSON schemas`_ listed below_.
|
||||||
|
|
||||||
|
Events are emitted as JSON data, defined and validated by the JSON schemas listed below.
|
||||||
|
|
||||||
|
|
||||||
|
.. _logging: https://docs.python.org/3/library/logging.html
|
||||||
|
.. _`Telemetry System`: https://github.com/jupyter/telemetry
|
||||||
|
.. _`JSON schemas`: https://json-schema.org/
|
||||||
|
|
||||||
|
How to emit events
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Event logging is handled by the ``EventLog`` object. This leverages Python's standard logging_ library to emit, filter, and collect event data.
|
||||||
|
|
||||||
|
|
||||||
|
To begin recording events, you'll need to set two configurations:
|
||||||
|
|
||||||
|
1. ``handlers``: tells the EventLog *where* to route your events. This trait is a list of Python logging handlers that route events to a destination such as a file or a stream.
|
||||||
|
2. ``allowed_schemas``: tells the EventLog *which* events should be recorded. No events are emitted by default; all recorded events must be listed here.
|
||||||
|
|
||||||
|
Here's a basic example:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
c.EventLog.handlers = [
|
||||||
|
logging.FileHandler('event.log'),
|
||||||
|
]
|
||||||
|
|
||||||
|
c.EventLog.allowed_schemas = [
|
||||||
|
'hub.jupyter.org/server-action'
|
||||||
|
]
|
||||||
|
|
||||||
|
The output is a file, ``"event.log"``, with events recorded as JSON data.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
.. _below:
|
||||||
|
|
||||||
|
Event schemas
|
||||||
|
-------------
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
server-actions.rst
|
1
docs/source/events/server-actions.rst
Normal file
1
docs/source/events/server-actions.rst
Normal file
@@ -0,0 +1 @@
|
|||||||
|
.. jsonschema:: ../../../jupyterhub/event-schemas/server-actions/v1.yaml
|
@@ -134,6 +134,14 @@ helps keep our community welcoming to as many people as possible.
|
|||||||
contributing/roadmap
|
contributing/roadmap
|
||||||
contributing/security
|
contributing/security
|
||||||
|
|
||||||
|
Eventlogging and Telemetry
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 1
|
||||||
|
|
||||||
|
events/index
|
||||||
|
|
||||||
Upgrading JupyterHub
|
Upgrading JupyterHub
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
|
@@ -28,7 +28,7 @@ if 'jupyterhub' in sys.modules:
|
|||||||
alembic_logger.propagate = True
|
alembic_logger.propagate = True
|
||||||
alembic_logger.parent = app.log
|
alembic_logger.parent = app.log
|
||||||
else:
|
else:
|
||||||
fileConfig(config.config_file_name)
|
fileConfig(config.config_file_name, disable_existing_loggers=False)
|
||||||
else:
|
else:
|
||||||
fileConfig(config.config_file_name)
|
fileConfig(config.config_file_name)
|
||||||
|
|
||||||
|
@@ -5,6 +5,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import atexit
|
import atexit
|
||||||
import binascii
|
import binascii
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
@@ -18,6 +19,7 @@ from datetime import timedelta
|
|||||||
from datetime import timezone
|
from datetime import timezone
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from getpass import getuser
|
from getpass import getuser
|
||||||
|
from glob import glob
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
from urllib.parse import unquote
|
from urllib.parse import unquote
|
||||||
@@ -59,6 +61,8 @@ from traitlets import (
|
|||||||
)
|
)
|
||||||
from traitlets.config import Application, Configurable, catch_config_error
|
from traitlets.config import Application, Configurable, catch_config_error
|
||||||
|
|
||||||
|
from jupyter_telemetry.eventlog import EventLog
|
||||||
|
|
||||||
here = os.path.dirname(__file__)
|
here = os.path.dirname(__file__)
|
||||||
|
|
||||||
import jupyterhub
|
import jupyterhub
|
||||||
@@ -2113,6 +2117,7 @@ class JupyterHub(Application):
|
|||||||
internal_ssl_ca=self.internal_ssl_ca,
|
internal_ssl_ca=self.internal_ssl_ca,
|
||||||
trusted_alt_names=self.trusted_alt_names,
|
trusted_alt_names=self.trusted_alt_names,
|
||||||
shutdown_on_logout=self.shutdown_on_logout,
|
shutdown_on_logout=self.shutdown_on_logout,
|
||||||
|
eventlog=self.eventlog,
|
||||||
)
|
)
|
||||||
# allow configured settings to have priority
|
# allow configured settings to have priority
|
||||||
settings.update(self.tornado_settings)
|
settings.update(self.tornado_settings)
|
||||||
@@ -2138,6 +2143,16 @@ class JupyterHub(Application):
|
|||||||
e,
|
e,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def init_eventlog(self):
    """Create the application's EventLog and register the bundled schemas.

    A fresh ``EventLog`` (parented to this application) is stored on
    ``self.eventlog``. The ``event-schemas`` directory next to this module
    is then walked recursively and every file ending in ``.yaml`` is
    registered with that EventLog.
    """
    self.eventlog = EventLog(parent=self)

    schema_root = os.path.join(here, 'event-schemas')
    for folder, _subdirs, filenames in os.walk(schema_root):
        for filename in filenames:
            # only YAML files are treated as schema definitions
            if filename.endswith('.yaml'):
                self.eventlog.register_schema_file(os.path.join(folder, filename))
|
||||||
|
|
||||||
def write_pid_file(self):
|
def write_pid_file(self):
|
||||||
pid = os.getpid()
|
pid = os.getpid()
|
||||||
if self.pid_file:
|
if self.pid_file:
|
||||||
@@ -2189,6 +2204,7 @@ class JupyterHub(Application):
|
|||||||
_log_cls("Authenticator", self.authenticator_class)
|
_log_cls("Authenticator", self.authenticator_class)
|
||||||
_log_cls("Spawner", self.spawner_class)
|
_log_cls("Spawner", self.spawner_class)
|
||||||
|
|
||||||
|
self.init_eventlog()
|
||||||
self.init_pycurl()
|
self.init_pycurl()
|
||||||
self.init_secrets()
|
self.init_secrets()
|
||||||
self.init_internal_ssl()
|
self.init_internal_ssl()
|
||||||
|
59
jupyterhub/event-schemas/server-actions/v1.yaml
Normal file
59
jupyterhub/event-schemas/server-actions/v1.yaml
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
"$id": hub.jupyter.org/server-action
|
||||||
|
version: 1
|
||||||
|
title: JupyterHub server events
|
||||||
|
description: |
|
||||||
|
Record actions on user servers made via JupyterHub.
|
||||||
|
|
||||||
|
JupyterHub can perform various actions on user servers via
|
||||||
|
direct interaction from users, or via the API. This event is
|
||||||
|
recorded whenever either of those happen.
|
||||||
|
|
||||||
|
Limitations:
|
||||||
|
|
||||||
|
1. This does not record all server starts / stops, only those
|
||||||
|
explicitly performed by JupyterHub. For example, a user's server
|
||||||
|
can go down because the node it was running on dies. That will
|
||||||
|
not cause an event to be recorded, since it was not initiated
|
||||||
|
by JupyterHub. In practice this happens often, so this is not
|
||||||
|
a complete record.
|
||||||
|
2. Events are only recorded when an action succeeds.
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- action
|
||||||
|
- username
|
||||||
|
- servername
|
||||||
|
properties:
|
||||||
|
action:
|
||||||
|
enum:
|
||||||
|
- start
|
||||||
|
- stop
|
||||||
|
description: |
|
||||||
|
Action performed by JupyterHub.
|
||||||
|
|
||||||
|
This is a required field.
|
||||||
|
|
||||||
|
Possible Values:
|
||||||
|
|
||||||
|
1. start
|
||||||
|
A user's server was successfully started
|
||||||
|
|
||||||
|
2. stop
|
||||||
|
A user's server was successfully stopped
|
||||||
|
username:
|
||||||
|
type: string
|
||||||
|
description: |
|
||||||
|
Name of the user whose server this action was performed on.
|
||||||
|
|
||||||
|
This is the normalized name used by JupyterHub itself,
|
||||||
|
which is derived from the authentication provider used but
|
||||||
|
might not be the same as used in the authentication provider.
|
||||||
|
servername:
|
||||||
|
type: string
|
||||||
|
description: |
|
||||||
|
Name of the server this action was performed on.
|
||||||
|
|
||||||
|
JupyterHub supports each user having multiple servers with
|
||||||
|
arbitrary names, and this field specifies the name of the
|
||||||
|
server.
|
||||||
|
|
||||||
|
The 'default' server is denoted by the empty string
|
@@ -156,6 +156,10 @@ class BaseHandler(RequestHandler):
|
|||||||
def oauth_provider(self):
|
def oauth_provider(self):
|
||||||
return self.settings['oauth_provider']
|
return self.settings['oauth_provider']
|
||||||
|
|
||||||
|
@property
def eventlog(self):
    """The EventLog object stored under ``'eventlog'`` in the handler settings."""
    settings = self.settings
    return settings['eventlog']
|
||||||
|
|
||||||
def finish(self, *args, **kwargs):
|
def finish(self, *args, **kwargs):
|
||||||
"""Roll back any uncommitted transactions from the handler."""
|
"""Roll back any uncommitted transactions from the handler."""
|
||||||
if self.db.dirty:
|
if self.db.dirty:
|
||||||
@@ -848,6 +852,11 @@ class BaseHandler(RequestHandler):
|
|||||||
SERVER_SPAWN_DURATION_SECONDS.labels(
|
SERVER_SPAWN_DURATION_SECONDS.labels(
|
||||||
status=ServerSpawnStatus.success
|
status=ServerSpawnStatus.success
|
||||||
).observe(time.perf_counter() - spawn_start_time)
|
).observe(time.perf_counter() - spawn_start_time)
|
||||||
|
self.eventlog.record_event(
|
||||||
|
'hub.jupyter.org/server-action',
|
||||||
|
1,
|
||||||
|
{'action': 'start', 'username': user.name, 'servername': server_name},
|
||||||
|
)
|
||||||
proxy_add_start_time = time.perf_counter()
|
proxy_add_start_time = time.perf_counter()
|
||||||
spawner._proxy_pending = True
|
spawner._proxy_pending = True
|
||||||
try:
|
try:
|
||||||
@@ -1028,6 +1037,15 @@ class BaseHandler(RequestHandler):
|
|||||||
SERVER_STOP_DURATION_SECONDS.labels(
|
SERVER_STOP_DURATION_SECONDS.labels(
|
||||||
status=ServerStopStatus.success
|
status=ServerStopStatus.success
|
||||||
).observe(toc - tic)
|
).observe(toc - tic)
|
||||||
|
self.eventlog.record_event(
|
||||||
|
'hub.jupyter.org/server-action',
|
||||||
|
1,
|
||||||
|
{
|
||||||
|
'action': 'stop',
|
||||||
|
'username': user.name,
|
||||||
|
'servername': server_name,
|
||||||
|
},
|
||||||
|
)
|
||||||
except:
|
except:
|
||||||
SERVER_STOP_DURATION_SECONDS.labels(
|
SERVER_STOP_DURATION_SECONDS.labels(
|
||||||
status=ServerStopStatus.failure
|
status=ServerStopStatus.failure
|
||||||
|
80
jupyterhub/tests/test_eventlog.py
Normal file
80
jupyterhub/tests/test_eventlog.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
"""Tests for Eventlogging in JupyterHub.
|
||||||
|
|
||||||
|
To test a new schema or event, simply add it to the
|
||||||
|
`valid_events` and `invalid_events` variables below.
|
||||||
|
|
||||||
|
You *shouldn't* need to write new tests.
|
||||||
|
"""
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
import jsonschema
|
||||||
|
import pytest
|
||||||
|
from traitlets.config import Config
|
||||||
|
|
||||||
|
from .mocking import MockHub
|
||||||
|
|
||||||
|
|
||||||
|
# To test new schemas, add them to the `valid_events`
|
||||||
|
# and `invalid_events` lists below.
|
||||||
|
|
||||||
|
# To test valid events, add event item with the form:
|
||||||
|
# ( '<schema id>', <version>, { <event_data> } )
|
||||||
|
valid_events = [
|
||||||
|
(
|
||||||
|
'hub.jupyter.org/server-action',
|
||||||
|
1,
|
||||||
|
dict(action='start', username='test-username', servername='test-servername'),
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
# To test invalid events, add event item with the form:
|
||||||
|
# ( '<schema id>', <version>, { <event_data> } )
|
||||||
|
invalid_events = [
|
||||||
|
# Missing required keys
|
||||||
|
('hub.jupyter.org/server-action', 1, dict(action='start'))
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def eventlog_sink(app):
    """Yield ``(eventlog, sink)`` with the app's EventLog writing to ``sink``.

    ``sink`` is an in-memory text buffer attached through a logging
    ``StreamHandler``, so tests can read back whatever was emitted.
    """
    sink = io.StringIO()

    # Build an EventLog config section whose only handler writes to the sink
    override = Config()
    override.EventLog.handlers = [logging.StreamHandler(sink)]

    with mock.patch.object(app.config, 'EventLog', override.EventLog):
        # rebuild the eventlog so it is created from the patched config
        app.init_eventlog()
        # hand the eventlog and its sink to the test
        yield app.eventlog, sink
    # the patch is undone here; rebuild again from the original config
    app.init_eventlog()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('schema, version, event', valid_events)
def test_valid_events(eventlog_sink, schema, version, event):
    """Recording a valid event for an allowed schema writes JSON to the sink."""
    log, buffer = eventlog_sink
    log.allowed_schemas = [schema]

    # Emit the event through the handler configured by the fixture
    log.record_event(schema, version, event)

    # Whatever reached the sink must be non-empty and parse as JSON
    written = buffer.getvalue()
    assert written
    parsed = json.loads(written)
    assert parsed is not None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('schema, version, event', invalid_events)
def test_invalid_events(eventlog_sink, schema, version, event):
    """Recording an event that violates its schema raises a ValidationError."""
    # the sink is not inspected here; only the raised error matters
    eventlog, _ = eventlog_sink
    eventlog.allowed_schemas = [schema]

    # Make sure an error is thrown when bad events are recorded
    with pytest.raises(jsonschema.ValidationError):
        eventlog.record_event(schema, version, event)
|
@@ -3,6 +3,7 @@ async_generator>=1.8
|
|||||||
certipy>=0.1.2
|
certipy>=0.1.2
|
||||||
entrypoints
|
entrypoints
|
||||||
jinja2
|
jinja2
|
||||||
|
jupyter_telemetry
|
||||||
oauthlib>=3.0
|
oauthlib>=3.0
|
||||||
pamela
|
pamela
|
||||||
prometheus_client>=0.0.21
|
prometheus_client>=0.0.21
|
||||||
|
Reference in New Issue
Block a user