Merge pull request #2698 from Zsailer/eventlog-tests

Instrument JupyterHub to record events with jupyter_telemetry [Part II]
This commit is contained in:
Min RK
2019-09-24 11:08:45 +02:00
committed by GitHub
11 changed files with 236 additions and 1 deletions

View File

@@ -5,4 +5,5 @@ alabaster_jupyterhub
autodoc-traits
recommonmark==0.5.0
sphinx-copybutton
sphinx-jsonschema
sphinx>=1.7

View File

@@ -19,6 +19,7 @@ extensions = [
'sphinx.ext.napoleon',
'autodoc_traits',
'sphinx_copybutton',
'sphinx-jsonschema',
]
templates_path = ['_templates']

View File

@@ -0,0 +1,50 @@
Eventlogging and Telemetry
==========================
JupyterHub can be configured to record structured events from a running server using Jupyter's `Telemetry System`_. The types of events that JupyterHub emits are defined by `JSON schemas`_ listed below_
emitted as JSON data, defined and validated by the JSON schemas listed below.
.. _logging: https://docs.python.org/3/library/logging.html
.. _`Telemetry System`: https://github.com/jupyter/telemetry
.. _`JSON schemas`: https://json-schema.org/
How to emit events
------------------
Event logging is handled by its ``Eventlog`` object. This leverages Python's standing logging_ library to emit, filter, and collect event data.
To begin recording events, you'll need to set two configurations:
1. ``handlers``: tells the EventLog *where* to route your events. This trait is a list of Python logging handlers that route events to
2. ``allows_schemas``: tells the EventLog *which* events should be recorded. No events are emitted by default; all recorded events must be listed here.
Here's a basic example:
.. code-block::
import logging
c.EventLog.handlers = [
logging.FileHandler('event.log'),
]
c.EventLog.allowed_schemas = [
'hub.jupyter.org/server-action'
]
The output is a file, ``"event.log"``, with events recorded as JSON data.
.. _below:
Event schemas
-------------
.. toctree::
:maxdepth: 2
server-actions.rst

View File

@@ -0,0 +1 @@
.. jsonschema:: ../../../jupyterhub/event-schemas/server-actions/v1.yaml

View File

@@ -134,6 +134,14 @@ helps keep our community welcoming to as many people as possible.
contributing/roadmap
contributing/security
Eventlogging and Telemetry
--------------------------
.. toctree::
:maxdepth: 1
events/index
Upgrading JupyterHub
--------------------

View File

@@ -28,7 +28,7 @@ if 'jupyterhub' in sys.modules:
alembic_logger.propagate = True
alembic_logger.parent = app.log
else:
fileConfig(config.config_file_name)
fileConfig(config.config_file_name, disable_existing_loggers=False)
else:
fileConfig(config.config_file_name)

View File

@@ -5,6 +5,7 @@
import asyncio
import atexit
import binascii
import json
import logging
import os
import re
@@ -18,6 +19,7 @@ from datetime import timedelta
from datetime import timezone
from functools import partial
from getpass import getuser
from glob import glob
from operator import itemgetter
from textwrap import dedent
from urllib.parse import unquote
@@ -59,6 +61,8 @@ from traitlets import (
)
from traitlets.config import Application, Configurable, catch_config_error
from jupyter_telemetry.eventlog import EventLog
here = os.path.dirname(__file__)
import jupyterhub
@@ -2113,6 +2117,7 @@ class JupyterHub(Application):
internal_ssl_ca=self.internal_ssl_ca,
trusted_alt_names=self.trusted_alt_names,
shutdown_on_logout=self.shutdown_on_logout,
eventlog=self.eventlog,
)
# allow configured settings to have priority
settings.update(self.tornado_settings)
@@ -2138,6 +2143,16 @@ class JupyterHub(Application):
e,
)
def init_eventlog(self):
"""Set up the event logging system."""
self.eventlog = EventLog(parent=self)
for dirname, _, files in os.walk(os.path.join(here, 'event-schemas')):
for file in files:
if not file.endswith('.yaml'):
continue
self.eventlog.register_schema_file(os.path.join(dirname, file))
def write_pid_file(self):
pid = os.getpid()
if self.pid_file:
@@ -2189,6 +2204,7 @@ class JupyterHub(Application):
_log_cls("Authenticator", self.authenticator_class)
_log_cls("Spawner", self.spawner_class)
self.init_eventlog()
self.init_pycurl()
self.init_secrets()
self.init_internal_ssl()

View File

@@ -0,0 +1,59 @@
"$id": hub.jupyter.org/server-action
version: 1
title: JupyterHub server events
description: |
Record actions on user servers made via JupyterHub.
JupyterHub can perform various actions on user servers via
direct interaction from users, or via the API. This event is
recorded whenever either of those happen.
Limitations:
1. This does not record all server starts / stops, only those
explicitly performed by JupyterHub. For example, a user's server
can go down because the node it was running on dies. That will
not cause an event to be recorded, since it was not initiated
by JupyterHub. In practice this happens often, so this is not
a complete record.
2. Events are only recorded when an action succeeds.
type: object
required:
- action
- username
- servername
properties:
action:
enum:
- start
- stop
description: |
Action performed by JupyterHub.
This is a required field.
Possibl Values:
1. start
A user's server was successfully started
2. stop
A user's server was successfully stopped
username:
type: string
description: |
Name of the user whose server this action was performed on.
This is the normalized name used by JupyterHub itself,
which is derived from the authentication provider used but
might not be the same as used in the authentication provider.
servername:
type: string
description: |
Name of the server this action was performed on.
JupyterHub supports each user having multiple servers with
arbitrary names, and this field specifies the name of the
server.
The 'default' server is denoted by the empty string

View File

@@ -156,6 +156,10 @@ class BaseHandler(RequestHandler):
def oauth_provider(self):
return self.settings['oauth_provider']
@property
def eventlog(self):
return self.settings['eventlog']
def finish(self, *args, **kwargs):
"""Roll back any uncommitted transactions from the handler."""
if self.db.dirty:
@@ -848,6 +852,11 @@ class BaseHandler(RequestHandler):
SERVER_SPAWN_DURATION_SECONDS.labels(
status=ServerSpawnStatus.success
).observe(time.perf_counter() - spawn_start_time)
self.eventlog.record_event(
'hub.jupyter.org/server-action',
1,
{'action': 'start', 'username': user.name, 'servername': server_name},
)
proxy_add_start_time = time.perf_counter()
spawner._proxy_pending = True
try:
@@ -1028,6 +1037,15 @@ class BaseHandler(RequestHandler):
SERVER_STOP_DURATION_SECONDS.labels(
status=ServerStopStatus.success
).observe(toc - tic)
self.eventlog.record_event(
'hub.jupyter.org/server-action',
1,
{
'action': 'stop',
'username': user.name,
'servername': server_name,
},
)
except:
SERVER_STOP_DURATION_SECONDS.labels(
status=ServerStopStatus.failure

View File

@@ -0,0 +1,80 @@
"""Tests for Eventlogging in JupyterHub.
To test a new schema or event, simply add it to the
`valid_events` and `invalid_events` variables below.
You *shouldn't* need to write new tests.
"""
import io
import json
import logging
from unittest import mock
import jsonschema
import pytest
from traitlets.config import Config
from .mocking import MockHub
# To test new schemas, add them to the `valid_events`
# and `invalid_events` dictionary below.
# To test valid events, add event item with the form:
# { ( '<schema id>', <version> ) : { <event_data> } }
valid_events = [
(
'hub.jupyter.org/server-action',
1,
dict(action='start', username='test-username', servername='test-servername'),
)
]
# To test invalid events, add event item with the form:
# { ( '<schema id>', <version> ) : { <event_data> } }
invalid_events = [
# Missing required keys
('hub.jupyter.org/server-action', 1, dict(action='start'))
]
@pytest.fixture
def eventlog_sink(app):
"""Return eventlog and sink objects"""
sink = io.StringIO()
handler = logging.StreamHandler(sink)
# Update the EventLog config with handler
cfg = Config()
cfg.EventLog.handlers = [handler]
with mock.patch.object(app.config, 'EventLog', cfg.EventLog):
# recreate the eventlog object with our config
app.init_eventlog()
# return the sink from the fixture
yield app.eventlog, sink
# reset eventlog with original config
app.init_eventlog()
@pytest.mark.parametrize('schema, version, event', valid_events)
def test_valid_events(eventlog_sink, schema, version, event):
eventlog, sink = eventlog_sink
eventlog.allowed_schemas = [schema]
# Record event
eventlog.record_event(schema, version, event)
# Inspect consumed event
output = sink.getvalue()
assert output
data = json.loads(output)
# Verify event data was recorded
assert data is not None
@pytest.mark.parametrize('schema, version, event', invalid_events)
def test_invalid_events(eventlog_sink, schema, version, event):
eventlog, sink = eventlog_sink
eventlog.allowed_schemas = [schema]
# Make sure an error is thrown when bad events are recorded
with pytest.raises(jsonschema.ValidationError):
recorded_event = eventlog.record_event(schema, version, event)

View File

@@ -3,6 +3,7 @@ async_generator>=1.8
certipy>=0.1.2
entrypoints
jinja2
jupyter_telemetry
oauthlib>=3.0
pamela
prometheus_client>=0.0.21