From 3dca0df55f4938f157e159d85786c2e6a920bf31 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Thu, 31 Mar 2016 16:10:34 -0700 Subject: [PATCH 1/8] Add statsd to the base JupyterHub app Not actually emitting any metrics yet --- jupyterhub/app.py | 21 +++++++++++++++++++++ jupyterhub/emptyclass.py | 13 +++++++++++++ requirements.txt | 1 + 3 files changed, 35 insertions(+) create mode 100644 jupyterhub/emptyclass.py diff --git a/jupyterhub/app.py b/jupyterhub/app.py index 00a17448..e38115b3 100644 --- a/jupyterhub/app.py +++ b/jupyterhub/app.py @@ -12,6 +12,7 @@ import signal import socket import sys import threading +import statsd from datetime import datetime from distutils.version import LooseVersion as V from getpass import getuser @@ -59,6 +60,10 @@ from .utils import ( from .auth import Authenticator, PAMAuthenticator from .spawner import Spawner, LocalProcessSpawner +# For faking stats +from .emptyclass import EmptyClass + + common_aliases = { 'log-level': 'Application.log_level', 'f': 'JupyterHub.config_file', @@ -492,6 +497,22 @@ class JupyterHub(Application): help="Extra log handlers to set on JupyterHub logger", ).tag(config=True) + @property + def statsd(self): + if hasattr(self, '_statsd'): + return self._statsd + if self.statsd_host: + self._statsd = statsd.StatsClient( + self.statsd_host, + self.statsd_port, + self.statsd_prefix + ) + return self._statsd + else: + # return an empty mock object! + self._statsd = EmptyClass() + return self._statsd + def init_logging(self): # This prevents double log messages because tornado use a root logger that # self.log is a child of. The logging module dipatches log messages to a log diff --git a/jupyterhub/emptyclass.py b/jupyterhub/emptyclass.py new file mode 100644 index 00000000..b3eda4be --- /dev/null +++ b/jupyterhub/emptyclass.py @@ -0,0 +1,13 @@ +""" +Simple empty class that returns itself for all functions called on it. +This allows us to call any method of any name on this, and it'll return another +instance of itself that'll allow any method to be called on it. + +Primarily used to mock out the statsd client when statsd is not being used +""" +class EmptyClass: + def empty_function(self, *args, **kwargs): + return self + + def __getattr__(self, attr): + return self.empty_function diff --git a/requirements.txt b/requirements.txt index df714b52..c20e7f0d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,6 @@ traitlets>=4.1 tornado>=4.1 jinja2 pamela +statsd sqlalchemy>=1.0 requests From 80556360acaaf8f8bdbeae7b4d52f91b212bbccd Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Thu, 31 Mar 2016 16:42:10 -0700 Subject: [PATCH 2/8] Add statsd to the base request handler --- jupyterhub/app.py | 1 + jupyterhub/handlers/base.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/jupyterhub/app.py b/jupyterhub/app.py index e38115b3..53200fc3 100644 --- a/jupyterhub/app.py +++ b/jupyterhub/app.py @@ -1012,6 +1012,7 @@ class JupyterHub(Application): version_hash=version_hash, subdomain_host=subdomain_host, domain=domain, + statsd=self.statsd, ) # allow configured settings to have priority settings.update(self.tornado_settings) diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py index c03cc36f..80e3987d 100644 --- a/jupyterhub/handlers/base.py +++ b/jupyterhub/handlers/base.py @@ -75,6 +75,10 @@ class BaseHandler(RequestHandler): def proxy(self): return self.settings['proxy'] + @property + def statsd(self): + return self.settings['statsd'] + @property def authenticator(self): return self.settings.get('authenticator', None) From cac5175c9b126157b6ca2c24c0783b849990d97c Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Thu, 31 Mar 2016 16:42:25 -0700 Subject: [PATCH 3/8] Send CSP metrics to statsd --- jupyterhub/handlers/base.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py index 80e3987d..b4b8ad12 100644 --- a/jupyterhub/handlers/base.py +++ b/jupyterhub/handlers/base.py @@ -512,8 +512,12 @@ class CSPReportHandler(BaseHandler): @web.authenticated def post(self): '''Log a content security policy violation report''' - self.log.warning("Content security violation: %s", - self.request.body.decode('utf8', 'replace')) + self.log.warning( + "Content security violation: %s", + self.request.body.decode('utf8', 'replace') + ) + # Report it to statsd as well + self.statsd.incr('csp_report') default_handlers = [ (r'/user/([^/]+)(/.*)?', UserSpawnHandler), From 788129da1232673dacb48299bb86d30f18c1bf89 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Thu, 31 Mar 2016 16:57:25 -0700 Subject: [PATCH 4/8] Send metrics for login and logout actions --- jupyterhub/handlers/login.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/jupyterhub/handlers/login.py b/jupyterhub/handlers/login.py index cb1a2c12..0833cf24 100644 --- a/jupyterhub/handlers/login.py +++ b/jupyterhub/handlers/login.py @@ -20,6 +20,7 @@ class LogoutHandler(BaseHandler): self.clear_login_cookie(name) user.other_user_cookies = set([]) self.redirect(self.hub.server.base_url, permanent=False) + self.statsd.incr('logout') class LoginHandler(BaseHandler): @@ -35,6 +36,7 @@ class LoginHandler(BaseHandler): ) def get(self): + self.statsd.incr('login.request') next_url = self.get_argument('next', '') if not next_url.startswith('/'): # disallow non-absolute next URLs (e.g. full URLs) @@ -61,8 +63,13 @@ class LoginHandler(BaseHandler): for arg in self.request.arguments: data[arg] = self.get_argument(arg) + auth_timer = self.statsd.timer('login.authenticate') username = yield self.authenticate(data) + auth_timer.stop(send=False) + if username: + self.statsd.incr('login.success') + self.statsd.timing('login.authenticate.success', auth_timer.ms) user = self.user_from_username(username) already_running = False if user.spawner: @@ -78,6 +85,8 @@ class LoginHandler(BaseHandler): self.redirect(next_url) self.log.info("User logged in: %s", username) else: + self.statsd.incr('login.failure') + self.statsd.timing('login.authenticate.failure', auth_timer.ms) self.log.debug("Failed login for %s", data.get('username', 'unknown user')) html = self._render( login_error='Invalid username or password', From 5a15d7a219ee90766c05ad0688b9094e03625593 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Thu, 31 Mar 2016 17:05:28 -0700 Subject: [PATCH 5/8] Actually start the timer --- jupyterhub/handlers/login.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyterhub/handlers/login.py b/jupyterhub/handlers/login.py index 0833cf24..2b969252 100644 --- a/jupyterhub/handlers/login.py +++ b/jupyterhub/handlers/login.py @@ -63,7 +63,7 @@ class LoginHandler(BaseHandler): for arg in self.request.arguments: data[arg] = self.get_argument(arg) - auth_timer = self.statsd.timer('login.authenticate') + auth_timer = self.statsd.timer('login.authenticate').start() username = yield self.authenticate(data) auth_timer.stop(send=False) From 4644e7019e8382f36257a574ef8e3e3a48dad3a1 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Thu, 31 Mar 2016 18:14:09 -0700 Subject: [PATCH 6/8] Send metrics about running and active users Uses the standard user last-updated activity callback --- jupyterhub/app.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/jupyterhub/app.py b/jupyterhub/app.py index 53200fc3..2c975234 100644 --- a/jupyterhub/app.py +++ b/jupyterhub/app.py @@ -1134,6 +1134,8 @@ class JupyterHub(Application): def update_last_activity(self): """Update User.last_activity timestamps from the proxy""" routes = yield self.proxy.get_routes() + users_count = 0 + active_users_count = 0 for prefix, route in routes.items(): if 'user' not in route: # not a user route, ignore it @@ -1147,6 +1149,12 @@ class JupyterHub(Application): except Exception: dt = datetime.strptime(route['last_activity'], ISO8601_s) user.last_activity = max(user.last_activity, dt) + # FIXME: Make this configurable duration. 30 minutes for now! + if (datetime.now() - user.last_activity).total_seconds() < 30 * 60: + active_users_count += 1 + users_count += 1 + self.statsd.gauge('users.running', users_count) + self.statsd.gauge('users.active', active_users_count) self.db.commit() yield self.proxy.check_routes(self.users, routes) From 2dd3d3c448604bbaf8889a0c61eb0dcd58217c29 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Fri, 1 Apr 2016 00:50:38 -0700 Subject: [PATCH 7/8] Send timing info about spawner success / failure --- jupyterhub/handlers/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py index b4b8ad12..f5ac56d8 100644 --- a/jupyterhub/handlers/base.py +++ b/jupyterhub/handlers/base.py @@ -304,6 +304,7 @@ class BaseHandler(RequestHandler): return toc = IOLoop.current().time() self.log.info("User %s server took %.3f seconds to start", user.name, toc-tic) + self.statsd.timing('spawner.success', (toc - tic) * 1000) yield self.proxy.add_user(user) user.spawner.add_poll_callback(self.user_stopped, user) @@ -327,6 +328,8 @@ class BaseHandler(RequestHandler): # schedule finish for when the user finishes spawning IOLoop.current().add_future(f, finish_user_spawn) else: + toc = IOLoop.current().time() + self.statsd.timing('spawner.failure', (toc - tic) * 1000) raise web.HTTPError(500, "Spawner failed to start [status=%s]" % status) else: yield finish_user_spawn() From 9ffebd0c5eee5155ba35b6ce02f6e12abf5b8ece Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Fri, 1 Apr 2016 14:05:02 -0700 Subject: [PATCH 8/8] Send metrics about various redirects from User spawning --- jupyterhub/handlers/base.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py index f5ac56d8..06847c4b 100644 --- a/jupyterhub/handlers/base.py +++ b/jupyterhub/handlers/base.py @@ -478,6 +478,7 @@ class UserSpawnHandler(BaseHandler): if current_user.spawner: if current_user.spawn_pending: # spawn has started, but not finished + self.statsd.incr('redirects.user_spawn_pending', 1) html = self.render_template("spawn_pending.html", user=current_user) self.finish(html) return @@ -497,12 +498,15 @@ class UserSpawnHandler(BaseHandler): if self.subdomain_host: target = current_user.host + target self.redirect(target) + self.statsd.incr('redirects.user_after_login') elif current_user: # logged in as a different user, redirect + self.statsd.incr('redirects.user_to_user', 1) target = url_path_join(current_user.url, user_path or '') self.redirect(target) else: # not logged in, clear any cookies and reload + self.statsd.incr('redirects.user_to_login', 1) self.clear_login_cookie() self.redirect(url_concat( self.settings['login_url'],