From 339758ec42135e92687a29ed04ca17993ce3fe6e Mon Sep 17 00:00:00 2001
From: yuvipanda
Date: Sun, 10 Dec 2017 13:23:29 -0800
Subject: [PATCH 1/9] Add RED prometheus metrics for all requests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch introduces Prometheus for exposing metrics about
JupyterHub's operation. We expose a standard /metrics endpoint that
can be queried without authentication.

We take on prometheus_client as an unconditional dependency, both to
simplify the code & because it is a pure python package with no
dependencies itself.

The first pass adds 'RED' style metrics for all HTTP requests.
http://rancher.com/red-method-for-prometheus-3-key-metrics-for-monitoring/
has some info on the RED method, but to summarize:

For each request type, record at least the following metrics:

Rate – the number of requests, per second, your services are serving.
Errors – the number of failed requests per second.
Duration – The amount of time each request takes expressed as a time interval.

This instantly gives us a lot of useful metrics in a very compact form.
---
 jupyterhub/handlers/__init__.py |  4 ++--
 jupyterhub/handlers/metrics.py  | 17 +++++++++++++++++
 jupyterhub/log.py               |  2 ++
 jupyterhub/metrics.py           | 28 ++++++++++++++++++++++++++++
 requirements.txt                |  1 +
 5 files changed, 50 insertions(+), 2 deletions(-)
 create mode 100644 jupyterhub/handlers/metrics.py
 create mode 100644 jupyterhub/metrics.py

diff --git a/jupyterhub/handlers/__init__.py b/jupyterhub/handlers/__init__.py
index 8b2ffd58..0823c183 100644
--- a/jupyterhub/handlers/__init__.py
+++ b/jupyterhub/handlers/__init__.py
@@ -1,8 +1,8 @@
 from .base import *
 from .login import *
 
-from . import base, pages, login
+from . import base, pages, login, metrics
 
 default_handlers = []
-for mod in (base, pages, login):
+for mod in (base, pages, login, metrics):
     default_handlers.extend(mod.default_handlers)
diff --git a/jupyterhub/handlers/metrics.py b/jupyterhub/handlers/metrics.py
new file mode 100644
index 00000000..60b934f9
--- /dev/null
+++ b/jupyterhub/handlers/metrics.py
@@ -0,0 +1,17 @@
+from prometheus_client import REGISTRY, CONTENT_TYPE_LATEST, generate_latest
+from tornado import gen
+
+from .base import BaseHandler
+
+class MetricsHandler(BaseHandler):
+    """
+    Handler to serve Prometheus metrics
+    """
+    @gen.coroutine
+    def get(self):
+        self.set_header('Content-Type', CONTENT_TYPE_LATEST)
+        self.write(generate_latest(REGISTRY))
+
+default_handlers = [
+    (r'/metrics$', MetricsHandler)
+]
diff --git a/jupyterhub/log.py b/jupyterhub/log.py
index 60b6288d..36a3640e 100644
--- a/jupyterhub/log.py
+++ b/jupyterhub/log.py
@@ -8,6 +8,7 @@ import traceback
 
 from tornado.log import LogFormatter, access_log
 from tornado.web import StaticFileHandler, HTTPError
+from .metrics import prometheus_log_method
 
 def coroutine_traceback(typ, value, tb):
     """Scrub coroutine frames from a traceback
@@ -120,3 +121,4 @@ def log_request(handler):
         if location:
             ns['location'] = ' → {}'.format(location)
     log_method(msg.format(**ns))
+    prometheus_log_method(handler)
diff --git a/jupyterhub/metrics.py b/jupyterhub/metrics.py
new file mode 100644
index 00000000..749ac8e6
--- /dev/null
+++ b/jupyterhub/metrics.py
@@ -0,0 +1,28 @@
+"""
+Prometheus metrics exported by JupyterHub
+"""
+from prometheus_client import Histogram
+
+REQUEST_DURATION_SECONDS = Histogram(
+    'request_duration_seconds',
+    'request duration for all HTTP requests',
+    ['method', 'handler', 'code']
+)
+
+def prometheus_log_method(handler):
+    """
+    Tornado log handler for recording RED metrics
+
+    We record the following metrics:
+    Rate – the number of requests, per second, your services are serving.
+    Errors – the number of failed requests per second.
+    Duration – The amount of time each request takes expressed as a time interval.
+
+    We use a fully qualified name of the handler as a label,
+    rather than every url path to reduce cardinality.
+    """
+    REQUEST_DURATION_SECONDS.labels(
+        method=handler.request.method,
+        handler='{}.{}'.format(handler.__class__.__module__, type(handler).__name__),
+        code=handler.get_status()
+    ).observe(handler.request.request_time())
diff --git a/requirements.txt b/requirements.txt
index a4b660af..d4fcc25a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@ pamela
 python-oauth2>=1.0
 SQLAlchemy>=1.1
 requests
+prometheus_client

From 6594e8839076f3246d38a904958df184a78dd651 Mon Sep 17 00:00:00 2001
From: yuvipanda
Date: Sun, 10 Dec 2017 14:54:34 -0800
Subject: [PATCH 2/9] Add metric recording spawn durations

Try to hit every possible exit point from the spawn_single_user
method, with an appropriate status code.

The default histogram buckets are meant for request latencies, but
spawning usually takes longer, so we use custom buckets.
---
 jupyterhub/handlers/base.py | 28 ++++++++++++++++++++++++++++
 jupyterhub/metrics.py       |  7 +++++++
 2 files changed, 35 insertions(+)

diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py
index aefea6fb..62e498c2 100644
--- a/jupyterhub/handlers/base.py
+++ b/jupyterhub/handlers/base.py
@@ -5,6 +5,7 @@
 
 import copy
 import re
+import time
 from datetime import timedelta
 from http.client import responses
 from urllib.parse import urlparse, urlunparse, parse_qs, urlencode
@@ -22,6 +23,7 @@ from .. import orm
 from ..objects import Server
 from ..spawner import LocalProcessSpawner
 from ..utils import url_path_join
+from ..metrics import SPAWN_DURATION_SECONDS
 
 # pattern for the authentication token header
 auth_header_pat = re.compile(r'^(?:token|bearer)\s+([^\s]+)$', flags=re.IGNORECASE)
@@ -388,6 +390,7 @@ class BaseHandler(RequestHandler):
     @gen.coroutine
     def spawn_single_user(self, user, server_name='', options=None):
         # in case of error, include 'try again from /hub/home' message
+        spawn_starttime = time.perf_counter()
         self.extra_error_html = self.spawn_home_error
 
         user_server_name = user.name
@@ -397,6 +400,11 @@ class BaseHandler(RequestHandler):
 
         if server_name in user.spawners and user.spawners[server_name].pending:
             pending = user.spawners[server_name].pending
+            SPAWN_DURATION_SECONDS.labels(
+                status='already-pending'
+            ).observe(
+                time.perf_counter() - spawn_starttime
+            )
             raise RuntimeError("%s pending %s" % (user_server_name, pending))
 
         # count active servers and pending spawns
@@ -415,6 +423,11 @@ class BaseHandler(RequestHandler):
                 '%s pending spawns, throttling',
                 spawn_pending_count,
             )
+            SPAWN_DURATION_SECONDS.labels(
+                status='throttled'
+            ).observe(
+                time.perf_counter() - spawn_starttime
+            )
             raise web.HTTPError(
                 429,
                 "User startup rate limit exceeded. Try again in a few minutes.",
@@ -424,6 +437,11 @@ class BaseHandler(RequestHandler):
                 '%s servers active, no space available',
                 active_count,
             )
+            SPAWN_DURATION_SECONDS.labels(
+                status='too-many-users'
+            ).observe(
+                time.perf_counter() - spawn_starttime
+            )
             raise web.HTTPError(429, "Active user limit exceeded. Try again in a few minutes.")
 
         tic = IOLoop.current().time()
@@ -456,6 +474,11 @@ class BaseHandler(RequestHandler):
             toc = IOLoop.current().time()
             self.log.info("User %s took %.3f seconds to start", user_server_name, toc-tic)
             self.statsd.timing('spawner.success', (toc - tic) * 1000)
+            SPAWN_DURATION_SECONDS.labels(
+                status='success'
+            ).observe(
+                time.perf_counter() - spawn_starttime
+            )
             spawner._proxy_pending = True
             try:
                 yield self.proxy.add_user(user, server_name)
@@ -499,6 +522,11 @@ class BaseHandler(RequestHandler):
             if status is not None:
                 toc = IOLoop.current().time()
                 self.statsd.timing('spawner.failure', (toc - tic) * 1000)
+                SPAWN_DURATION_SECONDS.labels(
+                    status='failed'
+                ).observe(
+                    time.perf_counter() - spawn_starttime
+                )
                 raise web.HTTPError(500, "Spawner failed to start [status=%s]. The logs for %s may contain details." % (
                     status, spawner._log_name))
diff --git a/jupyterhub/metrics.py b/jupyterhub/metrics.py
index 749ac8e6..89ce2b34 100644
--- a/jupyterhub/metrics.py
+++ b/jupyterhub/metrics.py
@@ -9,6 +9,13 @@ REQUEST_DURATION_SECONDS = Histogram(
     ['method', 'handler', 'code']
 )
 
+SPAWN_DURATION_SECONDS = Histogram(
+    'spawn_duration_seconds',
+    'spawn duration for all server spawns',
+    ['status'],
+    buckets=[0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, float("inf")]
+)
+
 def prometheus_log_method(handler):
     """
     Tornado log handler for recording RED metrics

From ce3a940b112c36c9c09514022d5b305b5d1cda76 Mon Sep 17 00:00:00 2001
From: yuvipanda
Date: Sun, 10 Dec 2017 15:01:47 -0800
Subject: [PATCH 3/9] Add histogram metric for proxy route addition

---
 jupyterhub/handlers/base.py | 14 +++++++++++++-
 jupyterhub/metrics.py       |  6 ++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py
index 62e498c2..c51e92d4 100644
--- a/jupyterhub/handlers/base.py
+++ b/jupyterhub/handlers/base.py
@@ -23,7 +23,7 @@ from .. import orm
 from ..objects import Server
 from ..spawner import LocalProcessSpawner
 from ..utils import url_path_join
-from ..metrics import SPAWN_DURATION_SECONDS
+from ..metrics import SPAWN_DURATION_SECONDS, PROXY_ADD_DURATION_SECONDS
 
 # pattern for the authentication token header
 auth_header_pat = re.compile(r'^(?:token|bearer)\s+([^\s]+)$', flags=re.IGNORECASE)
@@ -479,13 +479,25 @@ class BaseHandler(RequestHandler):
             ).observe(
                 time.perf_counter() - spawn_starttime
             )
+            proxy_add_starttime = time.perf_counter()
             spawner._proxy_pending = True
             try:
                 yield self.proxy.add_user(user, server_name)
+
+                PROXY_ADD_DURATION_SECONDS.labels(
+                    status='success'
+                ).observe(
+                    time.perf_counter() - proxy_add_starttime
+                )
             except Exception:
                 self.log.exception("Failed to add %s to proxy!", user_server_name)
                 self.log.error("Stopping %s to avoid inconsistent state", user_server_name)
                 yield user.stop()
+                PROXY_ADD_DURATION_SECONDS.labels(
+                    status='failure'
+                ).observe(
+                    time.perf_counter() - proxy_add_starttime
+                )
             else:
                 spawner.add_poll_callback(self.user_stopped, user, server_name)
         finally:
diff --git a/jupyterhub/metrics.py b/jupyterhub/metrics.py
index 89ce2b34..d36d853c 100644
--- a/jupyterhub/metrics.py
+++ b/jupyterhub/metrics.py
@@ -16,6 +16,12 @@ SPAWN_DURATION_SECONDS = Histogram(
     buckets=[0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, float("inf")]
 )
 
+PROXY_ADD_DURATION_SECONDS = Histogram(
+    'proxy_add_duration_seconds',
+    'duration for adding user routes to proxy',
+    ['status']
+)
+
 def prometheus_log_method(handler):
     """
     Tornado log handler for recording RED metrics

From 352df39454e52ec2189efdb927ca8dff38811389 Mon Sep 17 00:00:00 2001
From: yuvipanda
Date: Sun, 10 Dec 2017 16:52:19 -0800
Subject: [PATCH 4/9] Add version requirement for prometheus_client

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index d4fcc25a..7a71f61a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,4 +6,4 @@ pamela
 python-oauth2>=1.0
 SQLAlchemy>=1.1
 requests
-prometheus_client
+prometheus_client>=0.0.21

From 2559632079f8ec7e0bfa009238ae1dc4c62a0045 Mon Sep 17 00:00:00 2001
From: yuvipanda
Date: Sun, 10 Dec 2017 16:59:35 -0800
Subject: [PATCH 5/9] Expand prometheus related docstrings a bit more

---
 jupyterhub/log.py     | 1 +
 jupyterhub/metrics.py | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/jupyterhub/log.py b/jupyterhub/log.py
index 36a3640e..2405edf3 100644
--- a/jupyterhub/log.py
+++ b/jupyterhub/log.py
@@ -69,6 +69,7 @@ def log_request(handler):
     - get proxied IP instead of proxy IP
     - log referer for redirect and failed requests
     - log user-agent for failed requests
+    - record per-request metrics in prometheus
     """
     status = handler.get_status()
     request = handler.request
diff --git a/jupyterhub/metrics.py b/jupyterhub/metrics.py
index d36d853c..b75dde1c 100644
--- a/jupyterhub/metrics.py
+++ b/jupyterhub/metrics.py
@@ -24,7 +24,7 @@ PROXY_ADD_DURATION_SECONDS = Histogram(
 
 def prometheus_log_method(handler):
     """
-    Tornado log handler for recording RED metrics
+    Tornado log handler for recording RED metrics.
 
     We record the following metrics:
     Rate – the number of requests, per second, your services are serving.
@@ -33,6 +33,10 @@ def prometheus_log_method(handler):
 
     We use a fully qualified name of the handler as a label,
     rather than every url path to reduce cardinality.
+
+    This function should be either the value of or called from a function
+    that is the 'log_function' tornado setting. This makes it get called
+    at the end of every request, allowing us to record the metrics we need.
     """
     REQUEST_DURATION_SECONDS.labels(
         method=handler.request.method,

From 2099cd37fa9f86708e41e3731fb6a4fc6f171a06 Mon Sep 17 00:00:00 2001
From: yuvipanda
Date: Sun, 10 Dec 2017 17:00:15 -0800
Subject: [PATCH 6/9] s/starttime/start_time/

---
 jupyterhub/handlers/base.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py
index c51e92d4..5167da82 100644
--- a/jupyterhub/handlers/base.py
+++ b/jupyterhub/handlers/base.py
@@ -390,7 +390,7 @@ class BaseHandler(RequestHandler):
     @gen.coroutine
     def spawn_single_user(self, user, server_name='', options=None):
         # in case of error, include 'try again from /hub/home' message
-        spawn_starttime = time.perf_counter()
+        spawn_start_time = time.perf_counter()
         self.extra_error_html = self.spawn_home_error
 
         user_server_name = user.name
@@ -403,7 +403,7 @@ class BaseHandler(RequestHandler):
             SPAWN_DURATION_SECONDS.labels(
                 status='already-pending'
             ).observe(
-                time.perf_counter() - spawn_starttime
+                time.perf_counter() - spawn_start_time
             )
             raise RuntimeError("%s pending %s" % (user_server_name, pending))
@@ -426,7 +426,7 @@ class BaseHandler(RequestHandler):
             SPAWN_DURATION_SECONDS.labels(
                 status='throttled'
             ).observe(
-                time.perf_counter() - spawn_starttime
+                time.perf_counter() - spawn_start_time
             )
             raise web.HTTPError(
                 429,
@@ -440,7 +440,7 @@ class BaseHandler(RequestHandler):
             SPAWN_DURATION_SECONDS.labels(
                 status='too-many-users'
             ).observe(
-                time.perf_counter() - spawn_starttime
+                time.perf_counter() - spawn_start_time
             )
             raise web.HTTPError(429, "Active user limit exceeded. Try again in a few minutes.")
 
@@ -477,9 +477,9 @@ class BaseHandler(RequestHandler):
             SPAWN_DURATION_SECONDS.labels(
                 status='success'
             ).observe(
-                time.perf_counter() - spawn_starttime
+                time.perf_counter() - spawn_start_time
             )
-            proxy_add_starttime = time.perf_counter()
+            proxy_add_start_time = time.perf_counter()
             spawner._proxy_pending = True
             try:
                 yield self.proxy.add_user(user, server_name)
@@ -487,7 +487,7 @@ class BaseHandler(RequestHandler):
                 PROXY_ADD_DURATION_SECONDS.labels(
                     status='success'
                 ).observe(
-                    time.perf_counter() - proxy_add_starttime
+                    time.perf_counter() - proxy_add_start_time
                 )
             except Exception:
                 self.log.exception("Failed to add %s to proxy!", user_server_name)
@@ -496,7 +496,7 @@ class BaseHandler(RequestHandler):
                 PROXY_ADD_DURATION_SECONDS.labels(
                     status='failure'
                 ).observe(
-                    time.perf_counter() - proxy_add_starttime
+                    time.perf_counter() - proxy_add_start_time
                 )
             else:
                 spawner.add_poll_callback(self.user_stopped, user, server_name)
@@ -537,7 +537,7 @@ class BaseHandler(RequestHandler):
                 SPAWN_DURATION_SECONDS.labels(
                     status='failed'
                 ).observe(
-                    time.perf_counter() - spawn_starttime
+                    time.perf_counter() - spawn_start_time
                 )
                 raise web.HTTPError(500, "Spawner failed to start [status=%s]. The logs for %s may contain details." % (
                     status, spawner._log_name))

From c64f23a64ae8d4c15bd779b05b6939957217e9a8 Mon Sep 17 00:00:00 2001
From: yuvipanda
Date: Sun, 10 Dec 2017 17:04:10 -0800
Subject: [PATCH 7/9] Add note about metric naming conventions

---
 jupyterhub/metrics.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/jupyterhub/metrics.py b/jupyterhub/metrics.py
index b75dde1c..df724b08 100644
--- a/jupyterhub/metrics.py
+++ b/jupyterhub/metrics.py
@@ -1,5 +1,8 @@
 """
 Prometheus metrics exported by JupyterHub
+
+Read https://prometheus.io/docs/practices/naming/ for naming
+conventions for metrics & labels.
 """
 from prometheus_client import Histogram

From ea99c58da5f24362632bc6c181135da2d33f19cd Mon Sep 17 00:00:00 2001
From: yuvipanda
Date: Sun, 10 Dec 2017 17:04:44 -0800
Subject: [PATCH 8/9] Clarify custom bucket sizes for spawn time histogram

---
 jupyterhub/metrics.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/jupyterhub/metrics.py b/jupyterhub/metrics.py
index df724b08..ba885446 100644
--- a/jupyterhub/metrics.py
+++ b/jupyterhub/metrics.py
@@ -16,6 +16,8 @@ SPAWN_DURATION_SECONDS = Histogram(
     'spawn_duration_seconds',
     'spawn duration for all server spawns',
     ['status'],
+    # Use custom bucket sizes, since the default bucket ranges
+    # are meant for quick running processes. Spawns can take a while!
     buckets=[0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, float("inf")]
 )

From 3cd526c0193429e114bfa80fb90ecb307376793b Mon Sep 17 00:00:00 2001
From: yuvipanda
Date: Sun, 10 Dec 2017 21:23:32 -0800
Subject: [PATCH 9/9] Make sure our metrics don't appear & disappear intermittently

Create all timeseries from the beginning, regardless of whether they
happen or not.

Also rename metric objects for consistency.
---
 jupyterhub/handlers/base.py | 45 +++++++++++++++------------------
 jupyterhub/metrics.py       | 52 ++++++++++++++++++++++++++++++++---
 2 files changed, 67 insertions(+), 30 deletions(-)

diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py
index 5167da82..b5e489b3 100644
--- a/jupyterhub/handlers/base.py
+++ b/jupyterhub/handlers/base.py
@@ -23,7 +23,10 @@ from .. import orm
 from ..objects import Server
 from ..spawner import LocalProcessSpawner
 from ..utils import url_path_join
-from ..metrics import SPAWN_DURATION_SECONDS, PROXY_ADD_DURATION_SECONDS
+from ..metrics import (
+    SERVER_SPAWN_DURATION_SECONDS, ServerSpawnStatus,
+    PROXY_ADD_DURATION_SECONDS, ProxyAddStatus
+)
 
 # pattern for the authentication token header
 auth_header_pat = re.compile(r'^(?:token|bearer)\s+([^\s]+)$', flags=re.IGNORECASE)
@@ -400,11 +403,9 @@ class BaseHandler(RequestHandler):
 
         if server_name in user.spawners and user.spawners[server_name].pending:
             pending = user.spawners[server_name].pending
-            SPAWN_DURATION_SECONDS.labels(
-                status='already-pending'
-            ).observe(
-                time.perf_counter() - spawn_start_time
-            )
+            SERVER_SPAWN_DURATION_SECONDS.labels(
+                status=ServerSpawnStatus.already_pending
+            ).observe(time.perf_counter() - spawn_start_time)
             raise RuntimeError("%s pending %s" % (user_server_name, pending))
 
         # count active servers and pending spawns
@@ -423,11 +424,9 @@ class BaseHandler(RequestHandler):
                 '%s pending spawns, throttling',
                 spawn_pending_count,
             )
-            SPAWN_DURATION_SECONDS.labels(
-                status='throttled'
-            ).observe(
-                time.perf_counter() - spawn_start_time
-            )
+            SERVER_SPAWN_DURATION_SECONDS.labels(
+                status=ServerSpawnStatus.throttled
+            ).observe(time.perf_counter() - spawn_start_time)
             raise web.HTTPError(
                 429,
                 "User startup rate limit exceeded. Try again in a few minutes.",
@@ -437,11 +436,9 @@ class BaseHandler(RequestHandler):
                 '%s servers active, no space available',
                 active_count,
             )
-            SPAWN_DURATION_SECONDS.labels(
-                status='too-many-users'
-            ).observe(
-                time.perf_counter() - spawn_start_time
-            )
+            SERVER_SPAWN_DURATION_SECONDS.labels(
+                status=ServerSpawnStatus.too_many_users
+            ).observe(time.perf_counter() - spawn_start_time)
             raise web.HTTPError(429, "Active user limit exceeded. Try again in a few minutes.")
 
         tic = IOLoop.current().time()
@@ -474,11 +471,9 @@ class BaseHandler(RequestHandler):
             toc = IOLoop.current().time()
             self.log.info("User %s took %.3f seconds to start", user_server_name, toc-tic)
             self.statsd.timing('spawner.success', (toc - tic) * 1000)
-            SPAWN_DURATION_SECONDS.labels(
-                status='success'
-            ).observe(
-                time.perf_counter() - spawn_start_time
-            )
+            SERVER_SPAWN_DURATION_SECONDS.labels(
+                status=ServerSpawnStatus.success
+            ).observe(time.perf_counter() - spawn_start_time)
             proxy_add_start_time = time.perf_counter()
             spawner._proxy_pending = True
             try:
@@ -529,11 +524,9 @@ class BaseHandler(RequestHandler):
             if status is not None:
                 toc = IOLoop.current().time()
                 self.statsd.timing('spawner.failure', (toc - tic) * 1000)
-                SPAWN_DURATION_SECONDS.labels(
-                    status='failed'
-                ).observe(
-                    time.perf_counter() - spawn_start_time
-                )
+                SERVER_SPAWN_DURATION_SECONDS.labels(
+                    status=ServerSpawnStatus.failure
+                ).observe(time.perf_counter() - spawn_start_time)
                 raise web.HTTPError(500, "Spawner failed to start [status=%s]. The logs for %s may contain details." % (
                     status, spawner._log_name))
diff --git a/jupyterhub/metrics.py b/jupyterhub/metrics.py
index ba885446..68d6673e 100644
--- a/jupyterhub/metrics.py
+++ b/jupyterhub/metrics.py
@@ -2,8 +2,21 @@
 Prometheus metrics exported by JupyterHub
 
 Read https://prometheus.io/docs/practices/naming/ for naming
-conventions for metrics & labels.
+conventions for metrics & labels. We generally prefer naming them
+`<noun>_<verb>_<type_suffix>`. So a histogram that's tracking
+the duration (in seconds) of servers spawning would be called
+SERVER_SPAWN_DURATION_SECONDS.
+
+We also create an Enum for each 'status' type label in every metric
+we collect. This is to make sure that the metrics exist regardless
+of the condition happening or not. For example, if we don't explicitly
+create them, the metric spawn_duration_seconds{status="failure"}
+will not actually exist until the first failure. This makes dashboarding
+and alerting difficult, so we explicitly list statuses and create
+them manually here.
 """
+from enum import Enum
+
 from prometheus_client import Histogram
 
 REQUEST_DURATION_SECONDS = Histogram(
     'request_duration_seconds',
     'request duration for all HTTP requests',
     ['method', 'handler', 'code']
 )
 
-SPAWN_DURATION_SECONDS = Histogram(
-    'spawn_duration_seconds',
-    'spawn duration for all server spawns',
+SERVER_SPAWN_DURATION_SECONDS = Histogram(
+    'server_spawn_duration_seconds',
+    'time taken for server spawning operation',
     ['status'],
     # Use custom bucket sizes, since the default bucket ranges
     # are meant for quick running processes. Spawns can take a while!
     buckets=[0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, float("inf")]
 )
 
+class ServerSpawnStatus(Enum):
+    """
+    Possible values for 'status' label of SERVER_SPAWN_DURATION_SECONDS
+    """
+    success = 'success'
+    failure = 'failure'
+    already_pending = 'already-pending'
+    throttled = 'throttled'
+    too_many_users = 'too-many-users'
+
+    def __str__(self):
+        return self.value
+
+for s in ServerSpawnStatus:
+    # Create empty metrics with the given status
+    SERVER_SPAWN_DURATION_SECONDS.labels(status=s)
+
+
 PROXY_ADD_DURATION_SECONDS = Histogram(
     'proxy_add_duration_seconds',
     'duration for adding user routes to proxy',
     ['status']
 )
 
+class ProxyAddStatus(Enum):
+    """
+    Possible values for 'status' label of PROXY_ADD_DURATION_SECONDS
+    """
+    success = 'success'
+    failure = 'failure'
+
+    def __str__(self):
+        return self.value
+
+for s in ProxyAddStatus:
+    PROXY_ADD_DURATION_SECONDS.labels(status=s)
+
 def prometheus_log_method(handler):
     """
     Tornado log handler for recording RED metrics.
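
A minimal, self-contained sketch (not part of the patch series) of the behaviour the last patch relies on: a labelled child of a Histogram only appears in the exposition output once labels() has been called for that label value, so pre-creating every status keeps dashboards and alerts from seeing time series appear and disappear. The metric name and buckets below mirror the patch; the isolated registry and the sample observation are illustrative only.

from prometheus_client import CollectorRegistry, Histogram, generate_latest

# Isolated registry for the demo, so we don't collide with the default REGISTRY.
registry = CollectorRegistry()

SERVER_SPAWN_DURATION_SECONDS = Histogram(
    'server_spawn_duration_seconds',
    'time taken for server spawning operation',
    ['status'],
    buckets=[0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, float("inf")],
    registry=registry,
)

# Pre-create the labelled children so every status is exported even
# before anything has been observed for it.
for status in ('success', 'failure', 'already-pending', 'throttled', 'too-many-users'):
    SERVER_SPAWN_DURATION_SECONDS.labels(status=status)

# Record one successful 3.2 second spawn; only the 'success' child changes.
SERVER_SPAWN_DURATION_SECONDS.labels(status='success').observe(3.2)

# generate_latest() renders the Prometheus exposition format that the
# /metrics handler serves.
print(generate_latest(registry).decode('utf-8'))

Running this prints a server_spawn_duration_seconds_count sample for every status, with a value of 0 for the statuses that have not been observed yet, which is exactly what makes rate() and alerting queries stable from the first scrape.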