diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py
index 5167da82..b5e489b3 100644
--- a/jupyterhub/handlers/base.py
+++ b/jupyterhub/handlers/base.py
@@ -23,7 +23,10 @@ from .. import orm
 from ..objects import Server
 from ..spawner import LocalProcessSpawner
 from ..utils import url_path_join
-from ..metrics import SPAWN_DURATION_SECONDS, PROXY_ADD_DURATION_SECONDS
+from ..metrics import (
+    SERVER_SPAWN_DURATION_SECONDS, ServerSpawnStatus,
+    PROXY_ADD_DURATION_SECONDS, ProxyAddStatus
+)
 
 # pattern for the authentication token header
 auth_header_pat = re.compile(r'^(?:token|bearer)\s+([^\s]+)$', flags=re.IGNORECASE)
@@ -400,11 +403,9 @@ class BaseHandler(RequestHandler):
         if server_name in user.spawners and user.spawners[server_name].pending:
             pending = user.spawners[server_name].pending
-            SPAWN_DURATION_SECONDS.labels(
-                status='already-pending'
-            ).observe(
-                time.perf_counter() - spawn_start_time
-            )
+            SERVER_SPAWN_DURATION_SECONDS.labels(
+                status=ServerSpawnStatus.already_pending
+            ).observe(time.perf_counter() - spawn_start_time)
             raise RuntimeError("%s pending %s" % (user_server_name, pending))
 
         # count active servers and pending spawns
@@ -423,11 +424,9 @@
                 '%s pending spawns, throttling',
                 spawn_pending_count,
             )
-            SPAWN_DURATION_SECONDS.labels(
-                status='throttled'
-            ).observe(
-                time.perf_counter() - spawn_start_time
-            )
+            SERVER_SPAWN_DURATION_SECONDS.labels(
+                status=ServerSpawnStatus.throttled
+            ).observe(time.perf_counter() - spawn_start_time)
             raise web.HTTPError(
                 429,
                 "User startup rate limit exceeded. Try again in a few minutes.",
             )
@@ -437,11 +436,9 @@
                 '%s servers active, no space available',
                 active_count,
             )
-            SPAWN_DURATION_SECONDS.labels(
-                status='too-many-users'
-            ).observe(
-                time.perf_counter() - spawn_start_time
-            )
+            SERVER_SPAWN_DURATION_SECONDS.labels(
+                status=ServerSpawnStatus.too_many_users
+            ).observe(time.perf_counter() - spawn_start_time)
             raise web.HTTPError(429, "Active user limit exceeded. Try again in a few minutes.")
 
         tic = IOLoop.current().time()
@@ -474,11 +471,9 @@
             toc = IOLoop.current().time()
             self.log.info("User %s took %.3f seconds to start", user_server_name, toc-tic)
             self.statsd.timing('spawner.success', (toc - tic) * 1000)
-            SPAWN_DURATION_SECONDS.labels(
-                status='success'
-            ).observe(
-                time.perf_counter() - spawn_start_time
-            )
+            SERVER_SPAWN_DURATION_SECONDS.labels(
+                status=ServerSpawnStatus.success
+            ).observe(time.perf_counter() - spawn_start_time)
             proxy_add_start_time = time.perf_counter()
             spawner._proxy_pending = True
             try:
@@ -534,11 +529,9 @@
             if status is not None:
                 toc = IOLoop.current().time()
                 self.statsd.timing('spawner.failure', (toc - tic) * 1000)
-                SPAWN_DURATION_SECONDS.labels(
-                    status='failed'
-                ).observe(
-                    time.perf_counter() - spawn_start_time
-                )
+                SERVER_SPAWN_DURATION_SECONDS.labels(
+                    status=ServerSpawnStatus.failure
+                ).observe(time.perf_counter() - spawn_start_time)
                 raise web.HTTPError(500, "Spawner failed to start [status=%s]. The logs for %s may contain details." % (
                     status, spawner._log_name))
diff --git a/jupyterhub/metrics.py b/jupyterhub/metrics.py
index ba885446..68d6673e 100644
--- a/jupyterhub/metrics.py
+++ b/jupyterhub/metrics.py
@@ -2,8 +2,21 @@
 Prometheus metrics exported by JupyterHub
 
 Read https://prometheus.io/docs/practices/naming/ for naming
-conventions for metrics & labels.
+conventions for metrics & labels. We generally prefer naming them
+`<noun>_<verb>_<type_suffix>`. So a histogram that's tracking
+the duration (in seconds) of servers spawning would be called
+SERVER_SPAWN_DURATION_SECONDS.
+
+We also create an Enum for each 'status' type label in every metric
+we collect. This is to make sure that the metrics exist regardless
+of the condition happening or not. For example, if we don't explicitly
+create them, the metric spawn_duration_seconds{status="failure"}
+will not actually exist until the first failure. This makes dashboarding
+and alerting difficult, so we explicitly list statuses and create
+them manually here.
 """
+from enum import Enum
+
 from prometheus_client import Histogram
 
 REQUEST_DURATION_SECONDS = Histogram(
@@ -12,21 +25,52 @@ REQUEST_DURATION_SECONDS = Histogram(
     ['method', 'handler', 'code']
 )
 
-SPAWN_DURATION_SECONDS = Histogram(
-    'spawn_duration_seconds',
-    'spawn duration for all server spawns',
+SERVER_SPAWN_DURATION_SECONDS = Histogram(
+    'server_spawn_duration_seconds',
+    'time taken for server spawning operation',
     ['status'],
     # Use custom bucket sizes, since the default bucket ranges
     # are meant for quick running processes. Spawns can take a while!
     buckets=[0.5, 1, 2.5, 5, 10, 15, 30, 60, 120, float("inf")]
 )
 
+class ServerSpawnStatus(Enum):
+    """
+    Possible values for 'status' label of SERVER_SPAWN_DURATION_SECONDS
+    """
+    success = 'success'
+    failure = 'failure'
+    already_pending = 'already-pending'
+    throttled = 'throttled'
+    too_many_users = 'too-many-users'
+
+    def __str__(self):
+        return self.value
+
+for s in ServerSpawnStatus:
+    # Create empty metrics with the given status
+    SERVER_SPAWN_DURATION_SECONDS.labels(status=s)
+
+
 PROXY_ADD_DURATION_SECONDS = Histogram(
     'proxy_add_duration_seconds',
     'duration for adding user routes to proxy',
     ['status']
 )
 
+class ProxyAddStatus(Enum):
+    """
+    Possible values for 'status' label of PROXY_ADD_DURATION_SECONDS
+    """
+    success = 'success'
+    failure = 'failure'
+
+    def __str__(self):
+        return self.value
+
+for s in ProxyAddStatus:
+    PROXY_ADD_DURATION_SECONDS.labels(status=s)
+
 def prometheus_log_method(handler):
     """
     Tornado log handler for recording RED metrics.
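
Note (not part of the patch): the docstring above explains why each 'status' value is pre-created. prometheus_client calls str() on label values, which is why the enums define __str__, and calling .labels() once per value makes every child time series appear in the exposition output with zero observations before the first event. Below is a minimal, self-contained sketch of that same pattern; the metric and enum names here are purely illustrative and only assume prometheus_client is installed.

    from enum import Enum

    from prometheus_client import Histogram, generate_latest

    DEMO_DURATION_SECONDS = Histogram(
        'demo_duration_seconds',
        'duration of a demo operation',
        ['status'],
    )

    class DemoStatus(Enum):
        success = 'success'
        failure = 'failure'

        def __str__(self):
            # prometheus_client stringifies label values, so this makes the
            # exported label read 'failure' rather than 'DemoStatus.failure'
            return self.value

    # Pre-create every child so demo_duration_seconds{status="failure"}
    # is exported (with zero counts) even before the first failure happens.
    for s in DemoStatus:
        DEMO_DURATION_SECONDS.labels(status=s)

    # Record one observation and dump the exposition text: both status
    # series are present, one populated and one still at zero.
    DEMO_DURATION_SECONDS.labels(status=DemoStatus.success).observe(1.5)
    print(generate_latest().decode())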