From 8e3c4b1925d3bde5d32d52e4a450e11ce1083bef Mon Sep 17 00:00:00 2001
From: Min RK <benjaminrk@gmail.com>
Date: Sat, 15 Jul 2017 12:56:47 +0200
Subject: [PATCH 1/2] apply exponential backoff to all waits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Waiting for servers to come up and shut down was polled at a fixed interval of 100ms. If things are slow and busy, this is a lot of waiting events. Exponential backoff reduces the number of callbacks triggered by slow spawners.

This may improve the load a bit when there’s a bunch of outstanding spawns.
---
 jupyterhub/spawner.py | 10 +++++++---
 jupyterhub/utils.py   | 14 +++++++++-----
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/jupyterhub/spawner.py b/jupyterhub/spawner.py
index b9b25c4f..d6c04373 100644
--- a/jupyterhub/spawner.py
+++ b/jupyterhub/spawner.py
@@ -18,7 +18,7 @@ from subprocess import Popen
 from tempfile import mkdtemp
 
 from tornado import gen
-from tornado.ioloop import PeriodicCallback
+from tornado.ioloop import PeriodicCallback, IOLoop
 
 from traitlets.config import LoggingConfigurable
 from traitlets import (
@@ -635,12 +635,16 @@ class Spawner(LoggingConfigurable):
     @gen.coroutine
     def wait_for_death(self, timeout=10):
         """Wait for the single-user server to die, up to timeout seconds"""
-        for i in range(int(timeout / self.death_interval)):
+        loop = IOLoop.current()
+        tic = loop.time()
+        dt = self.death_interval
+        while dt > 0:
             status = yield self.poll()
             if status is not None:
                 break
             else:
-                yield gen.sleep(self.death_interval)
+                yield gen.sleep(dt)
+            dt = min(dt * 2, timeout - (loop.time() - tic))
 
 
 def _try_setcwd(path):
diff --git a/jupyterhub/utils.py b/jupyterhub/utils.py
index 1ac76a2e..ad14dc13 100644
--- a/jupyterhub/utils.py
+++ b/jupyterhub/utils.py
@@ -56,11 +56,13 @@ def wait_for_server(ip, port, timeout=10):
         ip = '127.0.0.1'
     loop = ioloop.IOLoop.current()
     tic = loop.time()
-    while loop.time() - tic < timeout:
+    dt = 0.1
+    while dt > 0:
         if can_connect(ip, port):
             return
         else:
-            yield gen.sleep(0.1)
+            yield gen.sleep(dt)
+        dt = min(dt * 2, timeout - (loop.time() - tic))
     raise TimeoutError(
         "Server at {ip}:{port} didn't respond in {timeout} seconds".format(**locals())
     )
@@ -75,7 +77,8 @@ def wait_for_http_server(url, timeout=10):
     loop = ioloop.IOLoop.current()
     tic = loop.time()
     client = AsyncHTTPClient()
-    while loop.time() - tic < timeout:
+    dt = 0.1
+    while dt > 0:
         try:
             r = yield client.fetch(url, follow_redirects=False)
         except HTTPError as e:
@@ -86,16 +89,17 @@ def wait_for_http_server(url, timeout=10):
                     # but 502 or other proxy error is conceivable
                     app_log.warning(
                         "Server at %s responded with error: %s", url, e.code)
-                yield gen.sleep(0.1)
+                yield gen.sleep(dt)
             else:
                 app_log.debug("Server at %s responded with %s", url, e.code)
                 return e.response
         except (OSError, socket.error) as e:
             if e.errno not in {errno.ECONNABORTED, errno.ECONNREFUSED, errno.ECONNRESET}:
                 app_log.warning("Failed to connect to %s (%s)", url, e)
-            yield gen.sleep(0.1)
+            yield gen.sleep(dt)
         else:
             return r
+        dt = min(dt * 2, timeout - (loop.time() - tic))
 
     raise TimeoutError(
         "Server at {url} didn't respond in {timeout} seconds".format(**locals())

From efa6a33b0a8f5835653e77cd5ffe3e0a753156ce Mon Sep 17 00:00:00 2001
From: Min RK <benjaminrk@gmail.com>
Date: Sat, 15 Jul 2017 13:05:39 +0200
Subject: [PATCH 2/2] variables for exponential falloff

---
 jupyterhub/spawner.py |  6 +++---
 jupyterhub/utils.py   | 14 ++++++++++----
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/jupyterhub/spawner.py b/jupyterhub/spawner.py
index d6c04373..f2f41efb 100644
--- a/jupyterhub/spawner.py
+++ b/jupyterhub/spawner.py
@@ -27,7 +27,7 @@ from traitlets import (
 )
 
 from .traitlets import Command, ByteSpecification
-from .utils import random_port, url_path_join
+from .utils import random_port, url_path_join, DT_MIN, DT_MAX, DT_SCALE
 
 
 class Spawner(LoggingConfigurable):
@@ -630,7 +630,7 @@ class Spawner(LoggingConfigurable):
                 self.log.exception("Unhandled error in poll callback for %s", self)
         return status
 
-    death_interval = Float(0.1)
+    death_interval = Float(DT_MIN)
 
     @gen.coroutine
     def wait_for_death(self, timeout=10):
@@ -644,7 +644,7 @@ class Spawner(LoggingConfigurable):
                 break
             else:
                 yield gen.sleep(dt)
-            dt = min(dt * 2, timeout - (loop.time() - tic))
+            dt = min(dt * DT_SCALE, DT_MAX, timeout - (loop.time() - tic))
 
 
 def _try_setcwd(path):
diff --git a/jupyterhub/utils.py b/jupyterhub/utils.py
index ad14dc13..07ab161f 100644
--- a/jupyterhub/utils.py
+++ b/jupyterhub/utils.py
@@ -48,6 +48,12 @@ def can_connect(ip, port):
     else:
         return True
 
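+# exponential backoff parameters for polling loops:
+# start at DT_MIN (100ms) and multiply the wait by DT_SCALE (2x) after each attempt,
+# but never wait longer than DT_MAX (5s) between attempts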
+DT_MIN = 0.1
+DT_SCALE = 2
+DT_MAX = 5
 
 @gen.coroutine
 def wait_for_server(ip, port, timeout=10):
@@ -56,13 +62,13 @@ def wait_for_server(ip, port, timeout=10):
         ip = '127.0.0.1'
     loop = ioloop.IOLoop.current()
     tic = loop.time()
-    dt = 0.1
+    dt = DT_MIN
     while dt > 0:
         if can_connect(ip, port):
             return
         else:
             yield gen.sleep(dt)
-        dt = min(dt * 2, timeout - (loop.time() - tic))
+        dt = min(dt * DT_SCALE, DT_MAX, timeout - (loop.time() - tic))
     raise TimeoutError(
         "Server at {ip}:{port} didn't respond in {timeout} seconds".format(**locals())
     )
@@ -77,7 +83,7 @@ def wait_for_http_server(url, timeout=10):
     loop = ioloop.IOLoop.current()
     tic = loop.time()
     client = AsyncHTTPClient()
-    dt = 0.1
+    dt = DT_MIN
     while dt > 0:
         try:
             r = yield client.fetch(url, follow_redirects=False)
@@ -99,7 +105,7 @@ def wait_for_http_server(url, timeout=10):
             yield gen.sleep(dt)
         else:
             return r
-        dt = min(dt * 2, timeout - (loop.time() - tic))
+        dt = min(dt * DT_SCALE, DT_MAX, timeout - (loop.time() - tic))
 
     raise TimeoutError(
         "Server at {url} didn't respond in {timeout} seconds".format(**locals())