From 8e3c4b1925d3bde5d32d52e4a450e11ce1083bef Mon Sep 17 00:00:00 2001 From: Min RK Date: Sat, 15 Jul 2017 12:56:47 +0200 Subject: [PATCH 1/2] apply exponential backoff to all waits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Waiting for servers to come up and shut down was polled at an even interval of 100ms. If things are slow and busy, this is a lot of waiting events. Exponential backoff reduces the number of callbacks triggered by slow spawners. This may improve the load a bit when there’s a bunch of outstanding spawns. --- jupyterhub/spawner.py | 10 +++++++--- jupyterhub/utils.py | 14 +++++++++----- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/jupyterhub/spawner.py b/jupyterhub/spawner.py index b9b25c4f..d6c04373 100644 --- a/jupyterhub/spawner.py +++ b/jupyterhub/spawner.py @@ -18,7 +18,7 @@ from subprocess import Popen from tempfile import mkdtemp from tornado import gen -from tornado.ioloop import PeriodicCallback +from tornado.ioloop import PeriodicCallback, IOLoop from traitlets.config import LoggingConfigurable from traitlets import ( @@ -635,12 +635,16 @@ class Spawner(LoggingConfigurable): @gen.coroutine def wait_for_death(self, timeout=10): """Wait for the single-user server to die, up to timeout seconds""" - for i in range(int(timeout / self.death_interval)): + loop = IOLoop.current() + tic = loop.time() + dt = self.death_interval + while dt > 0: status = yield self.poll() if status is not None: break else: - yield gen.sleep(self.death_interval) + yield gen.sleep(dt) + dt = min(dt * 2, timeout - (loop.time() - tic)) def _try_setcwd(path): diff --git a/jupyterhub/utils.py b/jupyterhub/utils.py index 1ac76a2e..ad14dc13 100644 --- a/jupyterhub/utils.py +++ b/jupyterhub/utils.py @@ -56,11 +56,13 @@ def wait_for_server(ip, port, timeout=10): ip = '127.0.0.1' loop = ioloop.IOLoop.current() tic = loop.time() - while loop.time() - tic < timeout: + dt = 0.1 + while dt > 0: if can_connect(ip, 
port): return else: - yield gen.sleep(0.1) + yield gen.sleep(dt) + dt = min(dt * 2, timeout - (loop.time() - tic)) raise TimeoutError( "Server at {ip}:{port} didn't respond in {timeout} seconds".format(**locals()) ) @@ -75,7 +77,8 @@ def wait_for_http_server(url, timeout=10): loop = ioloop.IOLoop.current() tic = loop.time() client = AsyncHTTPClient() - while loop.time() - tic < timeout: + dt = 0.1 + while dt > 0: try: r = yield client.fetch(url, follow_redirects=False) except HTTPError as e: @@ -86,16 +89,17 @@ def wait_for_http_server(url, timeout=10): # but 502 or other proxy error is conceivable app_log.warning( "Server at %s responded with error: %s", url, e.code) - yield gen.sleep(0.1) + yield gen.sleep(dt) else: app_log.debug("Server at %s responded with %s", url, e.code) return e.response except (OSError, socket.error) as e: if e.errno not in {errno.ECONNABORTED, errno.ECONNREFUSED, errno.ECONNRESET}: app_log.warning("Failed to connect to %s (%s)", url, e) - yield gen.sleep(0.1) + yield gen.sleep(dt) else: return r + dt = min(dt * 2, timeout - (loop.time() - tic)) raise TimeoutError( "Server at {url} didn't respond in {timeout} seconds".format(**locals()) From efa6a33b0a8f5835653e77cd5ffe3e0a753156ce Mon Sep 17 00:00:00 2001 From: Min RK Date: Sat, 15 Jul 2017 13:05:39 +0200 Subject: [PATCH 2/2] variables for exponential falloff --- jupyterhub/spawner.py | 6 +++--- jupyterhub/utils.py | 14 ++++++++++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/jupyterhub/spawner.py b/jupyterhub/spawner.py index d6c04373..f2f41efb 100644 --- a/jupyterhub/spawner.py +++ b/jupyterhub/spawner.py @@ -27,7 +27,7 @@ from traitlets import ( ) from .traitlets import Command, ByteSpecification -from .utils import random_port, url_path_join +from .utils import random_port, url_path_join, DT_MIN, DT_MAX, DT_SCALE class Spawner(LoggingConfigurable): @@ -630,7 +630,7 @@ class Spawner(LoggingConfigurable): self.log.exception("Unhandled error in poll callback for %s", 
self) return status - death_interval = Float(0.1) + death_interval = Float(DT_MIN) @gen.coroutine def wait_for_death(self, timeout=10): @@ -644,7 +644,7 @@ class Spawner(LoggingConfigurable): break else: yield gen.sleep(dt) - dt = min(dt * 2, timeout - (loop.time() - tic)) + dt = min(dt * DT_SCALE, DT_MAX, timeout - (loop.time() - tic)) def _try_setcwd(path): diff --git a/jupyterhub/utils.py b/jupyterhub/utils.py index ad14dc13..07ab161f 100644 --- a/jupyterhub/utils.py +++ b/jupyterhub/utils.py @@ -48,6 +48,12 @@ def can_connect(ip, port): else: return True +# exponential falloff factors: +# start at 100ms, falloff by 2x +# never longer than 5s +DT_MIN = 0.1 +DT_SCALE = 2 +DT_MAX = 5 @gen.coroutine def wait_for_server(ip, port, timeout=10): @@ -56,13 +62,13 @@ def wait_for_server(ip, port, timeout=10): ip = '127.0.0.1' loop = ioloop.IOLoop.current() tic = loop.time() - dt = 0.1 + dt = DT_MIN while dt > 0: if can_connect(ip, port): return else: yield gen.sleep(dt) - dt = min(dt * 2, timeout - (loop.time() - tic)) + dt = min(dt * DT_SCALE, DT_MAX, timeout - (loop.time() - tic)) raise TimeoutError( "Server at {ip}:{port} didn't respond in {timeout} seconds".format(**locals()) ) @@ -77,7 +83,7 @@ def wait_for_http_server(url, timeout=10): loop = ioloop.IOLoop.current() tic = loop.time() client = AsyncHTTPClient() - dt = 0.1 + dt = DT_MIN while dt > 0: try: r = yield client.fetch(url, follow_redirects=False) @@ -99,7 +105,7 @@ def wait_for_http_server(url, timeout=10): yield gen.sleep(dt) else: return r - dt = min(dt * 2, timeout - (loop.time() - tic)) + dt = min(dt * DT_SCALE, DT_MAX, timeout - (loop.time() - tic)) raise TimeoutError( "Server at {url} didn't respond in {timeout} seconds".format(**locals())