Merge pull request #1223 from minrk/wait-up-fall-off

apply exponential backoff to all waits
This commit is contained in:
Carol Willing
2017-07-17 09:15:48 -07:00
committed by GitHub
2 changed files with 24 additions and 10 deletions

View File

@@ -16,7 +16,7 @@ from subprocess import Popen
from tempfile import mkdtemp from tempfile import mkdtemp
from tornado import gen from tornado import gen
from tornado.ioloop import PeriodicCallback from tornado.ioloop import PeriodicCallback, IOLoop
from traitlets.config import LoggingConfigurable from traitlets.config import LoggingConfigurable
from traitlets import ( from traitlets import (
@@ -25,7 +25,7 @@ from traitlets import (
) )
from .traitlets import Command, ByteSpecification from .traitlets import Command, ByteSpecification
from .utils import random_port, url_path_join from .utils import random_port, url_path_join, DT_MIN, DT_MAX, DT_SCALE
class Spawner(LoggingConfigurable): class Spawner(LoggingConfigurable):
@@ -628,17 +628,21 @@ class Spawner(LoggingConfigurable):
self.log.exception("Unhandled error in poll callback for %s", self) self.log.exception("Unhandled error in poll callback for %s", self)
return status return status
death_interval = Float(0.1) death_interval = Float(DT_MIN)
@gen.coroutine @gen.coroutine
def wait_for_death(self, timeout=10): def wait_for_death(self, timeout=10):
"""Wait for the single-user server to die, up to timeout seconds""" """Wait for the single-user server to die, up to timeout seconds"""
for i in range(int(timeout / self.death_interval)): loop = IOLoop.current()
tic = loop.time()
dt = self.death_interval
while dt > 0:
status = yield self.poll() status = yield self.poll()
if status is not None: if status is not None:
break break
else: else:
yield gen.sleep(self.death_interval) yield gen.sleep(dt)
dt = min(dt * DT_SCALE, DT_MAX, timeout - (loop.time() - tic))
def _try_setcwd(path): def _try_setcwd(path):

View File

@@ -48,6 +48,12 @@ def can_connect(ip, port):
else: else:
return True return True
# exponential backoff factors:
# start at 100ms, double the wait after each unsuccessful attempt
# never wait longer than 5s between attempts
DT_MIN = 0.1
DT_SCALE = 2
DT_MAX = 5
@gen.coroutine @gen.coroutine
def wait_for_server(ip, port, timeout=10): def wait_for_server(ip, port, timeout=10):
@@ -56,11 +62,13 @@ def wait_for_server(ip, port, timeout=10):
ip = '127.0.0.1' ip = '127.0.0.1'
loop = ioloop.IOLoop.current() loop = ioloop.IOLoop.current()
tic = loop.time() tic = loop.time()
while loop.time() - tic < timeout: dt = DT_MIN
while dt > 0:
if can_connect(ip, port): if can_connect(ip, port):
return return
else: else:
yield gen.sleep(0.1) yield gen.sleep(dt)
dt = min(dt * DT_SCALE, DT_MAX, timeout - (loop.time() - tic))
raise TimeoutError( raise TimeoutError(
"Server at {ip}:{port} didn't respond in {timeout} seconds".format(**locals()) "Server at {ip}:{port} didn't respond in {timeout} seconds".format(**locals())
) )
@@ -75,7 +83,8 @@ def wait_for_http_server(url, timeout=10):
loop = ioloop.IOLoop.current() loop = ioloop.IOLoop.current()
tic = loop.time() tic = loop.time()
client = AsyncHTTPClient() client = AsyncHTTPClient()
while loop.time() - tic < timeout: dt = DT_MIN
while dt > 0:
try: try:
r = yield client.fetch(url, follow_redirects=False) r = yield client.fetch(url, follow_redirects=False)
except HTTPError as e: except HTTPError as e:
@@ -86,16 +95,17 @@ def wait_for_http_server(url, timeout=10):
# but 502 or other proxy error is conceivable # but 502 or other proxy error is conceivable
app_log.warning( app_log.warning(
"Server at %s responded with error: %s", url, e.code) "Server at %s responded with error: %s", url, e.code)
yield gen.sleep(0.1) yield gen.sleep(dt)
else: else:
app_log.debug("Server at %s responded with %s", url, e.code) app_log.debug("Server at %s responded with %s", url, e.code)
return e.response return e.response
except (OSError, socket.error) as e: except (OSError, socket.error) as e:
if e.errno not in {errno.ECONNABORTED, errno.ECONNREFUSED, errno.ECONNRESET}: if e.errno not in {errno.ECONNABORTED, errno.ECONNREFUSED, errno.ECONNRESET}:
app_log.warning("Failed to connect to %s (%s)", url, e) app_log.warning("Failed to connect to %s (%s)", url, e)
yield gen.sleep(0.1) yield gen.sleep(dt)
else: else:
return r return r
dt = min(dt * DT_SCALE, DT_MAX, timeout - (loop.time() - tic))
raise TimeoutError( raise TimeoutError(
"Server at {url} didn't respond in {timeout} seconds".format(**locals()) "Server at {url} didn't respond in {timeout} seconds".format(**locals())