diff --git a/jupyterhub/proxy.py b/jupyterhub/proxy.py index 87b47a6c..85fb23d7 100644 --- a/jupyterhub/proxy.py +++ b/jupyterhub/proxy.py @@ -48,7 +48,7 @@ from jupyterhub.traitlets import Command from . import utils from .metrics import CHECK_ROUTES_DURATION_SECONDS, PROXY_POLL_DURATION_SECONDS from .objects import Server -from .utils import AnyTimeoutError, exponential_backoff, url_escape_path, url_path_join +from .utils import exponential_backoff, url_escape_path, url_path_join def _one_at_a_time(method): @@ -766,24 +766,67 @@ class ConfigurableHTTPProxy(Proxy): self._write_pid_file() - def _check_process(): - status = self.proxy_process.poll() - if status is not None: - with self.proxy_process: - e = RuntimeError("Proxy failed to start with exit code %i" % status) - raise e from None + async def wait_for_process(): + """Watch proxy process for early termination + + Runs forever, checking every 0.5s if the process has exited + so we don't keep waiting for endpoints after the proxy has stopped. + + Raises RuntimeError if/when the proxy process exits, + otherwise runs forever. + Should be cancelled when servers become ready. + """ + while True: + status = self.proxy_process.poll() + if status is not None: + with self.proxy_process: + e = RuntimeError( + f"Proxy failed to start with exit code {status}" + ) + raise e from None + await asyncio.sleep(0.5) + + # process_exited can only resolve with a RuntimeError when the process has exited, + # otherwise it must be cancelled. + process_exited = asyncio.ensure_future(wait_for_process()) + + # wait for both servers to be ready (or one server to fail) + server_futures = [ + asyncio.ensure_future(server.wait_up(10)) + for server in (public_server, api_server) + ] + servers_ready = asyncio.gather(*server_futures) + + # wait for process to crash or servers to be ready, + # whichever comes first + wait_timeout = 15 + ready, pending = await asyncio.wait( + [ + process_exited, + servers_ready, + ], + return_when=asyncio.FIRST_COMPLETED, + timeout=wait_timeout, + ) + for task in [servers_ready, process_exited] + server_futures: + # cancel any pending tasks + if not task.done(): + task.cancel() + if not ready: + # timeouts passed to wait_up should prevent this, + # but weird things like DNS delays may result in + # wait_up taking a lot longer than it should + raise TimeoutError( + f"Waiting for proxy endpoints didn't complete in {wait_timeout}s" + ) + if process_exited in ready: + # process exited, this will raise RuntimeError + await process_exited + else: + # if we got here, servers_ready is done + # await it to make sure exceptions are raised + await servers_ready - for server in (public_server, api_server): - for i in range(10): - _check_process() - try: - await server.wait_up(1) - except AnyTimeoutError: - continue - else: - break - await server.wait_up(1) - _check_process() self.log.debug("Proxy started and appears to be up") pc = PeriodicCallback(self.check_running, 1e3 * self.check_running_interval) self._check_running_callback = pc diff --git a/jupyterhub/utils.py b/jupyterhub/utils.py index 4db7ffe0..e703d9fd 100644 --- a/jupyterhub/utils.py +++ b/jupyterhub/utils.py @@ -15,6 +15,7 @@ import ssl import string import sys import threading +import time import uuid import warnings from binascii import b2a_hex @@ -93,6 +94,8 @@ def can_connect(ip, port): except OSError as e: if e.errno not in {errno.ECONNREFUSED, errno.ETIMEDOUT}: app_log.error("Unexpected error connecting to %s:%i %s", ip, port, e) + else: + app_log.debug("Server at %s:%i not ready: %s", ip, port, e) return False else: return True @@ -245,6 +248,8 @@ async def wait_for_server(ip, port, timeout=10): """Wait for any server to show up at ip:port.""" if ip in {'', '0.0.0.0', '::'}: ip = '127.0.0.1' + app_log.debug("Waiting %ss for server at %s:%s", timeout, ip, port) + tic = time.perf_counter() await exponential_backoff( lambda: can_connect(ip, port), "Server at {ip}:{port} didn't respond in {timeout} seconds".format( @@ -252,6 +257,8 @@ async def wait_for_server(ip, port, timeout=10): ), timeout=timeout, ) + toc = time.perf_counter() + app_log.debug("Server at %s:%s responded in %.2fs", ip, port, toc - tic) async def wait_for_http_server(url, timeout=10, ssl_context=None): @@ -259,12 +266,13 @@ async def wait_for_http_server(url, timeout=10, ssl_context=None): Any non-5XX response code will do, even 404. """ - loop = ioloop.IOLoop.current() - tic = loop.time() client = AsyncHTTPClient() if ssl_context: client.ssl_options = ssl_context + app_log.debug("Waiting %ss for server at %s", timeout, url) + tic = time.perf_counter() + async def is_reachable(): try: r = await client.fetch(url, follow_redirects=False) @@ -297,6 +305,8 @@ async def wait_for_http_server(url, timeout=10, ssl_context=None): ), timeout=timeout, ) + toc = time.perf_counter() + app_log.debug("Server at %s responded in %.2fs", url, toc - tic) return re