Improve debugging when waiting for servers

Add debug messages and timers for start and end waiting for servers

and improve logic for awaiting proxy endpoints using concurrency primitives instead of a for-loop
This commit is contained in:
Min RK
2023-09-12 09:24:01 +02:00
parent 66f9034dab
commit ece8408381
2 changed files with 42 additions and 20 deletions

View File

@@ -48,7 +48,7 @@ from jupyterhub.traitlets import Command
from . import utils from . import utils
from .metrics import CHECK_ROUTES_DURATION_SECONDS, PROXY_POLL_DURATION_SECONDS from .metrics import CHECK_ROUTES_DURATION_SECONDS, PROXY_POLL_DURATION_SECONDS
from .objects import Server from .objects import Server
from .utils import AnyTimeoutError, exponential_backoff, url_escape_path, url_path_join from .utils import exponential_backoff, url_escape_path, url_path_join
def _one_at_a_time(method): def _one_at_a_time(method):
@@ -766,24 +766,36 @@ class ConfigurableHTTPProxy(Proxy):
self._write_pid_file() self._write_pid_file()
def _check_process(): async def watch_process():
status = self.proxy_process.poll() """Watch proxy process for early termination
if status is not None:
with self.proxy_process:
e = RuntimeError("Proxy failed to start with exit code %i" % status)
raise e from None
for server in (public_server, api_server): so we don't keep waiting for connections after the proxy exits
for i in range(10): """
_check_process() while True:
try: status = self.proxy_process.poll()
await server.wait_up(1) if status is not None:
except AnyTimeoutError: with self.proxy_process:
continue e = RuntimeError(
else: f"Proxy failed to start with exit code {status}"
break )
await server.wait_up(1) raise e from None
_check_process()
await asyncio.wait([servers_ready], timeout=0.5)
# stop watching the process when the connections
# have finished (success or failure)
if servers_ready.done():
return
servers_ready = asyncio.gather(
*(server.wait_up(10) for server in (public_server, api_server))
)
process_ended = asyncio.ensure_future(watch_process())
# wait for process to crash or servers to be ready,
# whichever comes first
# don't need to handle exceptions here,
# as the underlying exceptions have informative error messages
await asyncio.wait_for(asyncio.gather(process_ended, servers_ready), timeout=15)
self.log.debug("Proxy started and appears to be up") self.log.debug("Proxy started and appears to be up")
pc = PeriodicCallback(self.check_running, 1e3 * self.check_running_interval) pc = PeriodicCallback(self.check_running, 1e3 * self.check_running_interval)
self._check_running_callback = pc self._check_running_callback = pc

View File

@@ -15,6 +15,7 @@ import ssl
import string import string
import sys import sys
import threading import threading
import time
import uuid import uuid
import warnings import warnings
from binascii import b2a_hex from binascii import b2a_hex
@@ -93,6 +94,8 @@ def can_connect(ip, port):
except OSError as e: except OSError as e:
if e.errno not in {errno.ECONNREFUSED, errno.ETIMEDOUT}: if e.errno not in {errno.ECONNREFUSED, errno.ETIMEDOUT}:
app_log.error("Unexpected error connecting to %s:%i %s", ip, port, e) app_log.error("Unexpected error connecting to %s:%i %s", ip, port, e)
else:
app_log.debug("Server at %s:%i not ready: %s", ip, port, e)
return False return False
else: else:
return True return True
@@ -245,6 +248,8 @@ async def wait_for_server(ip, port, timeout=10):
"""Wait for any server to show up at ip:port.""" """Wait for any server to show up at ip:port."""
if ip in {'', '0.0.0.0', '::'}: if ip in {'', '0.0.0.0', '::'}:
ip = '127.0.0.1' ip = '127.0.0.1'
app_log.debug("Waiting %ss for server at %s:%s", timeout, ip, port)
tic = time.perf_counter()
await exponential_backoff( await exponential_backoff(
lambda: can_connect(ip, port), lambda: can_connect(ip, port),
"Server at {ip}:{port} didn't respond in {timeout} seconds".format( "Server at {ip}:{port} didn't respond in {timeout} seconds".format(
@@ -252,6 +257,8 @@ async def wait_for_server(ip, port, timeout=10):
), ),
timeout=timeout, timeout=timeout,
) )
toc = time.perf_counter()
app_log.debug("Server at %s:%s responded in %.2fs", ip, port, toc - tic)
async def wait_for_http_server(url, timeout=10, ssl_context=None): async def wait_for_http_server(url, timeout=10, ssl_context=None):
@@ -259,12 +266,13 @@ async def wait_for_http_server(url, timeout=10, ssl_context=None):
Any non-5XX response code will do, even 404. Any non-5XX response code will do, even 404.
""" """
loop = ioloop.IOLoop.current()
tic = loop.time()
client = AsyncHTTPClient() client = AsyncHTTPClient()
if ssl_context: if ssl_context:
client.ssl_options = ssl_context client.ssl_options = ssl_context
app_log.debug("Waiting %ss for server at %s", timeout, url)
tic = time.perf_counter()
async def is_reachable(): async def is_reachable():
try: try:
r = await client.fetch(url, follow_redirects=False) r = await client.fetch(url, follow_redirects=False)
@@ -297,6 +305,8 @@ async def wait_for_http_server(url, timeout=10, ssl_context=None):
), ),
timeout=timeout, timeout=timeout,
) )
toc = time.perf_counter()
app_log.debug("Server at %s responded in %.2fs", url, toc - tic)
return re return re