mirror of
https://github.com/jupyterhub/jupyterhub.git
synced 2025-10-17 15:03:02 +00:00
Merge pull request #4561 from minrk/better-wait-up
Improve debugging when waiting for servers
This commit is contained in:
@@ -48,7 +48,7 @@ from jupyterhub.traitlets import Command
|
|||||||
from . import utils
|
from . import utils
|
||||||
from .metrics import CHECK_ROUTES_DURATION_SECONDS, PROXY_POLL_DURATION_SECONDS
|
from .metrics import CHECK_ROUTES_DURATION_SECONDS, PROXY_POLL_DURATION_SECONDS
|
||||||
from .objects import Server
|
from .objects import Server
|
||||||
from .utils import AnyTimeoutError, exponential_backoff, url_escape_path, url_path_join
|
from .utils import exponential_backoff, url_escape_path, url_path_join
|
||||||
|
|
||||||
|
|
||||||
def _one_at_a_time(method):
|
def _one_at_a_time(method):
|
||||||
@@ -766,24 +766,67 @@ class ConfigurableHTTPProxy(Proxy):
|
|||||||
|
|
||||||
self._write_pid_file()
|
self._write_pid_file()
|
||||||
|
|
||||||
def _check_process():
|
async def wait_for_process():
|
||||||
status = self.proxy_process.poll()
|
"""Watch proxy process for early termination
|
||||||
if status is not None:
|
|
||||||
with self.proxy_process:
|
Runs forever, checking every 0.5s if the process has exited
|
||||||
e = RuntimeError("Proxy failed to start with exit code %i" % status)
|
so we don't keep waiting for endpoints after the proxy has stopped.
|
||||||
raise e from None
|
|
||||||
|
Raises RuntimeError if/when the proxy process exits,
|
||||||
|
otherwise runs forever.
|
||||||
|
Should be cancelled when servers become ready.
|
||||||
|
"""
|
||||||
|
while True:
|
||||||
|
status = self.proxy_process.poll()
|
||||||
|
if status is not None:
|
||||||
|
with self.proxy_process:
|
||||||
|
e = RuntimeError(
|
||||||
|
f"Proxy failed to start with exit code {status}"
|
||||||
|
)
|
||||||
|
raise e from None
|
||||||
|
await asyncio.sleep(0.5)
|
||||||
|
|
||||||
|
# process_exited can only resolve with a RuntimeError when the process has exited,
|
||||||
|
# otherwise it must be cancelled.
|
||||||
|
process_exited = asyncio.ensure_future(wait_for_process())
|
||||||
|
|
||||||
|
# wait for both servers to be ready (or one server to fail)
|
||||||
|
server_futures = [
|
||||||
|
asyncio.ensure_future(server.wait_up(10))
|
||||||
|
for server in (public_server, api_server)
|
||||||
|
]
|
||||||
|
servers_ready = asyncio.gather(*server_futures)
|
||||||
|
|
||||||
|
# wait for process to crash or servers to be ready,
|
||||||
|
# whichever comes first
|
||||||
|
wait_timeout = 15
|
||||||
|
ready, pending = await asyncio.wait(
|
||||||
|
[
|
||||||
|
process_exited,
|
||||||
|
servers_ready,
|
||||||
|
],
|
||||||
|
return_when=asyncio.FIRST_COMPLETED,
|
||||||
|
timeout=wait_timeout,
|
||||||
|
)
|
||||||
|
for task in [servers_ready, process_exited] + server_futures:
|
||||||
|
# cancel any pending tasks
|
||||||
|
if not task.done():
|
||||||
|
task.cancel()
|
||||||
|
if not ready:
|
||||||
|
# timeouts passed to wait_up should prevent this,
|
||||||
|
# but weird things like DNS delays may result in
|
||||||
|
# wait_up taking a lot longer than it should
|
||||||
|
raise TimeoutError(
|
||||||
|
f"Waiting for proxy endpoints didn't complete in {wait_timeout}s"
|
||||||
|
)
|
||||||
|
if process_exited in ready:
|
||||||
|
# process exited, this will raise RuntimeError
|
||||||
|
await process_exited
|
||||||
|
else:
|
||||||
|
# if we got here, servers_ready is done
|
||||||
|
# await it to make sure exceptions are raised
|
||||||
|
await servers_ready
|
||||||
|
|
||||||
for server in (public_server, api_server):
|
|
||||||
for i in range(10):
|
|
||||||
_check_process()
|
|
||||||
try:
|
|
||||||
await server.wait_up(1)
|
|
||||||
except AnyTimeoutError:
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
await server.wait_up(1)
|
|
||||||
_check_process()
|
|
||||||
self.log.debug("Proxy started and appears to be up")
|
self.log.debug("Proxy started and appears to be up")
|
||||||
pc = PeriodicCallback(self.check_running, 1e3 * self.check_running_interval)
|
pc = PeriodicCallback(self.check_running, 1e3 * self.check_running_interval)
|
||||||
self._check_running_callback = pc
|
self._check_running_callback = pc
|
||||||
|
@@ -15,6 +15,7 @@ import ssl
|
|||||||
import string
|
import string
|
||||||
import sys
|
import sys
|
||||||
import threading
|
import threading
|
||||||
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
import warnings
|
import warnings
|
||||||
from binascii import b2a_hex
|
from binascii import b2a_hex
|
||||||
@@ -93,6 +94,8 @@ def can_connect(ip, port):
|
|||||||
except OSError as e:
|
except OSError as e:
|
||||||
if e.errno not in {errno.ECONNREFUSED, errno.ETIMEDOUT}:
|
if e.errno not in {errno.ECONNREFUSED, errno.ETIMEDOUT}:
|
||||||
app_log.error("Unexpected error connecting to %s:%i %s", ip, port, e)
|
app_log.error("Unexpected error connecting to %s:%i %s", ip, port, e)
|
||||||
|
else:
|
||||||
|
app_log.debug("Server at %s:%i not ready: %s", ip, port, e)
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
return True
|
return True
|
||||||
@@ -245,6 +248,8 @@ async def wait_for_server(ip, port, timeout=10):
|
|||||||
"""Wait for any server to show up at ip:port."""
|
"""Wait for any server to show up at ip:port."""
|
||||||
if ip in {'', '0.0.0.0', '::'}:
|
if ip in {'', '0.0.0.0', '::'}:
|
||||||
ip = '127.0.0.1'
|
ip = '127.0.0.1'
|
||||||
|
app_log.debug("Waiting %ss for server at %s:%s", timeout, ip, port)
|
||||||
|
tic = time.perf_counter()
|
||||||
await exponential_backoff(
|
await exponential_backoff(
|
||||||
lambda: can_connect(ip, port),
|
lambda: can_connect(ip, port),
|
||||||
"Server at {ip}:{port} didn't respond in {timeout} seconds".format(
|
"Server at {ip}:{port} didn't respond in {timeout} seconds".format(
|
||||||
@@ -252,6 +257,8 @@ async def wait_for_server(ip, port, timeout=10):
|
|||||||
),
|
),
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
)
|
)
|
||||||
|
toc = time.perf_counter()
|
||||||
|
app_log.debug("Server at %s:%s responded in %.2fs", ip, port, toc - tic)
|
||||||
|
|
||||||
|
|
||||||
async def wait_for_http_server(url, timeout=10, ssl_context=None):
|
async def wait_for_http_server(url, timeout=10, ssl_context=None):
|
||||||
@@ -259,12 +266,13 @@ async def wait_for_http_server(url, timeout=10, ssl_context=None):
|
|||||||
|
|
||||||
Any non-5XX response code will do, even 404.
|
Any non-5XX response code will do, even 404.
|
||||||
"""
|
"""
|
||||||
loop = ioloop.IOLoop.current()
|
|
||||||
tic = loop.time()
|
|
||||||
client = AsyncHTTPClient()
|
client = AsyncHTTPClient()
|
||||||
if ssl_context:
|
if ssl_context:
|
||||||
client.ssl_options = ssl_context
|
client.ssl_options = ssl_context
|
||||||
|
|
||||||
|
app_log.debug("Waiting %ss for server at %s", timeout, url)
|
||||||
|
tic = time.perf_counter()
|
||||||
|
|
||||||
async def is_reachable():
|
async def is_reachable():
|
||||||
try:
|
try:
|
||||||
r = await client.fetch(url, follow_redirects=False)
|
r = await client.fetch(url, follow_redirects=False)
|
||||||
@@ -297,6 +305,8 @@ async def wait_for_http_server(url, timeout=10, ssl_context=None):
|
|||||||
),
|
),
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
)
|
)
|
||||||
|
toc = time.perf_counter()
|
||||||
|
app_log.debug("Server at %s responded in %.2fs", url, toc - tic)
|
||||||
return re
|
return re
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user