mirror of
https://github.com/jupyterhub/jupyterhub.git
synced 2025-10-19 07:53:00 +00:00
Merge pull request #939 from minrk/service-logs
more debugging and health checks for services
This commit is contained in:
@@ -259,6 +259,9 @@ class JupyterHub(Application):
|
|||||||
proxy_check_interval = Integer(30,
|
proxy_check_interval = Integer(30,
|
||||||
help="Interval (in seconds) at which to check if the proxy is running."
|
help="Interval (in seconds) at which to check if the proxy is running."
|
||||||
).tag(config=True)
|
).tag(config=True)
|
||||||
|
# Configurable trait: default is 60; a falsy value disables the periodic check
# wherever the caller guards on `self.service_check_interval`.
service_check_interval = Integer(
    60,
    help="Interval (in seconds) at which to check connectivity of services with web endpoints.",
).tag(config=True)
|
||||||
|
|
||||||
data_files_path = Unicode(DATA_FILES_PATH,
|
data_files_path = Unicode(DATA_FILES_PATH,
|
||||||
help="The location of jupyterhub data files (e.g. /usr/local/share/jupyter/hub)"
|
help="The location of jupyterhub data files (e.g. /usr/local/share/jupyter/hub)"
|
||||||
@@ -1058,6 +1061,19 @@ class JupyterHub(Application):
|
|||||||
self.db.delete(service)
|
self.db.delete(service)
|
||||||
self.db.commit()
|
self.db.commit()
|
||||||
|
|
||||||
|
@gen.coroutine
def check_services_health(self):
    """Probe each service that has a URL and log whether it responded.

    Walks ``self._service_map`` in order. Services without a ``url`` are
    ignored. For the rest, ``service.orm.server.wait_up`` is awaited with
    ``timeout=1`` (presumably seconds -- confirm against ``wait_up``).
    A ``TimeoutError`` is reported as a warning; a successful probe is
    reported at debug level. Any other exception propagates to the caller.
    """
    for svc_name, svc in self._service_map.items():
        if svc.url:
            try:
                yield svc.orm.server.wait_up(timeout=1)
            except TimeoutError:
                # Unreachable service: report it, then keep checking the others.
                self.log.warning(
                    "Cannot connect to %s service %s at %s",
                    svc.kind, svc_name, svc.url,
                )
            else:
                self.log.debug(
                    "%s service %s running at %s",
                    svc.kind.title(), svc_name, svc.url,
                )
|
||||||
|
|
||||||
@gen.coroutine
|
@gen.coroutine
|
||||||
def init_spawners(self):
|
def init_spawners(self):
|
||||||
db = self.db
|
db = self.db
|
||||||
@@ -1462,14 +1478,34 @@ class JupyterHub(Application):
|
|||||||
self.log.critical("Failed to start proxy", exc_info=True)
|
self.log.critical("Failed to start proxy", exc_info=True)
|
||||||
self.exit(1)
|
self.exit(1)
|
||||||
|
|
||||||
|
# start the service(s)
|
||||||
for service_name, service in self._service_map.items():
|
for service_name, service in self._service_map.items():
|
||||||
if not service.managed:
|
msg = '%s at %s' % (service_name, service.url) if service.url else service_name
|
||||||
continue
|
if service.managed:
|
||||||
try:
|
self.log.info("Starting managed service %s", msg)
|
||||||
service.start()
|
try:
|
||||||
except Exception as e:
|
service.start()
|
||||||
self.log.critical("Failed to start service %s", service_name, exc_info=True)
|
except Exception as e:
|
||||||
self.exit(1)
|
self.log.critical("Failed to start service %s", service_name, exc_info=True)
|
||||||
|
self.exit(1)
|
||||||
|
else:
|
||||||
|
self.log.info("Adding external service %s", msg)
|
||||||
|
|
||||||
|
if service.url:
|
||||||
|
tries = 10 if service.managed else 1
|
||||||
|
for i in range(tries):
|
||||||
|
try:
|
||||||
|
yield service.orm.server.wait_up(http=True, timeout=1)
|
||||||
|
except TimeoutError:
|
||||||
|
if service.managed:
|
||||||
|
status = yield service.spawner.poll()
|
||||||
|
if status is not None:
|
||||||
|
self.log.error("Service %s exited with status %s", service_name, status)
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
self.log.error("Cannot connect to %s service %s at %s. Is it running?", service.kind, service_name, service.url)
|
||||||
|
|
||||||
loop.add_callback(self.proxy.add_all_users, self.users)
|
loop.add_callback(self.proxy.add_all_users, self.users)
|
||||||
loop.add_callback(self.proxy.add_all_services, self._service_map)
|
loop.add_callback(self.proxy.add_all_services, self._service_map)
|
||||||
@@ -1481,6 +1517,10 @@ class JupyterHub(Application):
|
|||||||
pc = PeriodicCallback(self.check_proxy, 1e3 * self.proxy_check_interval)
|
pc = PeriodicCallback(self.check_proxy, 1e3 * self.proxy_check_interval)
|
||||||
pc.start()
|
pc.start()
|
||||||
|
|
||||||
|
if self.service_check_interval and any(s.url for s in self._service_map.values()):
|
||||||
|
pc = PeriodicCallback(self.check_services_health, 1e3 * self.service_check_interval)
|
||||||
|
pc.start()
|
||||||
|
|
||||||
if self.last_activity_interval:
|
if self.last_activity_interval:
|
||||||
pc = PeriodicCallback(self.update_last_activity, 1e3 * self.last_activity_interval)
|
pc = PeriodicCallback(self.update_last_activity, 1e3 * self.last_activity_interval)
|
||||||
pc.start()
|
pc.start()
|
||||||
|
@@ -160,6 +160,15 @@ class Service(LoggingConfigurable):
|
|||||||
def managed(self):
|
def managed(self):
|
||||||
"""Am I managed by the Hub?"""
|
"""Am I managed by the Hub?"""
|
||||||
return bool(self.command)
|
return bool(self.command)
|
||||||
|
|
||||||
|
@property
def kind(self):
    """Return a short label describing how this service is run.

    - ``'managed'`` when ``self.managed`` is truthy
    - ``'external'`` otherwise
    """
    if self.managed:
        return 'managed'
    return 'external'
|
||||||
|
|
||||||
command = Command(minlen=0,
|
command = Command(minlen=0,
|
||||||
help="Command to spawn this service, if managed."
|
help="Command to spawn this service, if managed."
|
||||||
|
Reference in New Issue
Block a user