Merge pull request #939 from minrk/service-logs

more debugging and health checks for services
This commit is contained in:
Min RK
2017-01-18 16:19:01 -10:00
committed by GitHub
2 changed files with 56 additions and 7 deletions

View File

@@ -259,6 +259,9 @@ class JupyterHub(Application):
# Seconds between proxy checks; multiplied by 1e3 (milliseconds) when handed
# to the PeriodicCallback that runs check_proxy.
proxy_check_interval = Integer(30,
help="Interval (in seconds) at which to check if the proxy is running."
).tag(config=True)
# Seconds between runs of check_services_health. The periodic check is only
# scheduled when this value is truthy (so 0 disables it) and at least one
# registered service has a URL.
service_check_interval = Integer(60,
help="Interval (in seconds) at which to check connectivity of services with web endpoints."
).tag(config=True)
data_files_path = Unicode(DATA_FILES_PATH,
help="The location of jupyterhub data files (e.g. /usr/local/share/jupyter/hub)"
@@ -1058,6 +1061,19 @@ class JupyterHub(Application):
self.db.delete(service)
self.db.commit()
@gen.coroutine
def check_services_health(self):
    """Probe each service that has a URL and log whether it answered.

    Services without a URL are skipped. Every remaining service is given
    one second to respond: a timeout is logged as a warning, a successful
    connection is logged at debug level. Nothing is returned and a failed
    check does not raise.
    """
    for svc_name, svc in self._service_map.items():
        if svc.url:
            try:
                # Services are probed one at a time, each with a 1s budget.
                yield svc.orm.server.wait_up(timeout=1)
            except TimeoutError:
                self.log.warning("Cannot connect to %s service %s at %s", svc.kind, svc_name, svc.url)
            else:
                self.log.debug("%s service %s running at %s", svc.kind.title(), svc_name, svc.url)
@gen.coroutine
def init_spawners(self):
db = self.db
@@ -1462,14 +1478,34 @@ class JupyterHub(Application):
self.log.critical("Failed to start proxy", exc_info=True)
self.exit(1)
# start the service(s)
for service_name, service in self._service_map.items():
if not service.managed:
continue
try:
service.start()
except Exception as e:
self.log.critical("Failed to start service %s", service_name, exc_info=True)
self.exit(1)
msg = '%s at %s' % (service_name, service.url) if service.url else service_name
if service.managed:
self.log.info("Starting managed service %s", msg)
try:
service.start()
except Exception as e:
self.log.critical("Failed to start service %s", service_name, exc_info=True)
self.exit(1)
else:
self.log.info("Adding external service %s", msg)
if service.url:
tries = 10 if service.managed else 1
for i in range(tries):
try:
yield service.orm.server.wait_up(http=True, timeout=1)
except TimeoutError:
if service.managed:
status = yield service.spawner.poll()
if status is not None:
self.log.error("Service %s exited with status %s", service_name, status)
break
else:
break
else:
self.log.error("Cannot connect to %s service %s at %s. Is it running?", service.kind, service_name, service.url)
loop.add_callback(self.proxy.add_all_users, self.users)
loop.add_callback(self.proxy.add_all_services, self._service_map)
@@ -1481,6 +1517,10 @@ class JupyterHub(Application):
pc = PeriodicCallback(self.check_proxy, 1e3 * self.proxy_check_interval)
pc.start()
if self.service_check_interval and any(s.url for s in self._service_map.values()):
pc = PeriodicCallback(self.check_services_health, 1e3 * self.service_check_interval)
pc.start()
if self.last_activity_interval:
pc = PeriodicCallback(self.update_last_activity, 1e3 * self.last_activity_interval)
pc.start()

View File

@@ -160,6 +160,15 @@ class Service(LoggingConfigurable):
def managed(self):
    """Report whether the Hub manages this service.

    Returns:
        bool: True when ``self.command`` is truthy (a command is set),
        False otherwise.
    """
    if self.command:
        return True
    return False
@property
def kind(self):
    """Label this service by who is responsible for running it.

    Returns:
        str: 'managed' when ``self.managed`` is truthy, otherwise
        'external'.
    """
    if self.managed:
        return 'managed'
    return 'external'
command = Command(minlen=0,
help="Command to spawn this service, if managed."