diff --git a/dev-requirements.txt b/dev-requirements.txt index 9638d968..0543252e 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -2,7 +2,9 @@ mock beautifulsoup4 codecov +coverage<5 # pin coverage to < 5 due to coveragepy#716 cryptography +html5lib # needed for beautifulsoup pytest-cov pytest-tornado pytest>=3.3 diff --git a/docs/source/getting-started/security-basics.rst b/docs/source/getting-started/security-basics.rst index 58a00e26..80996555 100644 --- a/docs/source/getting-started/security-basics.rst +++ b/docs/source/getting-started/security-basics.rst @@ -124,7 +124,7 @@ hex-encoded string. You can set it this way: .. code-block:: bash - export JPY_COOKIE_SECRET=`openssl rand -hex 32` + export JPY_COOKIE_SECRET=$(openssl rand -hex 32) For security reasons, this environment variable should only be visible to the Hub. If you set it dynamically as above, all users will be logged out each time @@ -173,7 +173,7 @@ using the ``CONFIGPROXY_AUTH_TOKEN`` environment variable: .. code-block:: bash - export CONFIGPROXY_AUTH_TOKEN='openssl rand -hex 32' + export CONFIGPROXY_AUTH_TOKEN=$(openssl rand -hex 32) This environment variable needs to be visible to the Hub and Proxy. diff --git a/docs/source/getting-started/services-basics.md b/docs/source/getting-started/services-basics.md index e9e399e2..8273f7d6 100644 --- a/docs/source/getting-started/services-basics.md +++ b/docs/source/getting-started/services-basics.md @@ -88,7 +88,7 @@ c.JupyterHub.services = [ { 'name': 'cull-idle', 'admin': True, - 'command': 'python3 cull_idle_servers.py --timeout=3600'.split(), + 'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'], } ] ``` diff --git a/docs/source/reference/services.md b/docs/source/reference/services.md index c554ed4d..24de3bdd 100644 --- a/docs/source/reference/services.md +++ b/docs/source/reference/services.md @@ -93,7 +93,7 @@ c.JupyterHub.services = [ { 'name': 'cull-idle', 'admin': True, - 'command': ['python', '/path/to/cull-idle.py', '--timeout'] + 'command': [sys.executable, '/path/to/cull-idle.py', '--timeout'] } ] ``` diff --git a/examples/bootstrap-script/README.md b/examples/bootstrap-script/README.md index f08a3b8d..d2f27053 100644 --- a/examples/bootstrap-script/README.md +++ b/examples/bootstrap-script/README.md @@ -118,7 +118,7 @@ Here's an example on what you could do in your shell script. See also # - The first parameter for the Bootstrap Script is the USER. USER=$1 -if ["$USER" == ""]; then +if [ "$USER" == "" ]; then exit 1 fi # ---------------------------------------------------------------------------- diff --git a/examples/bootstrap-script/bootstrap.sh b/examples/bootstrap-script/bootstrap.sh index 405c8694..417d8463 100755 --- a/examples/bootstrap-script/bootstrap.sh +++ b/examples/bootstrap-script/bootstrap.sh @@ -6,7 +6,7 @@ # - The first parameter for the Bootstrap Script is the USER. USER=$1 -if ["$USER" == ""]; then +if [ "$USER" == "" ]; then exit 1 fi # ---------------------------------------------------------------------------- diff --git a/examples/cull-idle/README.md b/examples/cull-idle/README.md index c02320f2..9f043e05 100644 --- a/examples/cull-idle/README.md +++ b/examples/cull-idle/README.md @@ -15,7 +15,7 @@ c.JupyterHub.services = [ { 'name': 'cull-idle', 'admin': True, - 'command': 'python3 cull_idle_servers.py --timeout=3600'.split(), + 'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'], } ] ``` @@ -36,6 +36,6 @@ Generate an API token and store it in the `JUPYTERHUB_API_TOKEN` environment variable. Run `cull_idle_servers.py` manually. ```bash - export JUPYTERHUB_API_TOKEN=`jupyterhub token` + export JUPYTERHUB_API_TOKEN=$(jupyterhub token) python3 cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api] ``` diff --git a/examples/cull-idle/cull_idle_servers.py b/examples/cull-idle/cull_idle_servers.py index 5d9a1c92..451b8abf 100755 --- a/examples/cull-idle/cull_idle_servers.py +++ b/examples/cull-idle/cull_idle_servers.py @@ -16,13 +16,13 @@ You can run this as a service managed by JupyterHub with this in your config:: { 'name': 'cull-idle', 'admin': True, - 'command': 'python3 cull_idle_servers.py --timeout=3600'.split(), + 'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'], } ] Or run it manually by generating an API token and storing it in `JUPYTERHUB_API_TOKEN`: - export JUPYTERHUB_API_TOKEN=`jupyterhub token` + export JUPYTERHUB_API_TOKEN=$(jupyterhub token) python3 cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api] This script uses the same ``--timeout`` and ``--max-age`` values for diff --git a/examples/cull-idle/jupyterhub_config.py b/examples/cull-idle/jupyterhub_config.py index f8a6bf52..4668fd8a 100644 --- a/examples/cull-idle/jupyterhub_config.py +++ b/examples/cull-idle/jupyterhub_config.py @@ -3,6 +3,6 @@ c.JupyterHub.services = [ { 'name': 'cull-idle', 'admin': True, - 'command': 'python3 cull_idle_servers.py --timeout=3600'.split(), + 'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'], } ] diff --git a/examples/external-oauth/README.md b/examples/external-oauth/README.md index 96324b3d..a944cb68 100644 --- a/examples/external-oauth/README.md +++ b/examples/external-oauth/README.md @@ -18,7 +18,7 @@ implementations in other web servers or languages. 1. generate an API token: - export JUPYTERHUB_API_TOKEN=`openssl rand -hex 32` + export JUPYTERHUB_API_TOKEN=$(openssl rand -hex 32) 2. launch a version of the the whoami service. For `whoami-oauth`: diff --git a/examples/service-announcement/README.md b/examples/service-announcement/README.md index f61f3505..4cdf7bbb 100644 --- a/examples/service-announcement/README.md +++ b/examples/service-announcement/README.md @@ -11,7 +11,7 @@ configuration file something like: { 'name': 'announcement', 'url': 'http://127.0.0.1:8888', - 'command': ["python", "-m", "announcement"], + 'command': [sys.executable, "-m", "announcement"], } ] diff --git a/examples/service-announcement/jupyterhub_config.py b/examples/service-announcement/jupyterhub_config.py index a6136d6c..31e0289c 100644 --- a/examples/service-announcement/jupyterhub_config.py +++ b/examples/service-announcement/jupyterhub_config.py @@ -5,7 +5,7 @@ c.JupyterHub.services = [ { 'name': 'announcement', 'url': 'http://127.0.0.1:8888', - 'command': ["python", "-m", "announcement"], + 'command': [sys.executable, "-m", "announcement"], } ] diff --git a/examples/service-whoami-flask/launch.sh b/examples/service-whoami-flask/launch.sh index 79342e44..4242992e 100644 --- a/examples/service-whoami-flask/launch.sh +++ b/examples/service-whoami-flask/launch.sh @@ -1,4 +1,4 @@ -export CONFIGPROXY_AUTH_TOKEN=`openssl rand -hex 32` +export CONFIGPROXY_AUTH_TOKEN=$(openssl rand -hex 32) # start JupyterHub jupyterhub --ip=127.0.0.1 diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py index 89924d12..cf44518b 100644 --- a/jupyterhub/handlers/base.py +++ b/jupyterhub/handlers/base.py @@ -32,7 +32,8 @@ from ..utils import maybe_future, url_path_join from ..metrics import ( SERVER_SPAWN_DURATION_SECONDS, ServerSpawnStatus, PROXY_ADD_DURATION_SECONDS, ProxyAddStatus, - RUNNING_SERVERS + SERVER_POLL_DURATION_SECONDS, ServerPollStatus, + RUNNING_SERVERS, SERVER_STOP_DURATION_SECONDS, ServerStopStatus ) # pattern for the authentication token header @@ -821,13 +822,19 @@ class BaseHandler(RequestHandler): # start has finished, but the server hasn't come up # check if the server died while we were waiting + poll_start_time = time.perf_counter() status = await spawner.poll() + SERVER_POLL_DURATION_SECONDS.labels( + status=ServerPollStatus.from_status(status) + ).observe(time.perf_counter() - poll_start_time) + if status is not None: toc = IOLoop.current().time() self.statsd.timing('spawner.failure', (toc - tic) * 1000) SERVER_SPAWN_DURATION_SECONDS.labels( status=ServerSpawnStatus.failure ).observe(time.perf_counter() - spawn_start_time) + raise web.HTTPError(500, "Spawner failed to start [status=%s]. The logs for %s may contain details." % ( status, spawner._log_name)) @@ -848,9 +855,17 @@ class BaseHandler(RequestHandler): async def user_stopped(self, user, server_name): """Callback that fires when the spawner has stopped""" spawner = user.spawners[server_name] + + poll_start_time = time.perf_counter() status = await spawner.poll() + SERVER_POLL_DURATION_SECONDS.labels( + status=ServerPollStatus.from_status(status) + ).observe(time.perf_counter() - poll_start_time) + + if status is None: status = 'unknown' + self.log.warning("User %s server stopped, with exit code: %s", user.name, status, ) @@ -874,18 +889,25 @@ class BaseHandler(RequestHandler): 2. stop the server 3. notice that it stopped """ - tic = IOLoop.current().time() + tic = time.perf_counter() try: await self.proxy.delete_user(user, server_name) await user.stop(server_name) + toc = time.perf_counter() + self.log.info("User %s server took %.3f seconds to stop", user.name, toc - tic) + self.statsd.timing('spawner.stop', (toc - tic) * 1000) + RUNNING_SERVERS.dec() + SERVER_STOP_DURATION_SECONDS.labels( + status=ServerStopStatus.success + ).observe(toc - tic) + except: + SERVER_STOP_DURATION_SECONDS.labels( + status=ServerStopStatus.failure + ).observe(time.perf_counter() - tic) finally: spawner._stop_future = None spawner._stop_pending = False - toc = IOLoop.current().time() - self.log.info("User %s server took %.3f seconds to stop", user.name, toc - tic) - self.statsd.timing('spawner.stop', (toc - tic) * 1000) - RUNNING_SERVERS.dec() future = spawner._stop_future = asyncio.ensure_future(stop()) @@ -1152,10 +1174,13 @@ class UserSpawnHandler(BaseHandler): # spawn has supposedly finished, check on the status if spawner.ready: + poll_start_time = time.perf_counter() status = await spawner.poll() + SERVER_POLL_DURATION_SECONDS.labels( + status=ServerPollStatus.from_status(status) + ).observe(time.perf_counter() - poll_start_time) else: status = 0 - # server is not running, trigger spawn if status is not None: if spawner.options_form: diff --git a/jupyterhub/handlers/pages.py b/jupyterhub/handlers/pages.py index 5385b79e..bb1f1437 100644 --- a/jupyterhub/handlers/pages.py +++ b/jupyterhub/handlers/pages.py @@ -12,7 +12,7 @@ from tornado import web, gen from tornado.httputil import url_concat from .. import orm -from ..utils import admin_only, url_path_join +from ..utils import admin_only, url_path_join, maybe_future from .base import BaseHandler @@ -147,7 +147,7 @@ class SpawnHandler(BaseHandler): for key, byte_list in self.request.files.items(): form_options["%s_file"%key] = byte_list try: - options = user.spawner.options_from_form(form_options) + options = await maybe_future(user.spawner.options_from_form(form_options)) await self.spawn_single_user(user, options=options) except Exception as e: self.log.error("Failed to spawn single-user server with form", exc_info=True) diff --git a/jupyterhub/metrics.py b/jupyterhub/metrics.py index 5408dd35..fd321f5b 100644 --- a/jupyterhub/metrics.py +++ b/jupyterhub/metrics.py @@ -37,11 +37,23 @@ SERVER_SPAWN_DURATION_SECONDS = Histogram( RUNNING_SERVERS = Gauge( 'running_servers', - 'the number of user servers currently running', + 'the number of user servers currently running' ) RUNNING_SERVERS.set(0) +TOTAL_USERS = Gauge( + 'total_users', + 'toal number of users' + ) + +TOTAL_USERS.set(0) + +CHECK_ROUTES_DURATION_SECONDS = Histogram( + 'check_routes_duration_seconds', + 'Time taken to validate all routes in proxy' +) + class ServerSpawnStatus(Enum): """ Possible values for 'status' label of SERVER_SPAWN_DURATION_SECONDS @@ -79,6 +91,52 @@ class ProxyAddStatus(Enum): for s in ProxyAddStatus: PROXY_ADD_DURATION_SECONDS.labels(status=s) + +SERVER_POLL_DURATION_SECONDS = Histogram( + 'server_poll_duration_seconds', + 'time taken to poll if server is running', + ['status'] +) + +class ServerPollStatus(Enum): + """ + Possible values for 'status' label of SERVER_POLL_DURATION_SECONDS + """ + running = 'running' + stopped = 'stopped' + + @classmethod + def from_status(cls, status): + """Return enum string for a given poll status""" + if status is None: + return cls.running + return cls.stopped + +for s in ServerPollStatus: + SERVER_POLL_DURATION_SECONDS.labels(status=s) + + + +SERVER_STOP_DURATION_SECONDS = Histogram( + 'server_stop_seconds', + 'time taken for server stopping operation', + ['status'], +) + +class ServerStopStatus(Enum): + """ + Possible values for 'status' label of SERVER_STOP_DURATION_SECONDS + """ + success = 'success' + failure = 'failure' + + def __str__(self): + return self.value + +for s in ServerStopStatus: + SERVER_STOP_DURATION_SECONDS.labels(status=s) + + def prometheus_log_method(handler): """ Tornado log handler for recording RED metrics. diff --git a/jupyterhub/proxy.py b/jupyterhub/proxy.py index e2dc7ef2..6e8494cb 100644 --- a/jupyterhub/proxy.py +++ b/jupyterhub/proxy.py @@ -39,6 +39,8 @@ from traitlets import ( from jupyterhub.traitlets import Command from traitlets.config import LoggingConfigurable + +from .metrics import CHECK_ROUTES_DURATION_SECONDS from .objects import Server from . import utils from .utils import url_path_join, make_ssl_context @@ -292,6 +294,7 @@ class Proxy(LoggingConfigurable): @_one_at_a_time async def check_routes(self, user_dict, service_dict, routes=None): """Check that all users are properly routed on the proxy.""" + start = time.perf_counter() #timer starts here when user is created if not routes: self.log.debug("Fetching routes to check") routes = await self.get_all_routes() @@ -364,6 +367,8 @@ class Proxy(LoggingConfigurable): futures.append(self.delete_route(routespec)) await gen.multi(futures) + stop = time.perf_counter() #timer stops here when user is deleted + CHECK_ROUTES_DURATION_SECONDS.observe(stop - start) #histogram metric def add_hub_route(self, hub): """Add the default route for the Hub""" diff --git a/jupyterhub/user.py b/jupyterhub/user.py index 7d0bb801..947dd3ca 100644 --- a/jupyterhub/user.py +++ b/jupyterhub/user.py @@ -17,6 +17,7 @@ from ._version import _check_version, __version__ from .objects import Server from .spawner import LocalProcessSpawner from .crypto import encrypt, decrypt, CryptKeeper, EncryptionUnavailable, InvalidToken +from .metrics import TOTAL_USERS class UserDict(dict): """Like defaultdict, but for users @@ -39,6 +40,7 @@ class UserDict(dict): """Add a user to the UserDict""" if orm_user.id not in self: self[orm_user.id] = self.from_orm(orm_user) + TOTAL_USERS.inc() return self[orm_user.id] def __contains__(self, key): @@ -93,6 +95,7 @@ class UserDict(dict): self.db.delete(user) self.db.commit() # delete from dict after commit + TOTAL_USERS.dec() del self[user_id] def count_active_users(self):