Merge branch 'master' into end-to-end-ssl

This commit is contained in:
Min RK
2018-10-12 16:17:26 +02:00
committed by GitHub
18 changed files with 118 additions and 25 deletions

View File

@@ -2,7 +2,9 @@
mock mock
beautifulsoup4 beautifulsoup4
codecov codecov
coverage<5 # pin coverage to < 5 due to coveragepy#716
cryptography cryptography
html5lib # needed for beautifulsoup
pytest-cov pytest-cov
pytest-tornado pytest-tornado
pytest>=3.3 pytest>=3.3

View File

@@ -124,7 +124,7 @@ hex-encoded string. You can set it this way:
.. code-block:: bash .. code-block:: bash
export JPY_COOKIE_SECRET=`openssl rand -hex 32` export JPY_COOKIE_SECRET=$(openssl rand -hex 32)
For security reasons, this environment variable should only be visible to the For security reasons, this environment variable should only be visible to the
Hub. If you set it dynamically as above, all users will be logged out each time Hub. If you set it dynamically as above, all users will be logged out each time
@@ -173,7 +173,7 @@ using the ``CONFIGPROXY_AUTH_TOKEN`` environment variable:
.. code-block:: bash .. code-block:: bash
export CONFIGPROXY_AUTH_TOKEN='openssl rand -hex 32' export CONFIGPROXY_AUTH_TOKEN=$(openssl rand -hex 32)
This environment variable needs to be visible to the Hub and Proxy. This environment variable needs to be visible to the Hub and Proxy.

View File

@@ -88,7 +88,7 @@ c.JupyterHub.services = [
{ {
'name': 'cull-idle', 'name': 'cull-idle',
'admin': True, 'admin': True,
'command': 'python3 cull_idle_servers.py --timeout=3600'.split(), 'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'],
} }
] ]
``` ```

View File

@@ -93,7 +93,7 @@ c.JupyterHub.services = [
{ {
'name': 'cull-idle', 'name': 'cull-idle',
'admin': True, 'admin': True,
'command': ['python', '/path/to/cull-idle.py', '--timeout'] 'command': [sys.executable, '/path/to/cull-idle.py', '--timeout']
} }
] ]
``` ```

View File

@@ -118,7 +118,7 @@ Here's an example on what you could do in your shell script. See also
# - The first parameter for the Bootstrap Script is the USER. # - The first parameter for the Bootstrap Script is the USER.
USER=$1 USER=$1
if ["$USER" == ""]; then if [ "$USER" == "" ]; then
exit 1 exit 1
fi fi
# ---------------------------------------------------------------------------- # ----------------------------------------------------------------------------

View File

@@ -6,7 +6,7 @@
# - The first parameter for the Bootstrap Script is the USER. # - The first parameter for the Bootstrap Script is the USER.
USER=$1 USER=$1
if ["$USER" == ""]; then if [ "$USER" == "" ]; then
exit 1 exit 1
fi fi
# ---------------------------------------------------------------------------- # ----------------------------------------------------------------------------

View File

@@ -15,7 +15,7 @@ c.JupyterHub.services = [
{ {
'name': 'cull-idle', 'name': 'cull-idle',
'admin': True, 'admin': True,
'command': 'python3 cull_idle_servers.py --timeout=3600'.split(), 'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'],
} }
] ]
``` ```
@@ -36,6 +36,6 @@ Generate an API token and store it in the `JUPYTERHUB_API_TOKEN` environment
variable. Run `cull_idle_servers.py` manually. variable. Run `cull_idle_servers.py` manually.
```bash ```bash
export JUPYTERHUB_API_TOKEN=`jupyterhub token` export JUPYTERHUB_API_TOKEN=$(jupyterhub token)
python3 cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api] python3 cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api]
``` ```

View File

@@ -16,13 +16,13 @@ You can run this as a service managed by JupyterHub with this in your config::
{ {
'name': 'cull-idle', 'name': 'cull-idle',
'admin': True, 'admin': True,
'command': 'python3 cull_idle_servers.py --timeout=3600'.split(), 'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'],
} }
] ]
Or run it manually by generating an API token and storing it in `JUPYTERHUB_API_TOKEN`: Or run it manually by generating an API token and storing it in `JUPYTERHUB_API_TOKEN`:
export JUPYTERHUB_API_TOKEN=`jupyterhub token` export JUPYTERHUB_API_TOKEN=$(jupyterhub token)
python3 cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api] python3 cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api]
This script uses the same ``--timeout`` and ``--max-age`` values for This script uses the same ``--timeout`` and ``--max-age`` values for

View File

@@ -3,6 +3,6 @@ c.JupyterHub.services = [
{ {
'name': 'cull-idle', 'name': 'cull-idle',
'admin': True, 'admin': True,
'command': 'python3 cull_idle_servers.py --timeout=3600'.split(), 'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'],
} }
] ]

View File

@@ -18,7 +18,7 @@ implementations in other web servers or languages.
1. generate an API token: 1. generate an API token:
export JUPYTERHUB_API_TOKEN=`openssl rand -hex 32` export JUPYTERHUB_API_TOKEN=$(openssl rand -hex 32)
2. launch a version of the whoami service. 2. launch a version of the whoami service.
For `whoami-oauth`: For `whoami-oauth`:

View File

@@ -11,7 +11,7 @@ configuration file something like:
{ {
'name': 'announcement', 'name': 'announcement',
'url': 'http://127.0.0.1:8888', 'url': 'http://127.0.0.1:8888',
'command': ["python", "-m", "announcement"], 'command': [sys.executable, "-m", "announcement"],
} }
] ]

View File

@@ -5,7 +5,7 @@ c.JupyterHub.services = [
{ {
'name': 'announcement', 'name': 'announcement',
'url': 'http://127.0.0.1:8888', 'url': 'http://127.0.0.1:8888',
'command': ["python", "-m", "announcement"], 'command': [sys.executable, "-m", "announcement"],
} }
] ]

View File

@@ -1,4 +1,4 @@
export CONFIGPROXY_AUTH_TOKEN=`openssl rand -hex 32` export CONFIGPROXY_AUTH_TOKEN=$(openssl rand -hex 32)
# start JupyterHub # start JupyterHub
jupyterhub --ip=127.0.0.1 jupyterhub --ip=127.0.0.1

View File

@@ -32,7 +32,8 @@ from ..utils import maybe_future, url_path_join
from ..metrics import ( from ..metrics import (
SERVER_SPAWN_DURATION_SECONDS, ServerSpawnStatus, SERVER_SPAWN_DURATION_SECONDS, ServerSpawnStatus,
PROXY_ADD_DURATION_SECONDS, ProxyAddStatus, PROXY_ADD_DURATION_SECONDS, ProxyAddStatus,
RUNNING_SERVERS SERVER_POLL_DURATION_SECONDS, ServerPollStatus,
RUNNING_SERVERS, SERVER_STOP_DURATION_SECONDS, ServerStopStatus
) )
# pattern for the authentication token header # pattern for the authentication token header
@@ -821,13 +822,19 @@ class BaseHandler(RequestHandler):
# start has finished, but the server hasn't come up # start has finished, but the server hasn't come up
# check if the server died while we were waiting # check if the server died while we were waiting
poll_start_time = time.perf_counter()
status = await spawner.poll() status = await spawner.poll()
SERVER_POLL_DURATION_SECONDS.labels(
status=ServerPollStatus.from_status(status)
).observe(time.perf_counter() - poll_start_time)
if status is not None: if status is not None:
toc = IOLoop.current().time() toc = IOLoop.current().time()
self.statsd.timing('spawner.failure', (toc - tic) * 1000) self.statsd.timing('spawner.failure', (toc - tic) * 1000)
SERVER_SPAWN_DURATION_SECONDS.labels( SERVER_SPAWN_DURATION_SECONDS.labels(
status=ServerSpawnStatus.failure status=ServerSpawnStatus.failure
).observe(time.perf_counter() - spawn_start_time) ).observe(time.perf_counter() - spawn_start_time)
raise web.HTTPError(500, "Spawner failed to start [status=%s]. The logs for %s may contain details." % ( raise web.HTTPError(500, "Spawner failed to start [status=%s]. The logs for %s may contain details." % (
status, spawner._log_name)) status, spawner._log_name))
@@ -848,9 +855,17 @@ class BaseHandler(RequestHandler):
async def user_stopped(self, user, server_name): async def user_stopped(self, user, server_name):
"""Callback that fires when the spawner has stopped""" """Callback that fires when the spawner has stopped"""
spawner = user.spawners[server_name] spawner = user.spawners[server_name]
poll_start_time = time.perf_counter()
status = await spawner.poll() status = await spawner.poll()
SERVER_POLL_DURATION_SECONDS.labels(
status=ServerPollStatus.from_status(status)
).observe(time.perf_counter() - poll_start_time)
if status is None: if status is None:
status = 'unknown' status = 'unknown'
self.log.warning("User %s server stopped, with exit code: %s", self.log.warning("User %s server stopped, with exit code: %s",
user.name, status, user.name, status,
) )
@@ -874,18 +889,25 @@ class BaseHandler(RequestHandler):
2. stop the server 2. stop the server
3. notice that it stopped 3. notice that it stopped
""" """
tic = IOLoop.current().time() tic = time.perf_counter()
try: try:
await self.proxy.delete_user(user, server_name) await self.proxy.delete_user(user, server_name)
await user.stop(server_name) await user.stop(server_name)
toc = time.perf_counter()
self.log.info("User %s server took %.3f seconds to stop", user.name, toc - tic)
self.statsd.timing('spawner.stop', (toc - tic) * 1000)
RUNNING_SERVERS.dec()
SERVER_STOP_DURATION_SECONDS.labels(
status=ServerStopStatus.success
).observe(toc - tic)
except:
SERVER_STOP_DURATION_SECONDS.labels(
status=ServerStopStatus.failure
).observe(time.perf_counter() - tic)
finally: finally:
spawner._stop_future = None spawner._stop_future = None
spawner._stop_pending = False spawner._stop_pending = False
toc = IOLoop.current().time()
self.log.info("User %s server took %.3f seconds to stop", user.name, toc - tic)
self.statsd.timing('spawner.stop', (toc - tic) * 1000)
RUNNING_SERVERS.dec()
future = spawner._stop_future = asyncio.ensure_future(stop()) future = spawner._stop_future = asyncio.ensure_future(stop())
@@ -1152,10 +1174,13 @@ class UserSpawnHandler(BaseHandler):
# spawn has supposedly finished, check on the status # spawn has supposedly finished, check on the status
if spawner.ready: if spawner.ready:
poll_start_time = time.perf_counter()
status = await spawner.poll() status = await spawner.poll()
SERVER_POLL_DURATION_SECONDS.labels(
status=ServerPollStatus.from_status(status)
).observe(time.perf_counter() - poll_start_time)
else: else:
status = 0 status = 0
# server is not running, trigger spawn # server is not running, trigger spawn
if status is not None: if status is not None:
if spawner.options_form: if spawner.options_form:

View File

@@ -12,7 +12,7 @@ from tornado import web, gen
from tornado.httputil import url_concat from tornado.httputil import url_concat
from .. import orm from .. import orm
from ..utils import admin_only, url_path_join from ..utils import admin_only, url_path_join, maybe_future
from .base import BaseHandler from .base import BaseHandler
@@ -147,7 +147,7 @@ class SpawnHandler(BaseHandler):
for key, byte_list in self.request.files.items(): for key, byte_list in self.request.files.items():
form_options["%s_file"%key] = byte_list form_options["%s_file"%key] = byte_list
try: try:
options = user.spawner.options_from_form(form_options) options = await maybe_future(user.spawner.options_from_form(form_options))
await self.spawn_single_user(user, options=options) await self.spawn_single_user(user, options=options)
except Exception as e: except Exception as e:
self.log.error("Failed to spawn single-user server with form", exc_info=True) self.log.error("Failed to spawn single-user server with form", exc_info=True)

View File

@@ -37,11 +37,23 @@ SERVER_SPAWN_DURATION_SECONDS = Histogram(
RUNNING_SERVERS = Gauge( RUNNING_SERVERS = Gauge(
'running_servers', 'running_servers',
'the number of user servers currently running', 'the number of user servers currently running'
) )
RUNNING_SERVERS.set(0) RUNNING_SERVERS.set(0)
TOTAL_USERS = Gauge(
'total_users',
'total number of users'
)
TOTAL_USERS.set(0)
CHECK_ROUTES_DURATION_SECONDS = Histogram(
'check_routes_duration_seconds',
'Time taken to validate all routes in proxy'
)
class ServerSpawnStatus(Enum): class ServerSpawnStatus(Enum):
""" """
Possible values for 'status' label of SERVER_SPAWN_DURATION_SECONDS Possible values for 'status' label of SERVER_SPAWN_DURATION_SECONDS
@@ -79,6 +91,52 @@ class ProxyAddStatus(Enum):
for s in ProxyAddStatus: for s in ProxyAddStatus:
PROXY_ADD_DURATION_SECONDS.labels(status=s) PROXY_ADD_DURATION_SECONDS.labels(status=s)
SERVER_POLL_DURATION_SECONDS = Histogram(
'server_poll_duration_seconds',
'time taken to poll if server is running',
['status']
)
class ServerPollStatus(Enum):
"""
Possible values for 'status' label of SERVER_POLL_DURATION_SECONDS
"""
running = 'running'
stopped = 'stopped'
@classmethod
def from_status(cls, status):
"""Return enum string for a given poll status"""
if status is None:
return cls.running
return cls.stopped
for s in ServerPollStatus:
SERVER_POLL_DURATION_SECONDS.labels(status=s)
SERVER_STOP_DURATION_SECONDS = Histogram(
'server_stop_seconds',
'time taken for server stopping operation',
['status'],
)
class ServerStopStatus(Enum):
"""
Possible values for 'status' label of SERVER_STOP_DURATION_SECONDS
"""
success = 'success'
failure = 'failure'
def __str__(self):
return self.value
for s in ServerStopStatus:
SERVER_STOP_DURATION_SECONDS.labels(status=s)
def prometheus_log_method(handler): def prometheus_log_method(handler):
""" """
Tornado log handler for recording RED metrics. Tornado log handler for recording RED metrics.

View File

@@ -39,6 +39,8 @@ from traitlets import (
from jupyterhub.traitlets import Command from jupyterhub.traitlets import Command
from traitlets.config import LoggingConfigurable from traitlets.config import LoggingConfigurable
from .metrics import CHECK_ROUTES_DURATION_SECONDS
from .objects import Server from .objects import Server
from . import utils from . import utils
from .utils import url_path_join, make_ssl_context from .utils import url_path_join, make_ssl_context
@@ -292,6 +294,7 @@ class Proxy(LoggingConfigurable):
@_one_at_a_time @_one_at_a_time
async def check_routes(self, user_dict, service_dict, routes=None): async def check_routes(self, user_dict, service_dict, routes=None):
"""Check that all users are properly routed on the proxy.""" """Check that all users are properly routed on the proxy."""
start = time.perf_counter() #timer starts here when user is created
if not routes: if not routes:
self.log.debug("Fetching routes to check") self.log.debug("Fetching routes to check")
routes = await self.get_all_routes() routes = await self.get_all_routes()
@@ -364,6 +367,8 @@ class Proxy(LoggingConfigurable):
futures.append(self.delete_route(routespec)) futures.append(self.delete_route(routespec))
await gen.multi(futures) await gen.multi(futures)
stop = time.perf_counter() #timer stops here when user is deleted
CHECK_ROUTES_DURATION_SECONDS.observe(stop - start) #histogram metric
def add_hub_route(self, hub): def add_hub_route(self, hub):
"""Add the default route for the Hub""" """Add the default route for the Hub"""

View File

@@ -17,6 +17,7 @@ from ._version import _check_version, __version__
from .objects import Server from .objects import Server
from .spawner import LocalProcessSpawner from .spawner import LocalProcessSpawner
from .crypto import encrypt, decrypt, CryptKeeper, EncryptionUnavailable, InvalidToken from .crypto import encrypt, decrypt, CryptKeeper, EncryptionUnavailable, InvalidToken
from .metrics import TOTAL_USERS
class UserDict(dict): class UserDict(dict):
"""Like defaultdict, but for users """Like defaultdict, but for users
@@ -39,6 +40,7 @@ class UserDict(dict):
"""Add a user to the UserDict""" """Add a user to the UserDict"""
if orm_user.id not in self: if orm_user.id not in self:
self[orm_user.id] = self.from_orm(orm_user) self[orm_user.id] = self.from_orm(orm_user)
TOTAL_USERS.inc()
return self[orm_user.id] return self[orm_user.id]
def __contains__(self, key): def __contains__(self, key):
@@ -93,6 +95,7 @@ class UserDict(dict):
self.db.delete(user) self.db.delete(user)
self.db.commit() self.db.commit()
# delete from dict after commit # delete from dict after commit
TOTAL_USERS.dec()
del self[user_id] del self[user_id]
def count_active_users(self): def count_active_users(self):