Merge branch 'master' into end-to-end-ssl

This commit is contained in:
Min RK
2018-10-12 16:17:26 +02:00
committed by GitHub
18 changed files with 118 additions and 25 deletions

View File

@@ -2,7 +2,9 @@
mock
beautifulsoup4
codecov
coverage<5 # pin coverage to < 5 due to coveragepy#716
cryptography
html5lib # needed for beautifulsoup
pytest-cov
pytest-tornado
pytest>=3.3

View File

@@ -124,7 +124,7 @@ hex-encoded string. You can set it this way:
.. code-block:: bash
export JPY_COOKIE_SECRET=`openssl rand -hex 32`
export JPY_COOKIE_SECRET=$(openssl rand -hex 32)
For security reasons, this environment variable should only be visible to the
Hub. If you set it dynamically as above, all users will be logged out each time
@@ -173,7 +173,7 @@ using the ``CONFIGPROXY_AUTH_TOKEN`` environment variable:
.. code-block:: bash
export CONFIGPROXY_AUTH_TOKEN='openssl rand -hex 32'
export CONFIGPROXY_AUTH_TOKEN=$(openssl rand -hex 32)
This environment variable needs to be visible to the Hub and Proxy.

View File

@@ -88,7 +88,7 @@ c.JupyterHub.services = [
{
'name': 'cull-idle',
'admin': True,
'command': 'python3 cull_idle_servers.py --timeout=3600'.split(),
'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'],
}
]
```

View File

@@ -93,7 +93,7 @@ c.JupyterHub.services = [
{
'name': 'cull-idle',
'admin': True,
'command': ['python', '/path/to/cull-idle.py', '--timeout']
'command': [sys.executable, '/path/to/cull-idle.py', '--timeout']
}
]
```

View File

@@ -15,7 +15,7 @@ c.JupyterHub.services = [
{
'name': 'cull-idle',
'admin': True,
'command': 'python3 cull_idle_servers.py --timeout=3600'.split(),
'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'],
}
]
```
@@ -36,6 +36,6 @@ Generate an API token and store it in the `JUPYTERHUB_API_TOKEN` environment
variable. Run `cull_idle_servers.py` manually.
```bash
export JUPYTERHUB_API_TOKEN=`jupyterhub token`
export JUPYTERHUB_API_TOKEN=$(jupyterhub token)
python3 cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api]
```

View File

@@ -16,13 +16,13 @@ You can run this as a service managed by JupyterHub with this in your config::
{
'name': 'cull-idle',
'admin': True,
'command': 'python3 cull_idle_servers.py --timeout=3600'.split(),
'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'],
}
]
Or run it manually by generating an API token and storing it in `JUPYTERHUB_API_TOKEN`:
export JUPYTERHUB_API_TOKEN=`jupyterhub token`
export JUPYTERHUB_API_TOKEN=$(jupyterhub token)
python3 cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api]
This script uses the same ``--timeout`` and ``--max-age`` values for

View File

@@ -3,6 +3,6 @@ c.JupyterHub.services = [
{
'name': 'cull-idle',
'admin': True,
'command': 'python3 cull_idle_servers.py --timeout=3600'.split(),
'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'],
}
]

View File

@@ -18,7 +18,7 @@ implementations in other web servers or languages.
1. generate an API token:
export JUPYTERHUB_API_TOKEN=`openssl rand -hex 32`
export JUPYTERHUB_API_TOKEN=$(openssl rand -hex 32)
2. launch a version of the whoami service.
For `whoami-oauth`:

View File

@@ -11,7 +11,7 @@ configuration file something like:
{
'name': 'announcement',
'url': 'http://127.0.0.1:8888',
'command': ["python", "-m", "announcement"],
'command': [sys.executable, "-m", "announcement"],
}
]

View File

@@ -5,7 +5,7 @@ c.JupyterHub.services = [
{
'name': 'announcement',
'url': 'http://127.0.0.1:8888',
'command': ["python", "-m", "announcement"],
'command': [sys.executable, "-m", "announcement"],
}
]

View File

@@ -1,4 +1,4 @@
export CONFIGPROXY_AUTH_TOKEN=`openssl rand -hex 32`
export CONFIGPROXY_AUTH_TOKEN=$(openssl rand -hex 32)
# start JupyterHub
jupyterhub --ip=127.0.0.1

View File

@@ -32,7 +32,8 @@ from ..utils import maybe_future, url_path_join
from ..metrics import (
SERVER_SPAWN_DURATION_SECONDS, ServerSpawnStatus,
PROXY_ADD_DURATION_SECONDS, ProxyAddStatus,
RUNNING_SERVERS
SERVER_POLL_DURATION_SECONDS, ServerPollStatus,
RUNNING_SERVERS, SERVER_STOP_DURATION_SECONDS, ServerStopStatus
)
# pattern for the authentication token header
@@ -821,13 +822,19 @@ class BaseHandler(RequestHandler):
# start has finished, but the server hasn't come up
# check if the server died while we were waiting
poll_start_time = time.perf_counter()
status = await spawner.poll()
SERVER_POLL_DURATION_SECONDS.labels(
status=ServerPollStatus.from_status(status)
).observe(time.perf_counter() - poll_start_time)
if status is not None:
toc = IOLoop.current().time()
self.statsd.timing('spawner.failure', (toc - tic) * 1000)
SERVER_SPAWN_DURATION_SECONDS.labels(
status=ServerSpawnStatus.failure
).observe(time.perf_counter() - spawn_start_time)
raise web.HTTPError(500, "Spawner failed to start [status=%s]. The logs for %s may contain details." % (
status, spawner._log_name))
@@ -848,9 +855,17 @@ class BaseHandler(RequestHandler):
async def user_stopped(self, user, server_name):
"""Callback that fires when the spawner has stopped"""
spawner = user.spawners[server_name]
poll_start_time = time.perf_counter()
status = await spawner.poll()
SERVER_POLL_DURATION_SECONDS.labels(
status=ServerPollStatus.from_status(status)
).observe(time.perf_counter() - poll_start_time)
if status is None:
status = 'unknown'
self.log.warning("User %s server stopped, with exit code: %s",
user.name, status,
)
@@ -874,18 +889,25 @@ class BaseHandler(RequestHandler):
2. stop the server
3. notice that it stopped
"""
tic = IOLoop.current().time()
tic = time.perf_counter()
try:
await self.proxy.delete_user(user, server_name)
await user.stop(server_name)
toc = time.perf_counter()
self.log.info("User %s server took %.3f seconds to stop", user.name, toc - tic)
self.statsd.timing('spawner.stop', (toc - tic) * 1000)
RUNNING_SERVERS.dec()
SERVER_STOP_DURATION_SECONDS.labels(
status=ServerStopStatus.success
).observe(toc - tic)
except:
SERVER_STOP_DURATION_SECONDS.labels(
status=ServerStopStatus.failure
).observe(time.perf_counter() - tic)
finally:
spawner._stop_future = None
spawner._stop_pending = False
toc = IOLoop.current().time()
self.log.info("User %s server took %.3f seconds to stop", user.name, toc - tic)
self.statsd.timing('spawner.stop', (toc - tic) * 1000)
RUNNING_SERVERS.dec()
future = spawner._stop_future = asyncio.ensure_future(stop())
@@ -1152,10 +1174,13 @@ class UserSpawnHandler(BaseHandler):
# spawn has supposedly finished, check on the status
if spawner.ready:
poll_start_time = time.perf_counter()
status = await spawner.poll()
SERVER_POLL_DURATION_SECONDS.labels(
status=ServerPollStatus.from_status(status)
).observe(time.perf_counter() - poll_start_time)
else:
status = 0
# server is not running, trigger spawn
if status is not None:
if spawner.options_form:

View File

@@ -12,7 +12,7 @@ from tornado import web, gen
from tornado.httputil import url_concat
from .. import orm
from ..utils import admin_only, url_path_join
from ..utils import admin_only, url_path_join, maybe_future
from .base import BaseHandler
@@ -147,7 +147,7 @@ class SpawnHandler(BaseHandler):
for key, byte_list in self.request.files.items():
form_options["%s_file"%key] = byte_list
try:
options = user.spawner.options_from_form(form_options)
options = await maybe_future(user.spawner.options_from_form(form_options))
await self.spawn_single_user(user, options=options)
except Exception as e:
self.log.error("Failed to spawn single-user server with form", exc_info=True)

View File

@@ -37,11 +37,23 @@ SERVER_SPAWN_DURATION_SECONDS = Histogram(
RUNNING_SERVERS = Gauge(
'running_servers',
'the number of user servers currently running',
'the number of user servers currently running'
)
RUNNING_SERVERS.set(0)
# Gauge exposing the total number of users; initialised to zero at import time.
TOTAL_USERS = Gauge(
    'total_users',
    'total number of users'
)
TOTAL_USERS.set(0)
# Unlabelled histogram of the time taken to validate all routes in the proxy.
CHECK_ROUTES_DURATION_SECONDS = Histogram(
'check_routes_duration_seconds',
'Time taken to validate all routes in proxy'
)
class ServerSpawnStatus(Enum):
"""
Possible values for 'status' label of SERVER_SPAWN_DURATION_SECONDS
@@ -79,6 +91,52 @@ class ProxyAddStatus(Enum):
for s in ProxyAddStatus:
PROXY_ADD_DURATION_SECONDS.labels(status=s)
# Histogram of the time taken to poll whether a server is running,
# partitioned by a single 'status' label.
SERVER_POLL_DURATION_SECONDS = Histogram(
'server_poll_duration_seconds',
'time taken to poll if server is running',
['status']
)
class ServerPollStatus(Enum):
    """
    Possible values for 'status' label of SERVER_POLL_DURATION_SECONDS
    """

    running = 'running'
    stopped = 'stopped'

    def __str__(self):
        """Return the bare member value (e.g. ``'running'``).

        Without this, ``str()`` of a member is ``'ServerPollStatus.running'``;
        ServerStopStatus defines the same override, so the two stay consistent.
        """
        return self.value

    @classmethod
    def from_status(cls, status):
        """Return the enum member for a given poll status.

        ``None`` maps to ``running``; any other status maps to ``stopped``.
        """
        if status is None:
            return cls.running
        return cls.stopped
for s in ServerPollStatus:
SERVER_POLL_DURATION_SECONDS.labels(status=s)
# Histogram of the time taken by a server stop operation,
# partitioned by a single 'status' label.
# NOTE(review): the metric name is 'server_stop_seconds', unlike the
# '*_duration_seconds' names of the sibling histograms -- confirm intentional.
SERVER_STOP_DURATION_SECONDS = Histogram(
'server_stop_seconds',
'time taken for server stopping operation',
['status'],
)
class ServerStopStatus(Enum):
    """
    Values used for the 'status' label of SERVER_STOP_DURATION_SECONDS.
    """

    success = 'success'
    failure = 'failure'

    def __str__(self):
        """Render the member as its bare value rather than ``Class.member``."""
        label = self.value
        return label
for s in ServerStopStatus:
SERVER_STOP_DURATION_SECONDS.labels(status=s)
def prometheus_log_method(handler):
"""
Tornado log handler for recording RED metrics.

View File

@@ -39,6 +39,8 @@ from traitlets import (
from jupyterhub.traitlets import Command
from traitlets.config import LoggingConfigurable
from .metrics import CHECK_ROUTES_DURATION_SECONDS
from .objects import Server
from . import utils
from .utils import url_path_join, make_ssl_context
@@ -292,6 +294,7 @@ class Proxy(LoggingConfigurable):
@_one_at_a_time
async def check_routes(self, user_dict, service_dict, routes=None):
"""Check that all users are properly routed on the proxy."""
start = time.perf_counter() #timer starts here when user is created
if not routes:
self.log.debug("Fetching routes to check")
routes = await self.get_all_routes()
@@ -364,6 +367,8 @@ class Proxy(LoggingConfigurable):
futures.append(self.delete_route(routespec))
await gen.multi(futures)
stop = time.perf_counter() #timer stops here when user is deleted
CHECK_ROUTES_DURATION_SECONDS.observe(stop - start) #histogram metric
def add_hub_route(self, hub):
"""Add the default route for the Hub"""

View File

@@ -17,6 +17,7 @@ from ._version import _check_version, __version__
from .objects import Server
from .spawner import LocalProcessSpawner
from .crypto import encrypt, decrypt, CryptKeeper, EncryptionUnavailable, InvalidToken
from .metrics import TOTAL_USERS
class UserDict(dict):
"""Like defaultdict, but for users
@@ -39,6 +40,7 @@ class UserDict(dict):
"""Add a user to the UserDict"""
if orm_user.id not in self:
self[orm_user.id] = self.from_orm(orm_user)
TOTAL_USERS.inc()
return self[orm_user.id]
def __contains__(self, key):
@@ -93,6 +95,7 @@ class UserDict(dict):
self.db.delete(user)
self.db.commit()
# delete from dict after commit
TOTAL_USERS.dec()
del self[user_id]
def count_active_users(self):