Files
jupyterhub/jupyterhub/singleuser/extension.py

656 lines
23 KiB
Python

"""
Integrate JupyterHub auth with Jupyter Server as a Server Extension
This replaces earlier versions, which were implemented via subclassing jupyter-notebook's NotebookApp.
This code runs only in each user's Jupyter Server process.
Jupyter Server 2 provides two new APIs:
- IdentityProvider, which authenticates the user making the request
- Authorizer, which determines whether an authenticated user is authorized to take a particular action
This Extension implements both for resolving permissions with JupyterHub scopes.
By default, in JupyterHub we only _authenticate_ users with sufficient `access:servers` permissions,
therefore the JupyterHub Authorizer allows any authenticated user to take any action,
but custom deployments may refine these permissions to enable e.g. read-only access.
- Jupyter Server extension documentation: https://jupyter-server.readthedocs.io/en/latest/developers/extensions.html
- Jupyter Server authentication API documentation: https://jupyter-server.readthedocs.io/en/latest/operators/security.html
Requires Jupyter Server 2.0, which in turn requires Python 3.7.
"""
from __future__ import annotations
import asyncio
import json
import os
import random
from datetime import timezone
from functools import wraps
from pathlib import Path
from unittest import mock
from urllib.parse import urlparse
from jupyter_server.auth import Authorizer, IdentityProvider, User
from jupyter_server.auth.logout import LogoutHandler
from jupyter_server.extension.application import ExtensionApp
from tornado.httpclient import AsyncHTTPClient, HTTPRequest
from tornado.httputil import url_concat
from tornado.web import HTTPError
from traitlets import Any, Bool, Instance, Integer, Unicode, default
from jupyterhub._version import __version__, _check_version
from jupyterhub.log import log_request
from jupyterhub.services.auth import HubOAuth, HubOAuthCallbackHandler
from jupyterhub.utils import (
exponential_backoff,
isoformat,
make_ssl_context,
url_path_join,
)
from ._disable_user_config import _disable_user_config
# Resolved path (as a string) of the "templates" directory next to this module.
SINGLEUSER_TEMPLATES_DIR = str(Path(__file__).parent.resolve() / "templates")
def _bool_env(key):
    """Interpret the environment variable `key` as a boolean flag.

    An unset variable, an empty string, and "0" all mean False;
    every other value means True.
    """
    raw_value = os.environ.get(key, "")
    return raw_value not in {"", "0"}
def _exclude_home(path_list):
    """Yield the entries of `path_list` that are not under my home directory.

    Used to disable per-user configuration.
    """
    home_prefix = os.path.expanduser('~/')
    yield from (
        entry for entry in path_list if not entry.startswith(home_prefix)
    )
class JupyterHubLogoutHandler(LogoutHandler):
    """Logout handler that clears the local cookie, then redirects to the Hub's logout URL."""

    def get(self):
        """Clear the single-user auth cookie and redirect to the Hub's logout page."""
        auth = self.identity_provider.hub_auth
        # drop the token stored in the single-user cookie (set by hub_auth)
        auth.clear_cookie(self)
        # the Hub's logout page takes over to log out of JupyterHub itself
        hub_host = auth.hub_host
        logout_path = url_path_join(auth.hub_prefix, "logout")
        self.redirect(hub_host + logout_path)
class JupyterHubUser(User):
    """jupyter_server User subclass that also keeps the JupyterHub user info"""

    # Declared as a bare annotation rather than a dataclass field,
    # so it isn't returned in the identity model via the REST API.
    # It could be, though!
    hub_user: dict

    def __init__(self, hub_user):
        """Keep `hub_user` and initialize the base User with its "name" as username."""
        self.hub_user = hub_user
        username = self.hub_user["name"]
        super().__init__(username=username)
class JupyterHubOAuthCallbackHandler(HubOAuthCallbackHandler):
    """Handler for the callback step that completes OAuth with JupyterHub"""

    def initialize(self, hub_auth):
        """Store the `hub_auth` object supplied in the handler's init arguments."""
        self.hub_auth = hub_auth
class JupyterHubIdentityProvider(IdentityProvider):
    """Identity Provider for JupyterHub OAuth

    Replacement for JupyterHub's HubAuthenticated mixin
    """

    logout_handler_class = JupyterHubLogoutHandler
    hub_auth = Instance(HubOAuth)

    @property
    def token(self):
        """The API token held by `hub_auth`."""
        return self.hub_auth.api_token

    token_generated = False

    @default("hub_auth")
    def _default_hub_auth(self):
        # HubAuth gets most of its config from the environment
        return HubOAuth(parent=self)

    def _patch_get_login_url(self, handler):
        """Replace `handler.get_login_url` with a wrapper that targets the Hub's OAuth login URL.

        The wrapper sets the OAuth state cookie on the handler,
        appends the state to the Hub login URL,
        and then calls the handler's original `get_login_url`
        with the `login_url` setting temporarily overridden.
        """
        original_get_login_url = handler.get_login_url

        def get_login_url():
            """Return the Hub's login URL, to begin login redirect"""
            login_url = self.hub_auth.login_url
            # add state argument to OAuth url
            state = self.hub_auth.set_state_cookie(
                handler, next_url=handler.request.uri
            )
            login_url = url_concat(login_url, {'state': state})
            # temporary override at setting level,
            # to allow any subclass overrides of get_login_url to preserve their effect;
            # for example, APIHandler raises 403 to prevent redirects
            with mock.patch.dict(
                handler.application.settings, {"login_url": login_url}
            ):
                self.log.debug("Redirecting to login url: %s", login_url)
                return original_get_login_url()

        handler.get_login_url = get_login_url

    async def get_user(self, handler):
        """Return the JupyterHubUser for this request, or None.

        None is returned both when the request is not authenticated
        and when the user is authenticated but lacks the access scopes.
        The result for authenticated-and-allowed users (and for
        unauthenticated requests) is cached on the handler
        as `_jupyterhub_user`.
        """
        if hasattr(handler, "_jupyterhub_user"):
            return handler._jupyterhub_user
        self._patch_get_login_url(handler)
        user = await self.hub_auth.get_user(handler, sync=False)
        if user is None:
            handler._jupyterhub_user = None
            return None
        # check access scopes - don't allow even authenticated
        # users with no access to this service past this stage.
        # this is technically the Authorizer's job (below),
        # but the IdentityProvider is the only protection on handlers
        # decorated only with tornado's `@web.authenticated`,
        # that haven't adopted the Jupyter Server 2 authorization decorators.
        # so we check access scopes here, to be safe.
        #
        # lazy %-style logging: this runs on every request,
        # so don't format the message unless the level is enabled
        self.log.debug(
            "Checking user %s with scopes %s against %s",
            user['name'],
            user['scopes'],
            self.hub_auth.access_scopes,
        )
        scopes = self.hub_auth.check_scopes(self.hub_auth.access_scopes, user)
        if scopes:
            self.log.debug("Allowing user %s with scopes %s", user['name'], scopes)
        else:
            self.log.warning("Not allowing user %s", user['name'])
            # User is authenticated, but not authorized.
            # Override redirect so if/when tornado @web.authenticated
            # tries to redirect to login URL, 403 will be raised instead.
            # This is not the best, but avoids problems that can be caused
            # when get_current_user is allowed to raise,
            # and avoids redirect loops for users who are logged in,
            # but not allowed to access this resource.

            def raise_on_redirect(*args, **kwargs):
                raise HTTPError(403, "{kind} {name} is not allowed.".format(**user))

            handler.redirect = raise_on_redirect
            return None
        handler._jupyterhub_user = JupyterHubUser(user)
        return handler._jupyterhub_user

    def get_handlers(self):
        """Register our logout and OAuth callback handlers"""
        return [
            ("/logout", self.logout_handler_class),
            (
                "/oauth_callback",
                JupyterHubOAuthCallbackHandler,
                {"hub_auth": self.hub_auth},
            ),
        ]

    def validate_security(self, app, ssl_options=None):
        """Prevent warnings about security from base class"""
        return

    def page_config_hook(self, handler, page_config):
        """JupyterLab page config hook

        Adds JupyterHub info to page config.

        Places the JupyterHub API token in PageConfig.token.

        Only has effect on jupyterlab_server >=2.9
        """
        user = handler.current_user
        # originally implemented in jupyterlab's LabApp
        page_config["hubUser"] = user.name if user else ""
        page_config["hubPrefix"] = hub_prefix = self.hub_auth.hub_prefix
        page_config["hubHost"] = self.hub_auth.hub_host
        page_config["shareUrl"] = url_path_join(hub_prefix, "user-redirect")
        page_config["hubServerName"] = os.environ.get("JUPYTERHUB_SERVER_NAME", "")
        page_config["token"] = self.hub_auth.get_token(handler) or ""
        return page_config
class JupyterHubAuthorizer(Authorizer):
    """Authorizer that looks for permissions in JupyterHub scopes.

    Currently only checks the `access:servers` scope(s),
    which ought to be redundant with checks already in `JupyterHubIdentityProvider` for safety.
    """

    @property
    def hub_auth(self):
        """The `hub_auth` object of the identity provider."""
        return self.identity_provider.hub_auth

    def is_authorized(self, handler, user, action, resource):
        """
        Return whether the authenticated user has permission to perform `action` on `resource`.

        Currently: action and resource are ignored,
        and only the `access:servers` scope is checked.

        This method can be overridden (in combination with custom scopes) to implement granular permissions,
        such as read-only access or access to subsets of the server.
        """
        # This check for access scopes is redundant
        # with the IdentityProvider above,
        # but better to be redundant than allow unauthorized actions.
        # If we remove a redundant check,
        # it should be the one in the identity provider,
        # not this one.
        #
        # Use `access_scopes`, the same attribute the identity provider checks,
        # so the two checks cannot drift apart.
        have_scopes = self.hub_auth.check_scopes(
            self.hub_auth.access_scopes, user.hub_user
        )
        self.log.debug(
            "%s has permissions %s required to %s on %s",
            user.username,
            have_scopes,
            action,
            resource,
        )
        return bool(have_scopes)
def _fatal_errors(f):
    """Decorator to make errors fatal to the server app

    Ensures our extension is loaded or the server exits,
    rather than starting a server without jupyterhub auth enabled.

    The wrapped method's return value is passed through unchanged.
    If it raises, the exception is logged and `self.exit(1)` is called;
    should `self.exit` return, the wrapper returns None.
    """

    @wraps(f)
    def wrapped(self, *args, **kwargs):
        try:
            return f(self, *args, **kwargs)
        except Exception:
            self.log.exception("Failed to load JupyterHubSingleUser server extension")
            self.exit(1)

    return wrapped
class JupyterHubSingleUser(ExtensionApp):
    """Jupyter Server extension entrypoint.

    Enables JupyterHub authentication
    and some JupyterHub-specific configuration from environment variables

    Server extensions are loaded before the rest of the server is set up
    """

    name = app_namespace = "jupyterhub-singleuser"
    version = __version__
    # enabled unless $JUPYTERHUB_SINGLEUSER_LOAD_OTHER_EXTENSIONS is "" or "0"
    # (an unset variable counts as "1")
    load_other_extensions = os.environ.get(
        "JUPYTERHUB_SINGLEUSER_LOAD_OTHER_EXTENSIONS", "1"
    ) not in {"", "0"}

    # Most of this is _copied_ from the SingleUserNotebookApp mixin,
    # which will be deprecated over time
    # (i.e. once we can _require_ jupyter server 2.0)

    hub_auth = Instance(HubOAuth)

    @default("hub_auth")
    def _default_hub_auth(self):
        # HubAuth gets most of its config from the environment
        return HubOAuth(parent=self)

    # create dynamic default http client,
    # configured with any relevant ssl config
    hub_http_client = Any()

    @default('hub_http_client')
    def _default_client(self):
        ssl_context = make_ssl_context(
            self.hub_auth.keyfile,
            self.hub_auth.certfile,
            cafile=self.hub_auth.client_ca,
        )
        AsyncHTTPClient.configure(None, defaults={"ssl_options": ssl_context})
        return AsyncHTTPClient()

    async def check_hub_version(self):
        """Test a connection to my Hub

        - exit if I can't connect at all
        - check version and warn on sufficient mismatch
        """
        client = self.hub_http_client
        RETRIES = 5
        for i in range(1, RETRIES + 1):
            try:
                resp = await client.fetch(self.hub_auth.api_url)
            except Exception:
                self.log.exception(
                    "Failed to connect to my Hub at %s (attempt %i/%i). Is it running?",
                    self.hub_auth.api_url,
                    i,
                    RETRIES,
                )
                # back off 2, 4, 8, 16, 16 seconds between attempts
                await asyncio.sleep(min(2**i, 16))
            else:
                break
        else:
            # every attempt failed
            self.exit(1)
            # `resp` was never bound; don't fall through if exit() returns
            return

        hub_version = resp.headers.get('X-JupyterHub-Version')
        _check_version(hub_version, __version__, self.log)

    server_name = Unicode()

    @default('server_name')
    def _server_name_default(self):
        return os.environ.get('JUPYTERHUB_SERVER_NAME', '')

    hub_activity_url = Unicode(
        config=True, help="URL for sending JupyterHub activity updates"
    )

    @default('hub_activity_url')
    def _default_activity_url(self):
        return os.environ.get('JUPYTERHUB_ACTIVITY_URL', '')

    hub_activity_interval = Integer(
        300,
        config=True,
        help="""
        Interval (in seconds) on which to update the Hub
        with our latest activity.
        """,
    )

    @default('hub_activity_interval')
    def _default_activity_interval(self):
        # $JUPYTERHUB_ACTIVITY_INTERVAL overrides the 300 second default
        env_value = os.environ.get('JUPYTERHUB_ACTIVITY_INTERVAL')
        if env_value:
            return int(env_value)
        else:
            return 300

    _last_activity_sent = Any(allow_none=True)

    async def notify_activity(self):
        """Notify jupyterhub of activity

        Does nothing when the server reports no activity,
        or when the latest activity is older than what was last sent.
        Otherwise POSTs the timestamp to `hub_activity_url`,
        retrying via `exponential_backoff`,
        and records it in `_last_activity_sent` afterwards.
        """
        client = self.hub_http_client
        last_activity = self.serverapp.web_app.last_activity()
        if not last_activity:
            self.log.debug("No activity to send to the Hub")
            return

        # protect against mixed timezone comparisons
        if not last_activity.tzinfo:
            # assume naive timestamps are utc
            self.log.warning("last activity is using naive timestamps")
            last_activity = last_activity.replace(tzinfo=timezone.utc)

        if self._last_activity_sent and last_activity < self._last_activity_sent:
            self.log.debug("No activity since %s", self._last_activity_sent)
            return

        last_activity_timestamp = isoformat(last_activity)

        async def notify():
            """Send one activity update; return True on success, False on failure."""
            self.log.debug("Notifying Hub of activity %s", last_activity_timestamp)
            req = HTTPRequest(
                url=self.hub_activity_url,
                method='POST',
                headers={
                    "Authorization": f"token {self.hub_auth.api_token}",
                    "Content-Type": "application/json",
                },
                body=json.dumps(
                    {
                        'servers': {
                            self.server_name: {'last_activity': last_activity_timestamp}
                        },
                        'last_activity': last_activity_timestamp,
                    }
                ),
            )
            try:
                await client.fetch(req)
            except Exception:
                self.log.exception("Error notifying Hub of activity")
                return False
            else:
                return True

        await exponential_backoff(
            notify,
            fail_message="Failed to notify Hub of activity",
            start_wait=1,
            max_wait=15,
            timeout=60,
        )
        self._last_activity_sent = last_activity

    async def keep_activity_updated(self):
        """Call `notify_activity` forever, sleeping about `hub_activity_interval` seconds between calls.

        Returns immediately if `hub_activity_url` or `hub_activity_interval` is unset.
        """
        if not self.hub_activity_url or not self.hub_activity_interval:
            self.log.warning("Activity events disabled")
            return
        self.log.info(
            "Updating Hub with activity every %s seconds", self.hub_activity_interval
        )
        while True:
            try:
                await self.notify_activity()
            except Exception:
                # best-effort: log and try again on the next interval
                self.log.exception("Error notifying Hub of activity")
            # add 20% jitter to the interval to avoid alignment
            # of lots of requests from user servers
            t = self.hub_activity_interval * (1 + 0.2 * (random.random() - 0.5))
            await asyncio.sleep(t)

    def _log_app_versions(self):
        """Log application versions at startup

        Logs versions of jupyterhub and singleuser-server base versions (jupyterlab, jupyter_server, notebook)
        """
        self.log.info(
            f"Starting jupyterhub single-user server extension version {__version__}"
        )

    @_fatal_errors
    def load_config_file(self):
        """Load JupyterHub singleuser config from the environment"""
        self._log_app_versions()
        if not os.environ.get('JUPYTERHUB_SERVICE_URL'):
            raise KeyError("Missing required environment $JUPYTERHUB_SERVICE_URL")

        cfg = self.config.ServerApp
        cfg.identity_provider_class = JupyterHubIdentityProvider

        # disable some single-user features
        cfg.allow_remote_access = True
        cfg.open_browser = False
        cfg.trust_xheaders = True
        cfg.quit_button = False
        cfg.port_retries = 0
        cfg.answer_yes = True
        self.config.FileContentsManager.delete_to_trash = False

        # load http server config from environment
        url = urlparse(os.environ['JUPYTERHUB_SERVICE_URL'])
        if url.port:
            cfg.port = url.port
        elif url.scheme == 'http':
            cfg.port = 80
        elif url.scheme == 'https':
            cfg.port = 443
        if url.hostname:
            cfg.ip = url.hostname
        else:
            cfg.ip = "127.0.0.1"

        cfg.base_url = os.environ.get('JUPYTERHUB_SERVICE_PREFIX') or '/'

        # load default_url at all kinds of priority,
        # to make sure it has the desired effect
        cfg.default_url = self.default_url = self.get_default_url()

        # load internal SSL configuration
        cfg.keyfile = os.environ.get('JUPYTERHUB_SSL_KEYFILE') or ''
        cfg.certfile = os.environ.get('JUPYTERHUB_SSL_CERTFILE') or ''
        cfg.client_ca = os.environ.get('JUPYTERHUB_SSL_CLIENT_CA') or ''

        if cfg.certfile:
            self.serverapp.log.info(f"Using SSL cert {cfg.certfile}")

        # Jupyter Server default: config files have higher priority than extensions,
        # by:
        # 1. load config files
        # 2. load extension config
        # 3. merge file config into extension config
        # we invert that by merging our extension config into server config before
        # they get merged the other way
        # this way config from this extension should always have highest priority
        self.serverapp.update_config(self.config)

        # add our custom templates
        self.config.NotebookApp.extra_template_paths.append(SINGLEUSER_TEMPLATES_DIR)

    @default("default_url")
    def get_default_url(self):
        """Pick the default URL: server config, then environment, then extensions.

        Returns "/" when nothing provides a default URL.
        """
        # 1. explicit via _user_ config (?)
        if 'default_url' in self.serverapp.config.ServerApp:
            default_url = self.serverapp.config.ServerApp.default_url
            self.log.info(f"Using default url from user config: {default_url}")
            return default_url

        # 2. explicit via JupyterHub admin config (c.Spawner.default_url)
        default_url = os.environ.get("JUPYTERHUB_DEFAULT_URL")
        if default_url:
            self.log.info(
                f"Using default url from environment $JUPYTERHUB_DEFAULT_URL: {default_url}"
            )
            return default_url

        # 3. look for known UI extensions
        # priority (the order of the list below):
        # 1. lab
        # 2. retro
        # 3. nbclassic
        extension_points = self.serverapp.extension_manager.extension_points
        for name in ["lab", "retro", "nbclassic"]:
            if name in extension_points:
                default_url = extension_points[name].app.default_url
                if default_url and default_url != "/":
                    self.log.info(
                        f"Using default url from server extension {name}: {default_url}"
                    )
                    return default_url

        self.log.warning(
            "No default url found in config or known extensions, searching other extensions for default_url"
        )
        # 4. _any_ other extension with a default URL other than "/"
        for (
            name,
            extension_point,
        ) in extension_points.items():
            app = extension_point.app
            # skip ourselves and extension points without an app
            if app is self or not app:
                continue
            default_url = app.default_url
            if default_url and default_url != "/":
                self.log.info(
                    f"Using default url from server extension {name}: {default_url}"
                )
                return default_url

        self.log.warning(
            "Found no extension with a default URL, UI will likely be unavailable"
        )
        return "/"

    def initialize_templates(self):
        """Patch classic-notebook page templates to add Hub-related buttons"""
        app = self.serverapp
        jinja_template_vars = app.jinja_template_vars

        # override template vars
        jinja_template_vars['logo_url'] = self.hub_auth.hub_host + url_path_join(
            self.hub_auth.hub_prefix, 'logo'
        )
        jinja_template_vars[
            'hub_control_panel_url'
        ] = self.hub_auth.hub_host + url_path_join(self.hub_auth.hub_prefix, 'home')

    # the asyncio task running keep_activity_updated, once started
    _activity_task = None

    @_fatal_errors
    def initialize(self, args=None):
        """Finish setting up the extension once the server's web app exists.

        Installs the page config hook, request logger and version header,
        checks the Hub version, and starts the activity-reporting task.
        """
        # initialize takes place after
        # 1. config has been loaded
        # 2. Configurables instantiated
        # 3. serverapp.web_app set up
        super().initialize()
        app = self.serverapp
        app.web_app.settings[
            "page_config_hook"
        ] = app.identity_provider.page_config_hook
        app.web_app.settings["log_function"] = log_request

        # add jupyterhub version header
        headers = app.web_app.settings.setdefault("headers", {})
        headers["X-JupyterHub-Version"] = __version__

        # check jupyterhub version
        app.io_loop.run_sync(self.check_hub_version)

        async def _start_activity():
            # create the task from inside a coroutine run on the server's loop
            self._activity_task = asyncio.ensure_future(self.keep_activity_updated())

        app.io_loop.run_sync(_start_activity)

    async def stop_extension(self):
        """Cancel the activity-reporting task, if it was started."""
        if self._activity_task:
            self._activity_task.cancel()

    # whether user-controllable config is disabled;
    # defaults to the value of $JUPYTERHUB_DISABLE_USER_CONFIG
    disable_user_config = Bool()

    @default("disable_user_config")
    def _defaut_disable_user_config(self):
        return _bool_env("JUPYTERHUB_DISABLE_USER_CONFIG")

    @classmethod
    def make_serverapp(cls, **kwargs):
        """Instantiate the ServerApp

        Override to customize the ServerApp before it loads any configuration
        """
        serverapp = super().make_serverapp(**kwargs)
        if _bool_env("JUPYTERHUB_DISABLE_USER_CONFIG"):
            # disable user-controllable config
            _disable_user_config(serverapp)

        if _bool_env("JUPYTERHUB_SINGLEUSER_TEST_EXTENSION"):
            serverapp.log.warning("Enabling jupyterhub test extension")
            serverapp.jpserver_extensions["jupyterhub.tests.extension"] = True

        return serverapp
# `main` is the extension app's launch_instance, so the module can be run directly
main = JupyterHubSingleUser.launch_instance

if __name__ == "__main__":
    main()