Files
jupyterhub/jupyterhub/services/auth.py
2024-05-07 11:33:59 +02:00

1618 lines
55 KiB
Python

"""Authenticating services with JupyterHub.
Tokens are sent to the Hub for verification.
The Hub replies with a JSON model describing the authenticated user.
This contains two levels of authentication:
- :class:`HubOAuth` - Use OAuth 2 to authenticate browsers with the Hub.
This should be used for any service that should respond to browser requests
(i.e. most services).
- :class:`HubAuth` - token-only authentication, for a service that only need to handle token-authenticated API requests
The ``Auth`` classes (:class:`HubAuth`, :class:`HubOAuth`)
can be used in any application, even outside tornado.
They contain reference implementations of talking to the Hub API
to resolve a token to a user.
The ``Authenticated`` classes (:class:`HubAuthenticated`, :class:`HubOAuthenticated`)
are mixins for tornado handlers that should authenticate with the Hub.
If you are using OAuth, you will also need to register an oauth callback handler to complete the oauth process.
A tornado implementation is provided in :class:`HubOAuthCallbackHandler`.
"""
import asyncio
import hashlib
import json
import os
import random
import re
import secrets
import socket
import string
import time
import warnings
from functools import partial
from http import HTTPStatus
from unittest import mock
from urllib.parse import urlencode, urlparse
from tornado.httpclient import AsyncHTTPClient, HTTPRequest
from tornado.httputil import url_concat
from tornado.log import app_log
from tornado.web import HTTPError, RequestHandler
from tornado.websocket import WebSocketHandler
from traitlets import (
Any,
Bool,
Dict,
Instance,
Integer,
Set,
Unicode,
default,
observe,
validate,
)
from traitlets.config import SingletonConfigurable
from .._xsrf_utils import (
_anonymous_xsrf_id,
_needs_check_xsrf,
_set_xsrf_cookie,
check_xsrf_cookie,
get_xsrf_token,
)
from ..scopes import _intersect_expanded_scopes
from ..utils import _bool_env, get_browser_protocol, url_path_join
def check_scopes(required_scopes, scopes):
"""Check that required_scope(s) are in scopes
Returns the subset of scopes matching required_scopes,
which is truthy if any scopes match any required scopes.
Correctly resolves scope filters *except* for groups -> user,
e.g. require: access:server!user=x, have: access:server!group=y
will not grant access to user x even if user x is in group y.
Parameters
----------
required_scopes: set
The set of scopes required.
scopes: set
The set (or list) of scopes to check against required_scopes
Returns
-------
relevant_scopes: set
The set of scopes in required_scopes that are present in scopes,
which is truthy if any required scopes are present,
and falsy otherwise.
"""
if isinstance(required_scopes, str):
required_scopes = {required_scopes}
intersection = _intersect_expanded_scopes(required_scopes, scopes)
# re-intersect with required_scopes in case the intersection
# applies stricter filters than required_scopes declares
# e.g. required_scopes = {'read:users'} and intersection has only {'read:users!user=x'}
return set(required_scopes) & intersection
class _ExpiringDict(dict):
"""Dict-like cache for Hub API requests
Values will expire after max_age seconds.
A monotonic timer is used (time.monotonic).
A max_age of 0 means cache forever.
"""
max_age = 0
purge_interval = 0
def __init__(self, max_age=0, purge_interval="max_age"):
self.max_age = max_age
if purge_interval == "max_age":
# default behavior: use max_age
purge_interval = max_age
self.purge_interval = purge_interval
self.timestamps = {}
self.values = {}
self._last_purge = time.monotonic()
def __len__(self):
return len(self.values)
def __setitem__(self, key, value):
"""Store key and record timestamp"""
self._maybe_purge()
self.timestamps[key] = time.monotonic()
self.values[key] = value
def __repr__(self):
"""include values and timestamps in repr"""
now = time.monotonic()
return repr(
{
key: '{value} (age={age:.0f}s)'.format(
value=repr(value)[:16] + '...', age=now - self.timestamps[key]
)
for key, value in self.values.items()
}
)
def _check_age(self, key):
"""Check timestamp for a key"""
if key not in self.values:
# not registered, nothing to do
return
now = time.monotonic()
timestamp = self.timestamps[key]
if self.max_age > 0 and timestamp + self.max_age < now:
self.values.pop(key)
self.timestamps.pop(key)
self._maybe_purge()
def __contains__(self, key):
"""dict check for `key in dict`"""
self._check_age(key)
return key in self.values
def __getitem__(self, key):
"""Check age before returning value"""
self._check_age(key)
return self.values[key]
def __delitem__(self, key):
del self.values[key]
del self.timestamps[key]
def get(self, key, default=None):
"""dict-like get"""
try:
return self[key]
except KeyError:
return default
def pop(self, key, default="_raise"):
"""Remove and return an item"""
if key in self:
value = self.values.pop(key)
del self.timestamps[key]
return value
else:
if default == "_raise":
raise KeyError(key)
else:
return default
def clear(self):
"""Clear the cache"""
self.values.clear()
self.timestamps.clear()
self._last_purge = time.monotonic()
# extended methods
def _maybe_purge(self):
"""purge expired values _if_ it's been purge_interval since the last purge
Called on every get/set, to keep the expired values clear.
"""
if not self.purge_interval > 0:
return
now = time.monotonic()
if self._last_purge < (now - self.purge_interval):
self.purge_expired()
def purge_expired(self):
"""Purge all expired values"""
if not self.max_age > 0:
return
now = self._last_purge = time.monotonic()
cutoff = now - self.max_age
for key in list(self.timestamps):
timestamp = self.timestamps[key]
if timestamp < cutoff:
del self[key]
class HubAuth(SingletonConfigurable):
"""A class for authenticating with JupyterHub
This can be used by any application.
Use this base class only for direct, token-authenticated applications
(web APIs).
For applications that support direct visits from browsers,
use HubOAuth to enable OAuth redirect-based authentication.
If using tornado, use via :class:`HubAuthenticated` mixin.
If using manually, use the ``.user_for_token(token_value)`` method
to identify the user owning a given token.
The following config must be set:
- api_token (token for authenticating with JupyterHub API),
fetched from the JUPYTERHUB_API_TOKEN env by default.
The following config MAY be set:
- api_url: the base URL of the Hub's internal API,
fetched from JUPYTERHUB_API_URL by default.
- cookie_cache_max_age: the number of seconds responses
from the Hub should be cached.
"""
hub_host = Unicode(
'',
help="""The public host of JupyterHub
Only used if JupyterHub is spreading servers across subdomains.
""",
).tag(config=True)
@default('hub_host')
def _default_hub_host(self):
return os.getenv('JUPYTERHUB_HOST', '')
base_url = Unicode(
os.getenv('JUPYTERHUB_SERVICE_PREFIX') or '/',
help="""The base URL prefix of this application
e.g. /services/service-name/ or /user/name/
Default: get from JUPYTERHUB_SERVICE_PREFIX
""",
).tag(config=True)
@validate('base_url')
def _add_slash(self, proposal):
"""Ensure base_url starts and ends with /"""
value = proposal['value']
if not value.startswith('/'):
value = '/' + value
if not value.endswith('/'):
value = value + '/'
return value
# where is the hub
api_url = Unicode(
os.getenv('JUPYTERHUB_API_URL') or 'http://127.0.0.1:8081/hub/api',
help="""The base API URL of the Hub.
Typically `http://hub-ip:hub-port/hub/api`
Default: $JUPYTERHUB_API_URL
""",
).tag(config=True)
@default('api_url')
def _api_url(self):
env_url = os.getenv('JUPYTERHUB_API_URL')
if env_url:
return env_url
else:
return 'http://127.0.0.1:8081' + url_path_join(self.hub_prefix, 'api')
api_token = Unicode(
help="""API key for accessing Hub API.
Default: $JUPYTERHUB_API_TOKEN
Loaded from services configuration in jupyterhub_config.
Will be auto-generated for hub-managed services.
""",
).tag(config=True)
@default("api_token")
def _default_api_token(self):
return os.getenv('JUPYTERHUB_API_TOKEN', '')
hub_prefix = Unicode(
'/hub/',
help="""The URL prefix for the Hub itself.
Typically /hub/
Default: $JUPYTERHUB_BASE_URL
""",
).tag(config=True)
@default('hub_prefix')
def _default_hub_prefix(self):
return url_path_join(os.getenv('JUPYTERHUB_BASE_URL') or '/', 'hub') + '/'
login_url = Unicode(
'',
help="""The login URL to use, if any.
The base HubAuth class doesn't support login via URL,
and will raise 403 on `@web.authenticated` requests without a valid token.
An empty string here raises 403 errors instead of redirecting.
HubOAuth will redirect to /hub/api/oauth2/authorize.
""",
).tag(config=True)
keyfile = Unicode(
os.getenv('JUPYTERHUB_SSL_KEYFILE', ''),
help="""The ssl key to use for requests
Use with certfile
""",
).tag(config=True)
certfile = Unicode(
os.getenv('JUPYTERHUB_SSL_CERTFILE', ''),
help="""The ssl cert to use for requests
Use with keyfile
""",
).tag(config=True)
client_ca = Unicode(
os.getenv('JUPYTERHUB_SSL_CLIENT_CA', ''),
help="""The ssl certificate authority to use to verify requests
Use with keyfile and certfile
""",
).tag(config=True)
allow_token_in_url = Bool(
_bool_env("JUPYTERHUB_ALLOW_TOKEN_IN_URL", default=False),
help="""Allow requests to pages with ?token=... in the URL
This allows starting a user session by sharing a URL with credentials,
bypassing authentication with the Hub.
If False, tokens in URLs will be ignored by the server,
except on websocket requests.
Has no effect on websocket requests,
which can only reliably authenticate via token in the URL,
as recommended by browser Websocket implementations.
This will default to False in JupyterHub 5.
.. versionadded:: 4.1
.. versionchanged:: 5.0
default changed to False
""",
).tag(config=True)
allow_websocket_cookie_auth = Bool(
_bool_env("JUPYTERHUB_ALLOW_WEBSOCKET_COOKIE_AUTH", default=True),
help="""Allow websocket requests with only cookie for authentication
Cookie-authenticated websockets cannot be protected from other user servers unless per-user domains are used.
Disabling cookie auth on websockets protects user servers from each other,
but may break some user applications.
Per-user domains eliminate the need to lock this down.
JupyterLab 4.1.2 and Notebook 6.5.6, 7.1.0 will not work
because they rely on cookie authentication without
API or XSRF tokens.
.. versionadded:: 4.1
""",
).tag(config=True)
cookie_options = Dict(
help="""Additional options to pass when setting cookies.
Can include things like `expires_days=None` for session-expiry
or `secure=True` if served on HTTPS and default HTTPS discovery fails
(e.g. behind some proxies).
"""
).tag(config=True)
@default('cookie_options')
def _default_cookie_options(self):
# load default from env
options_env = os.environ.get('JUPYTERHUB_COOKIE_OPTIONS')
if options_env:
return json.loads(options_env)
else:
return {}
cookie_host_prefix_enabled = Bool(
False,
help="""Enable `__Host-` prefix on authentication cookies.
The `__Host-` prefix on JupyterHub cookies provides further
protection against cookie tossing when untrusted servers
may control subdomains of your jupyterhub deployment.
_However_, it also requires that cookies be set on the path `/`,
which means they are shared by all JupyterHub components,
so a compromised server component will have access to _all_ JupyterHub-related
cookies of the visiting browser.
It is recommended to only combine `__Host-` cookies with per-user domains.
Set via $JUPYTERHUB_COOKIE_HOST_PREFIX_ENABLED
""",
).tag(config=True)
@default("cookie_host_prefix_enabled")
def _default_cookie_host_prefix_enabled(self):
return _bool_env("JUPYTERHUB_COOKIE_HOST_PREFIX_ENABLED")
@property
def cookie_path(self):
"""
Path prefix on which to set cookies
self.base_url, but '/' when cookie_host_prefix_enabled is True
"""
if self.cookie_host_prefix_enabled:
return "/"
else:
return self.base_url
cookie_cache_max_age = Integer(help="DEPRECATED. Use cache_max_age")
@observe('cookie_cache_max_age')
def _deprecated_cookie_cache(self, change):
warnings.warn(
"cookie_cache_max_age is deprecated in JupyterHub 0.8. Use cache_max_age instead."
)
self.cache_max_age = change.new
cache_max_age = Integer(
300,
help="""The maximum time (in seconds) to cache the Hub's responses for authentication.
A larger value reduces load on the Hub and occasional response lag.
A smaller value reduces propagation time of changes on the Hub (rare).
Default: 300 (five minutes)
""",
).tag(config=True)
cache = Instance(_ExpiringDict, allow_none=False)
@default('cache')
def _default_cache(self):
return _ExpiringDict(self.cache_max_age)
@property
def oauth_scopes(self):
warnings.warn(
"HubAuth.oauth_scopes is deprecated in JupyterHub 3.0. Use .access_scopes",
DeprecationWarning,
stacklevel=2,
)
return self.access_scopes
access_scopes = Set(
Unicode(),
help="""OAuth scopes to use for allowing access.
Get from $JUPYTERHUB_OAUTH_ACCESS_SCOPES by default.
""",
).tag(config=True)
@default('access_scopes')
def _default_scopes(self):
env_scopes = os.getenv('JUPYTERHUB_OAUTH_ACCESS_SCOPES')
if not env_scopes:
# deprecated name (since 3.0)
env_scopes = os.getenv('JUPYTERHUB_OAUTH_SCOPES')
if env_scopes:
return set(json.loads(env_scopes))
# scopes not specified, use service name if defined
service_name = os.getenv("JUPYTERHUB_SERVICE_NAME")
if service_name:
return {f'access:services!service={service_name}'}
return set()
_pool = Any(help="Thread pool for running async methods in the background")
@default("_pool")
def _new_pool(self):
# start a single ThreadPool in the background
from concurrent.futures import ThreadPoolExecutor
pool = ThreadPoolExecutor(1)
# create an event loop in the thread
pool.submit(self._setup_asyncio_thread).result()
return pool
def _setup_asyncio_thread(self):
"""Create asyncio loop
To be called from the background thread,
so that any thread-local state is setup correctly
"""
self._thread_loop = asyncio.new_event_loop()
def _synchronize(self, async_f, *args, **kwargs):
"""Call an async method in our background thread"""
future = self._pool.submit(
lambda: self._thread_loop.run_until_complete(async_f(*args, **kwargs))
)
return future.result()
def _call_coroutine(self, sync, async_f, *args, **kwargs):
"""Call an async coroutine function, either blocking or returning an awaitable
if not sync: calls function directly, returning awaitable
else: Block on a call in our background thread, return actual result
"""
if not sync:
return async_f(*args, **kwargs)
else:
return self._synchronize(async_f, *args, **kwargs)
async def _check_hub_authorization(
self, url, api_token, cache_key=None, use_cache=True
):
"""Identify a user with the Hub
Args:
url (str): The API URL to check the Hub for authorization
(e.g. http://127.0.0.1:8081/hub/api/user)
cache_key (str): The key for checking the cache
use_cache (bool): Specify use_cache=False to skip cached cookie values (default: True)
Returns:
user_model (dict): The user model, if a user is identified, None if authentication fails.
Raises an HTTPError if the request failed for a reason other than no such user.
"""
if use_cache:
if cache_key is None:
raise ValueError("cache_key is required when using cache")
# check for a cached reply, so we don't check with the Hub if we don't have to
try:
return self.cache[cache_key]
except KeyError:
app_log.debug("HubAuth cache miss: %s", cache_key)
data = await self._api_request(
'GET',
url,
headers={"Authorization": "token " + api_token},
allow_403=True,
)
if data is None:
app_log.warning("No Hub user identified for request")
else:
app_log.debug("Received request from Hub user %s", data)
if use_cache:
# cache result
self.cache[cache_key] = data
return data
async def _api_request(self, method, url, **kwargs):
"""Make an API request"""
allow_403 = kwargs.pop('allow_403', False)
headers = kwargs.setdefault('headers', {})
headers.setdefault('Authorization', f'token {self.api_token}')
# translate requests args to tornado's
if self.certfile:
kwargs["client_cert"] = self.certfile
if self.keyfile:
kwargs["client_key"] = self.keyfile
if self.client_ca:
kwargs["ca_certs"] = self.client_ca
req = HTTPRequest(
url,
method=method,
**kwargs,
)
try:
r = await AsyncHTTPClient().fetch(req, raise_error=False)
except Exception as e:
app_log.error("Error connecting to %s: %s", self.api_url, e)
msg = f"Failed to connect to Hub API at {self.api_url!r}."
msg += f" Is the Hub accessible at this URL (from host: {socket.gethostname()})?"
if '127.0.0.1' in self.api_url:
msg += (
" Make sure to set c.JupyterHub.hub_ip to an IP accessible to"
+ " single-user servers if the servers are not on the same host as the Hub."
)
raise HTTPError(500, msg)
data = None
try:
status = HTTPStatus(r.code)
except ValueError:
app_log.error(
f"Unknown error checking authorization with JupyterHub: {r.code}"
)
app_log.error(r.body.decode("utf8", "replace"))
response_text = r.body.decode("utf8", "replace")
if status.value == 403 and allow_403:
pass
elif status.value == 403:
app_log.error(
"I don't have permission to check authorization with JupyterHub, my auth token may have expired: [%i] %s",
status.value,
status.description,
)
app_log.error(response_text)
raise HTTPError(
500, "Permission failure checking authorization, I may need a new token"
)
elif status.value >= 500:
app_log.error(
"Upstream failure verifying auth token: [%i] %s",
status.value,
status.description,
)
app_log.error(response_text)
raise HTTPError(502, "Failed to check authorization (upstream problem)")
elif status.value >= 400:
app_log.warning(
"Failed to check authorization: [%i] %s",
status.value,
status.description,
)
app_log.warning(response_text)
msg = "Failed to check authorization"
# pass on error from oauth failure
try:
response = json.loads(response_text)
# prefer more specific 'error_description', fallback to 'error'
description = response.get(
"error_description", response.get("error", "Unknown error")
)
except Exception:
pass
else:
msg += ": " + description
raise HTTPError(500, msg)
else:
data = json.loads(response_text)
return data
def user_for_cookie(self, encrypted_cookie, use_cache=True, session_id=''):
"""Deprecated and removed. Use HubOAuth to authenticate browsers."""
raise RuntimeError(
"Identifying users by shared cookie is removed in JupyterHub 2.0. Use OAuth tokens."
)
def user_for_token(self, token, use_cache=True, session_id='', *, sync=True):
"""Ask the Hub to identify the user for a given token.
.. versionadded:: 2.4
async support via `sync` argument.
Args:
token (str): the token
use_cache (bool): Specify use_cache=False to skip cached cookie values (default: True)
sync (bool): whether to block for the result or return an awaitable
Returns:
user_model (dict): The user model, if a user is identified, None if authentication fails.
The 'name' field contains the user's name.
"""
return self._call_coroutine(
sync,
self._check_hub_authorization,
url=url_path_join(
self.api_url,
"user",
),
api_token=token,
cache_key='token:{}:{}'.format(
session_id,
hashlib.sha256(token.encode("utf8", "replace")).hexdigest(),
),
use_cache=use_cache,
)
auth_header_name = 'Authorization'
auth_header_pat = re.compile(r'(?:token|bearer)\s+(.+)', re.IGNORECASE)
def _get_token_url(self, handler):
"""Get the token from the URL
Always run for websockets,
otherwise run only if self.allow_token_in_url
"""
fetch_mode = handler.request.headers.get("Sec-Fetch-Mode", "unspecified")
if self.allow_token_in_url or fetch_mode == "websocket":
return handler.get_argument("token", "")
return ""
def get_token(self, handler, in_cookie=True):
"""Get the token authenticating a request
.. versionchanged:: 2.2
in_cookie added.
Previously, only URL params and header were considered.
Pass `in_cookie=False` to preserve that behavior.
- in URL parameters: ?token=<token>
- in header: Authorization: token <token>
- in cookie (stored after oauth), if in_cookie is True
Args:
handler (tornado.web.RequestHandler): the current request handler
"""
user_token = self._get_token_url(handler)
if not user_token:
# get it from Authorization header
m = self.auth_header_pat.match(
handler.request.headers.get(self.auth_header_name, '')
)
if m:
user_token = m.group(1)
if not user_token and in_cookie:
user_token = self._get_token_cookie(handler)
return user_token
def _get_token_cookie(self, handler):
"""Base class doesn't store tokens in cookies"""
return None
async def _get_user_cookie(self, handler):
"""Get the user model from a cookie"""
# overridden in HubOAuth to store the access token after oauth
return None
def get_session_id(self, handler):
"""Get the jupyterhub session id
from the jupyterhub-session-id cookie.
Args:
handler (tornado.web.RequestHandler): the current request handler
"""
return handler.get_cookie('jupyterhub-session-id', '')
def get_user(self, handler, *, sync=True):
"""Get the Hub user for a given tornado handler.
Checks cookie with the Hub to identify the current user.
.. versionadded:: 2.4
async support via `sync` argument.
Args:
handler (tornado.web.RequestHandler): the current request handler
sync (bool): whether to block for the result or return an awaitable
Returns:
user_model (dict): The user model, if a user is identified, None if authentication fails.
The 'name' field contains the user's name.
"""
return self._call_coroutine(sync, self._get_user, handler)
def _patch_xsrf(self, handler):
"""Overridden in HubOAuth
HubAuth base class doesn't handle xsrf,
which is only relevant for cookie-based auth
"""
return
async def _get_user(self, handler):
# only allow this to be called once per handler
# avoids issues if an error is raised,
# since this may be called again when trying to render the error page
if hasattr(handler, '_cached_hub_user'):
return handler._cached_hub_user
# patch XSRF checks, which will apply after user check
self._patch_xsrf(handler)
handler._cached_hub_user = user_model = None
session_id = self.get_session_id(handler)
# check token first, ignoring cookies
# because some checks are different when a request
# is token-authenticated (CORS-related)
token = self.get_token(handler, in_cookie=False)
if token:
user_model = await self.user_for_token(
token, session_id=session_id, sync=False
)
if user_model:
handler._token_authenticated = True
# no token, check cookie
if user_model is None:
user_model = await self._get_user_cookie(handler)
# cache result
handler._cached_hub_user = user_model
if not user_model:
app_log.debug("No user identified")
return user_model
def check_scopes(self, required_scopes, user):
"""Check whether the user has required scope(s)"""
return check_scopes(required_scopes, set(user["scopes"]))
def _persist_url_token_if_set(self, handler):
"""Persist ?token=... from URL in cookie if set
for use in future cookie-authenticated requests.
Allows initiating an authenticated session
via /user/name/?token=abc...,
otherwise only the initial request will be authenticated.
No-op if no token URL parameter is given.
"""
url_token = handler.get_argument('token', '')
if not url_token:
# no token to persist
return
# only do this if the token in the URL is the source of authentication
if not getattr(handler, '_token_authenticated', False):
return
if not hasattr(self, 'set_cookie'):
# only HubOAuth can persist cookies
return
fetch_mode = handler.request.headers.get("Sec-Fetch-Mode", "navigate")
if isinstance(handler, WebSocketHandler) or fetch_mode != "navigate":
# don't do this on websockets or non-navigate requests
return
self.log.info(
"Storing token from url in cookie for %s",
handler.request.remote_ip,
)
self.set_cookie(handler, url_token)
class HubOAuth(HubAuth):
"""HubAuth using OAuth for login instead of cookies set by the Hub.
Use this class if you want users to be able to visit your service with a browser.
They will be authenticated via OAuth with the Hub.
.. versionadded: 0.8
"""
# Overrides of HubAuth API
@default('login_url')
def _login_url(self):
return url_concat(
self.oauth_authorization_url,
{
'client_id': self.oauth_client_id,
'redirect_uri': self.oauth_redirect_uri,
'response_type': 'code',
},
)
@property
def cookie_name(self):
"""Use OAuth client_id for cookie name
because we don't want to use the same cookie name
across OAuth clients.
"""
cookie_name = self.oauth_client_id
if self.cookie_host_prefix_enabled:
cookie_name = "__Host-" + cookie_name
return cookie_name
@property
def state_cookie_name(self):
"""The cookie name for storing OAuth state
This cookie is only live for the duration of the OAuth handshake.
"""
return self.cookie_name + '-oauth-state'
def _get_token_cookie(self, handler):
"""Base class doesn't store tokens in cookies"""
if hasattr(handler, "_hub_auth_token_cookie"):
return handler._hub_auth_token_cookie
fetch_mode = handler.request.headers.get("Sec-Fetch-Mode", "unset")
if fetch_mode == "websocket" and not self.allow_websocket_cookie_auth:
# disallow cookie auth on websockets
return None
token = handler.get_secure_cookie(self.cookie_name)
if token:
# decode cookie bytes
token = token.decode('ascii', 'replace')
return token
def _get_xsrf_token_id(self, handler):
"""Get contents for xsrf token for a given Handler
This is the value to be encrypted & signed in the xsrf token
"""
token = self._get_token_cookie(handler)
session_id = self.get_session_id(handler)
if token:
token_hash = hashlib.sha256(token.encode("ascii", "replace")).hexdigest()
if not session_id:
session_id = _anonymous_xsrf_id(handler)
else:
token_hash = _anonymous_xsrf_id(handler)
return f"{session_id}:{token_hash}".encode("ascii", "replace")
def _patch_xsrf(self, handler):
"""Patch handler to inject JuptyerHub xsrf token behavior"""
if isinstance(handler, HubAuthenticated):
# doesn't need patch
return
# patch in our xsrf token handling
# overrides tornado and jupyter_server defaults,
# but not others.
# subclasses will still inherit our overridden behavior,
# but their overrides (if any) will take precedence over ours
# such as jupyter-server-proxy
for cls in handler.__class__.__mro__:
# search for the nearest parent class defined
# in one of the 'base' Handler-defining packages.
# In current implementations, this will
# generally be jupyter_server.base.handlers.JupyterHandler
# or tornado.web.RequestHandler,
# but doing it this way ensures consistent results
if (cls.__module__ or '').partition('.')[0] not in {
"jupyter_server",
"notebook",
"tornado",
}:
continue
# override check_xsrf_cookie where it's defined
if "check_xsrf_cookie" in cls.__dict__:
if "_get_xsrf_token_id" in cls.__dict__:
# already patched
return
cls._xsrf_token_id = property(self._get_xsrf_token_id)
cls.xsrf_token = property(
partial(get_xsrf_token, cookie_path=self.base_url)
)
cls.check_xsrf_cookie = lambda handler: self.check_xsrf_cookie(handler)
def check_xsrf_cookie(self, handler):
"""check_xsrf_cookie patch
Applies JupyterHub check_xsrf_cookie if not token authenticated
"""
if getattr(handler, '_token_authenticated', False) or handler.settings.get(
"disable_check_xsrf", False
):
return
check_xsrf_cookie(handler)
def _clear_cookie(self, handler, cookie_name, **kwargs):
"""Clear a cookie, handling __Host- prefix"""
# Set-Cookie is rejected without 'secure',
# this includes clearing cookies!
if cookie_name.startswith("__Host-"):
kwargs["path"] = "/"
kwargs["secure"] = True
return handler.clear_cookie(cookie_name, **kwargs)
async def _get_user_cookie(self, handler):
# check xsrf if needed
token = self._get_token_cookie(handler)
session_id = self.get_session_id(handler)
if token and _needs_check_xsrf(handler):
# call handler.check_xsrf_cookie instead of self.check_xsrf_cookie
# to allow subclass overrides
try:
handler.check_xsrf_cookie()
except HTTPError as e:
self.log.debug(
f"Not accepting cookie auth on {handler.request.method} {handler.request.path}: {e.log_message}"
)
# don't proceed with cookie auth unless xsrf is okay
# don't raise either, because that makes a mess
return None
if token:
user_model = await self.user_for_token(
token, session_id=session_id, sync=False
)
if user_model is None:
app_log.warning("Token stored in cookie may have expired")
self._clear_cookie(handler, self.cookie_name, path=self.cookie_path)
return user_model
# HubOAuth API
oauth_client_id = Unicode(
help="""The OAuth client ID for this application.
Use JUPYTERHUB_CLIENT_ID by default.
"""
).tag(config=True)
@default('oauth_client_id')
def _client_id(self):
return os.getenv('JUPYTERHUB_CLIENT_ID', '')
@validate('oauth_client_id', 'api_token')
def _ensure_not_empty(self, proposal):
if not proposal.value:
raise ValueError(f"{proposal.trait.name} cannot be empty.")
return proposal.value
oauth_redirect_uri = Unicode(
help="""OAuth redirect URI
Should generally be /base_url/oauth_callback
"""
).tag(config=True)
@default('oauth_redirect_uri')
def _default_redirect(self):
return os.getenv('JUPYTERHUB_OAUTH_CALLBACK_URL') or url_path_join(
self.base_url, 'oauth_callback'
)
oauth_authorization_url = Unicode(
'/hub/api/oauth2/authorize',
help="The URL to redirect to when starting the OAuth process",
).tag(config=True)
@default('oauth_authorization_url')
def _auth_url(self):
return self.hub_host + url_path_join(self.hub_prefix, 'api/oauth2/authorize')
oauth_token_url = Unicode(
help="""The URL for requesting an OAuth token from JupyterHub"""
).tag(config=True)
@default('oauth_token_url')
def _token_url(self):
return url_path_join(self.api_url, 'oauth2/token')
def token_for_code(self, code, *, sync=True):
"""Get token for OAuth temporary code
This is the last step of OAuth login.
Should be called in OAuth Callback handler.
Args:
code (str): oauth code for finishing OAuth login
Returns:
token (str): JupyterHub API Token
"""
return self._call_coroutine(sync, self._token_for_code, code)
async def _token_for_code(self, code):
# GitHub specifies a POST request yet requires URL parameters
params = dict(
client_id=self.oauth_client_id,
client_secret=self.api_token,
grant_type='authorization_code',
code=code,
redirect_uri=self.oauth_redirect_uri,
)
token_reply = await self._api_request(
'POST',
self.oauth_token_url,
body=urlencode(params).encode('utf8'),
headers={'Content-Type': 'application/x-www-form-urlencoded'},
)
return token_reply['access_token']
# state-related
oauth_state_max_age = Integer(
600,
config=True,
help="""Max age (seconds) of oauth state.
Governs both oauth state cookie Max-Age,
as well as the in-memory _oauth_states cache.
""",
)
_oauth_states = Instance(
_ExpiringDict,
allow_none=False,
help="""
Store oauth state info for each oauth request, such as next_url
The oauth state field only contains the oauth state _id_ of each pending request,
while other information such as the cookie name, next_url
are stored in this dictionary.
""",
)
@default('_oauth_states')
def _default_oauth_states(self):
return _ExpiringDict(max_age=self.oauth_state_max_age)
def set_state_cookie(self, handler, next_url=None):
"""Generate an OAuth state and store it in a cookie
Parameters
----------
handler : RequestHandler
A tornado RequestHandler
next_url : str
The page to redirect to on successful login
Returns
-------
state : str
The OAuth state that has been stored in the cookie (url safe, base64-encoded)
"""
extra_state = {}
if handler.get_cookie(self.state_cookie_name):
# oauth state cookie is already set
# use a randomized cookie suffix to avoid collisions
# in case of concurrent logins
app_log.warning("Detected unused OAuth state cookies")
cookie_suffix = ''.join(
random.choice(string.ascii_letters) for i in range(8)
)
cookie_name = f'{self.state_cookie_name}-{cookie_suffix}'
extra_state['cookie_name'] = cookie_name
else:
cookie_name = self.state_cookie_name
state_id = self.generate_state(next_url, **extra_state)
kwargs = {
'path': self.cookie_path,
'httponly': True,
# Expire oauth state cookie in ten minutes.
# Usually this will be cleared by completed login
# in less than a few seconds.
# OAuth that doesn't complete shouldn't linger too long.
'max_age': 600,
}
public_url = os.getenv("JUPYTERHUB_PUBLIC_URL")
if public_url:
if urlparse(public_url).scheme == 'https':
kwargs['secure'] = True
else:
if get_browser_protocol(handler.request) == 'https':
kwargs['secure'] = True
# don't allow overriding some fields
no_override_keys = set(kwargs.keys()) | {"expires_days", "expires"}
# load user cookie overrides
for key, value in self.cookie_options.items():
# don't include overrides
if key.lower() not in no_override_keys:
kwargs[key] = value
handler.set_secure_cookie(cookie_name, state_id, **kwargs)
return state_id
def generate_state(self, next_url=None, **extra_state):
"""Generate a state string, given a next_url redirect target
The state info is stored locally in self._oauth_states,
and only the state id is returned for use in the oauth state field (cookie, redirect param)
Parameters
----------
next_url : str
The URL of the page to redirect to on successful login.
Returns
-------
state_id (str): The state string to be used as a cookie value.
"""
state_id = secrets.token_urlsafe(16)
state = {'next_url': next_url}
if extra_state:
state.update(extra_state)
self._oauth_states[state_id] = state
return state_id
def clear_oauth_state(self, state_id):
"""Clear persisted oauth state"""
self._oauth_states.pop(state_id, None)
self._oauth_states.purge_expired()
def clear_oauth_state_cookies(self, handler):
"""Clear persisted oauth state"""
for cookie_name, cookie in handler.request.cookies.items():
if cookie_name.startswith(self.state_cookie_name):
self._clear_cookie(
handler,
cookie_name,
path=self.cookie_path,
)
def _decode_state(self, state_id, /):
return self._oauth_states.get(state_id, {})
def get_next_url(self, state_id='', /):
"""Get the next_url for redirection, given an encoded OAuth state"""
state = self._decode_state(state_id)
return state.get('next_url') or self.base_url
def get_state_cookie_name(self, state_id='', /):
"""Get the cookie name for oauth state, given an encoded OAuth state
Cookie name is stored in the state itself because the cookie name
is randomized to deal with races between concurrent oauth sequences.
"""
state = self._decode_state(state_id)
return state.get('cookie_name') or self.state_cookie_name
def set_cookie(self, handler, access_token):
"""Set a cookie recording OAuth result"""
kwargs = {'path': self.cookie_path, 'httponly': True}
if (
get_browser_protocol(handler.request) == 'https'
or self.cookie_host_prefix_enabled
):
kwargs['secure'] = True
# load user cookie overrides
kwargs.update(self.cookie_options)
app_log.debug(
"Setting oauth cookie for %s: %s, %s",
handler.request.remote_ip,
self.cookie_name,
kwargs,
)
handler.set_secure_cookie(self.cookie_name, access_token, **kwargs)
# set updated xsrf token cookie,
# which changes after login
handler._hub_auth_token_cookie = access_token
_set_xsrf_cookie(
handler,
handler._xsrf_token_id,
cookie_path=self.base_url,
authenticated=True,
)
def clear_cookie(self, handler):
"""Clear the OAuth cookie
Args:
handler (tornado.web.RequestHandler): the current request handler
"""
self._clear_cookie(handler, self.cookie_name, path=self.cookie_path)
class UserNotAllowed(Exception):
"""Exception raised when a user is identified and not allowed"""
def __init__(self, model):
self.model = model
def __str__(self):
return '<{cls} {kind}={name}>'.format(
cls=self.__class__.__name__,
kind=self.model['kind'],
name=self.model['name'],
)
class HubAuthenticated:
"""Mixin for tornado handlers that are authenticated with JupyterHub
A handler that mixes this in must have the following attributes/properties:
- .hub_auth: A HubAuth instance
- .hub_scopes: A set of JupyterHub 2.0 OAuth scopes to allow.
Default comes from .hub_auth.oauth_access_scopes,
which in turn is set by $JUPYTERHUB_OAUTH_ACCESS_SCOPES
Default values include:
- 'access:services', 'access:services!service={service_name}' for services
- 'access:servers', 'access:servers!user={user}',
'access:servers!server={user}/{server_name}'
for single-user servers
If hub_scopes is not used (e.g. JupyterHub 1.x),
these additional properties can be used:
- .allow_admin: If True, allow any admin user.
Default: False.
- .hub_users: A set of usernames to allow.
If left unspecified or None, username will not be checked.
- .hub_groups: A set of group names to allow.
If left unspecified or None, groups will not be checked.
- .allow_admin: Is admin user access allowed or not
If left unspecified or False, admin user won't have an access.
Examples::
class MyHandler(HubAuthenticated, web.RequestHandler):
def initialize(self, hub_auth):
self.hub_auth = hub_auth
@web.authenticated
def get(self):
...
"""
# deprecated, pre-2.0 allow sets
hub_services = None # set of allowed services
hub_users = None # set of allowed users
hub_groups = None # set of allowed groups
allow_admin = False # allow any admin user access
@property
def hub_scopes(self):
"""Set of allowed scopes (use hub_auth.access_scopes by default)"""
return self.hub_auth.access_scopes or None
@property
def allow_all(self):
"""Property indicating that all successfully identified user
or service should be allowed.
"""
return (
self.hub_scopes is None
and self.hub_services is None
and self.hub_users is None
and self.hub_groups is None
and not self.allow_admin
)
# self.hub_auth must be a HubAuth instance.
# If nothing specified, use default config,
# which will be configured with defaults
# based on JupyterHub environment variables for services.
_hub_auth = None
hub_auth_class = HubAuth
@property
def hub_auth(self):
if self._hub_auth is None:
self._hub_auth = self.hub_auth_class.instance()
return self._hub_auth
@hub_auth.setter
def hub_auth(self, auth):
self._hub_auth = auth
_hub_login_url = None
def get_login_url(self):
"""Return the Hub's login URL"""
if self._hub_login_url is not None:
# cached value, don't call this more than once per handler
return self._hub_login_url
if not self.hub_auth.login_url:
# HubOAuth is required for login via redirect,
# base class can only raise to avoid redirect loops
raise HTTPError(403)
# temporary override at setting level,
# to allow any subclass overrides of get_login_url to preserve their effect
# for example, APIHandler raises 403 to prevent redirects
with mock.patch.dict(
self.application.settings, {"login_url": self.hub_auth.login_url}
):
login_url = super().get_login_url()
app_log.debug("Redirecting to login url: %s", login_url)
if isinstance(self.hub_auth, HubOAuth):
# add state argument to OAuth url
# must do this _after_ allowing get_login_url to raise
# so we don't set unused cookies
state = self.hub_auth.set_state_cookie(self, next_url=self.request.uri)
login_url = url_concat(login_url, {'state': state})
self._hub_login_url = login_url
return login_url
def check_hub_user(self, model):
"""Check whether Hub-authenticated user or service should be allowed.
Returns the input if the user should be allowed, None otherwise.
Override for custom logic in authenticating users.
Args:
user_model (dict): the user or service model returned from :class:`HubAuth`
Returns:
user_model (dict): The user model if the user should be allowed, None otherwise.
"""
name = model['name']
kind = model.setdefault('kind', 'user')
if self.allow_all:
app_log.debug(
"Allowing Hub %s %s (all Hub users and services allowed)", kind, name
)
return model
if self.hub_scopes:
scopes = self.hub_auth.check_scopes(self.hub_scopes, model)
if scopes:
app_log.debug(
f"Allowing Hub {kind} {name} based on oauth scopes {scopes}"
)
return model
else:
app_log.warning(
f"Not allowing Hub {kind} {name}: missing required scopes"
)
app_log.debug(
f"Hub {kind} {name} needs scope(s) {self.hub_scopes}, has scope(s) {model['scopes']}"
)
# if hub_scopes are used, *only* hub_scopes are used
# note: this means successful authentication, but insufficient permission
raise UserNotAllowed(model)
# proceed with the pre-2.0 way if hub_scopes is not set
warnings.warn(
"hub_scopes ($JUPYTERHUB not set, proceeding with pre-2.0 authentication",
DeprecationWarning,
)
if self.allow_admin and model.get('admin', False):
app_log.debug("Allowing Hub admin %s", name)
return model
if kind == 'service':
# it's a service, check hub_services
if self.hub_services and name in self.hub_services:
app_log.debug("Allowing Hub service %s", name)
return model
else:
app_log.warning("Not allowing Hub service %s", name)
raise UserNotAllowed(model)
if self.hub_users and name in self.hub_users:
# user in allowed list
app_log.debug("Allowing Hub user %s", name)
return model
elif self.hub_groups and set(model['groups']).intersection(self.hub_groups):
allowed_groups = set(model['groups']).intersection(self.hub_groups)
app_log.debug(
"Allowing Hub user %s in group(s) %s",
name,
','.join(sorted(allowed_groups)),
)
# group in allowed list
return model
else:
app_log.warning("Not allowing Hub user %s", name)
raise UserNotAllowed(model)
def get_current_user(self):
"""Tornado's authentication method
Returns:
user_model (dict): The user model, if a user is identified, None if authentication fails.
"""
if hasattr(self, '_hub_auth_user_cache'):
return self._hub_auth_user_cache
user_model = self.hub_auth.get_user(self)
if not user_model:
self._hub_auth_user_cache = None
return
try:
self._hub_auth_user_cache = self.check_hub_user(user_model)
except UserNotAllowed:
# cache None, in case get_user is called again while processing the error
self._hub_auth_user_cache = None
# Override redirect so if/when tornado @web.authenticated
# tries to redirect to login URL, 403 will be raised instead.
# This is not the best, but avoids problems that can be caused
# when get_current_user is allowed to raise.
def raise_on_redirect(*args, **kwargs):
raise HTTPError(
403, "{kind} {name} is not allowed.".format(**user_model)
)
self.redirect = raise_on_redirect
return
except Exception:
self._hub_auth_user_cache = None
raise
self.hub_auth._persist_url_token_if_set(self)
return self._hub_auth_user_cache
@property
def _xsrf_token_id(self):
if hasattr(self, "__xsrf_token_id"):
return self.__xsrf_token_id
if not isinstance(self.hub_auth, HubOAuth):
return ""
return self.hub_auth._get_xsrf_token_id(self)
@_xsrf_token_id.setter
def _xsrf_token_id(self, value):
self.__xsrf_token_id = value
@property
def xsrf_token(self):
return get_xsrf_token(self, cookie_path=self.hub_auth.base_url)
def check_xsrf_cookie(self):
return self.hub_auth.check_xsrf_cookie(self)
class HubOAuthenticated(HubAuthenticated):
"""Simple subclass of HubAuthenticated using OAuth instead of old shared cookies"""
hub_auth_class = HubOAuth
class HubOAuthCallbackHandler(HubOAuthenticated, RequestHandler):
"""OAuth Callback handler
Finishes the OAuth flow, setting a cookie to record the user's info.
Should be registered at ``SERVICE_PREFIX/oauth_callback``
.. versionadded: 0.8
"""
async def get(self):
error = self.get_argument("error", False)
if error:
msg = self.get_argument("error_description", error)
raise HTTPError(400, f"Error in oauth: {msg}")
code = self.get_argument("code", False)
if not code:
raise HTTPError(400, "OAuth callback made without a token")
# validate OAuth state
arg_state = self.get_argument("state", None)
if arg_state is None:
raise HTTPError(400, "OAuth state is missing. Try logging in again.")
cookie_name = self.hub_auth.get_state_cookie_name(arg_state)
cookie_state = self.get_secure_cookie(cookie_name)
# clear cookie state now that we've consumed it
if cookie_state:
self.hub_auth.clear_oauth_state_cookies(self)
else:
# completing oauth with stale state, but already logged in.
# stop here and redirect to default URL
# don't complete oauth (no new token), but do complete redirecting to the destination
if self.current_user:
app_log.warning("Attempting oauth completion after already logging in.")
self.hub_auth.clear_oauth_state_cookies(self)
next_url = self.hub_auth.get_next_url(arg_state)
self.redirect(next_url)
return
if isinstance(cookie_state, bytes):
cookie_state = cookie_state.decode('ascii', 'replace')
# check that state matches
if arg_state != cookie_state:
app_log.warning(
"oauth state argument %r != cookie %s=%r",
arg_state,
cookie_name,
cookie_state,
)
raise HTTPError(403, "oauth state does not match. Try logging in again.")
next_url = self.hub_auth.get_next_url(cookie_state)
# clear consumed state from _oauth_states cache now that we're done with it
self.hub_auth.clear_oauth_state(cookie_state)
# clear _all_ oauth state cookies on success
# This prevents multiple concurrent logins in the same browser,
# which is probably okay.
self.hub_auth.clear_oauth_state_cookies(self)
token = await self.hub_auth.token_for_code(code, sync=False)
session_id = self.hub_auth.get_session_id(self)
user_model = await self.hub_auth.user_for_token(
token, session_id=session_id, sync=False
)
if user_model is None:
raise HTTPError(500, "oauth callback failed to identify a user")
app_log.info("Logged-in user %s", user_model['name'])
app_log.debug("User model %s", user_model)
self.hub_auth.set_cookie(self, token)
self.redirect(next_url or self.hub_auth.base_url)