From e0ea52af49ced2f332413d3e661a8391a6c77f7d Mon Sep 17 00:00:00 2001 From: Min RK Date: Mon, 25 Mar 2024 12:10:07 +0100 Subject: [PATCH 1/2] rework handling of multiple xsrf tokens rather than attempting to clear multiple tokens (too complicated, breaks named servers) look for and accept first valid token have to do our own cookie parsing because existing cookie implementations only return a single value for each key and default to selecting the _least_ likely to be correct, according to RFCs. set updated xsrf cookie on login to avoid needing two requests to get the right cookie --- jupyterhub/_xsrf_utils.py | 131 ++++++++++++----------- jupyterhub/handlers/base.py | 12 ++- jupyterhub/services/auth.py | 17 ++- jupyterhub/tests/browser/test_browser.py | 101 +++++++++++------ jupyterhub/tests/conftest.py | 2 - jupyterhub/tests/test_services_auth.py | 13 +-- 6 files changed, 167 insertions(+), 109 deletions(-) diff --git a/jupyterhub/_xsrf_utils.py b/jupyterhub/_xsrf_utils.py index 05bf2b04..07fb3139 100644 --- a/jupyterhub/_xsrf_utils.py +++ b/jupyterhub/_xsrf_utils.py @@ -10,11 +10,9 @@ in both Hub and single-user code import base64 import hashlib -from datetime import datetime, timedelta, timezone from http.cookies import SimpleCookie from tornado import web -from tornado.httputil import format_timestamp from tornado.log import app_log @@ -60,41 +58,69 @@ def _create_signed_value_urlsafe(handler, name, value): return base64.urlsafe_b64encode(signed_value).rstrip(b"=") -def _clear_invalid_xsrf_cookie(handler, cookie_path): +def _get_xsrf_token_cookie(handler): """ - Clear invalid XSRF cookie + Get the _valid_ XSRF token and id from Cookie - This may an old XSRF token, or one set on / by another application. - Because we cannot trust browsers or tornado to give us the more specific cookie, - try to clear _both_ on / and on our prefix, - then reload the page. + Returns (xsrf_token, xsrf_id) found in Cookies header. + + multiple xsrf cookies may be set on multiple paths; + + RFC 6265 states that they should be in order of more specific path to less, + but ALSO states that servers should never rely on order. + + Tornado (6.4) and stdlib (3.12) SimpleCookie explicitly use the _last_ value, + which means the cookie with the _least_ specific prefix will be used if more than one is present. + + Because we sign values, we can get the first valid cookie and not worry about order too much. + + This is simplified from tornado's HTTPRequest.cookies property + only looking for a single cookie. """ - expired = format_timestamp(datetime.now(timezone.utc) - timedelta(days=366)) - cookie = SimpleCookie() - cookie["_xsrf"] = "" - morsel = cookie["_xsrf"] - morsel["expires"] = expired - morsel["path"] = "/" - # use Set-Cookie directly, - # because tornado's set_cookie and clear_cookie use a single _dict_, - # so we can't clear a cookie on multiple paths and then set it - handler.add_header("Set-Cookie", morsel.OutputString(None)) - if cookie_path != "/": - # clear it multiple times! - morsel["path"] = cookie_path - handler.add_header("Set-Cookie", morsel.OutputString(None)) + if "Cookie" not in handler.request.headers: + return (None, None) - if ( - handler.request.method.lower() == "get" - and handler.request.headers.get("Sec-Fetch-Mode", "navigate") == "navigate" - ): - # reload current page because any subsequent set_cookie - # will cancel the clearing of the cookie - # this only makes sense on GET requests - handler.redirect(handler.request.uri) - # halt any other processing of the request - raise web.Finish() + for chunk in handler.request.headers["Cookie"].split(";"): + key = chunk.partition("=")[0].strip() + if key != "_xsrf": + # we are only looking for the _xsrf cookie + # ignore everything else + continue + + # use stdlib parsing to handle quotes, validation, etc. + try: + xsrf_token = SimpleCookie(chunk)[key].value.encode("ascii") + except (ValueError, KeyError): + continue + + xsrf_token_id = _get_signed_value_urlsafe(handler, "_xsrf", xsrf_token) + + if xsrf_token_id: + # only return if we found a _valid_ xsrf cookie + # otherwise, keep looking + return (xsrf_token, xsrf_token_id) + # no valid token found found + return (None, None) + + +def _set_xsrf_cookie(handler, xsrf_id, cookie_path=""): + """Set xsrf token cookie""" + xsrf_token = _create_signed_value_urlsafe(handler, "_xsrf", xsrf_id) + xsrf_cookie_kwargs = {} + xsrf_cookie_kwargs.update(handler.settings.get('xsrf_cookie_kwargs', {})) + xsrf_cookie_kwargs.setdefault("path", cookie_path) + if not handler.current_user: + # limit anonymous xsrf cookies to one hour + xsrf_cookie_kwargs.pop("expires", None) + xsrf_cookie_kwargs.pop("expires_days", None) + xsrf_cookie_kwargs["max_age"] = 3600 + app_log.info( + "Setting new xsrf cookie for %r %r", + xsrf_id, + xsrf_cookie_kwargs, + ) + handler.set_cookie("_xsrf", xsrf_token, **xsrf_cookie_kwargs) def get_xsrf_token(handler, cookie_path=""): @@ -110,23 +136,8 @@ def get_xsrf_token(handler, cookie_path=""): _set_cookie = False # the raw cookie is the token - xsrf_token = xsrf_cookie = handler.get_cookie("_xsrf") - if xsrf_token: - try: - xsrf_token = xsrf_token.encode("ascii") - except UnicodeEncodeError: - xsrf_token = None - - xsrf_id_cookie = _get_signed_value_urlsafe(handler, "_xsrf", xsrf_token) - if xsrf_cookie and not xsrf_id_cookie: - # we have a cookie, but it's invalid! - # handle possibility of _xsrf being set multiple times, - # e.g. on / and on /hub/ - # this will reload the page if it's a GET request - app_log.warning( - "Attempting to clear invalid _xsrf cookie %r", xsrf_cookie[:4] + "..." - ) - _clear_invalid_xsrf_cookie(handler, cookie_path) + xsrf_token, xsrf_id_cookie = _get_xsrf_token_cookie(handler) + cookie_token = xsrf_token # check the decoded, signed value for validity xsrf_id = handler._xsrf_token_id @@ -146,22 +157,16 @@ def get_xsrf_token(handler, cookie_path=""): _set_cookie = ( handler.request.headers.get("Sec-Fetch-Mode", "navigate") == "navigate" ) + if xsrf_id_cookie and not _set_cookie: + # if we aren't setting a cookie here but we got one, + # this means things probably aren't going to work + app_log.warning( + "Not accepting incorrect xsrf token id in cookie on %s", + handler.request.path, + ) if _set_cookie: - xsrf_cookie_kwargs = {} - xsrf_cookie_kwargs.update(handler.settings.get('xsrf_cookie_kwargs', {})) - xsrf_cookie_kwargs.setdefault("path", cookie_path) - if not handler.current_user: - # limit anonymous xsrf cookies to one hour - xsrf_cookie_kwargs.pop("expires", None) - xsrf_cookie_kwargs.pop("expires_days", None) - xsrf_cookie_kwargs["max_age"] = 3600 - app_log.info( - "Setting new xsrf cookie for %r %r", - xsrf_id, - xsrf_cookie_kwargs, - ) - handler.set_cookie("_xsrf", xsrf_token, **xsrf_cookie_kwargs) + _set_xsrf_cookie(handler, xsrf_id, cookie_path) handler._xsrf_token = xsrf_token return xsrf_token diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py index 03e3d7a5..2f6b8033 100644 --- a/jupyterhub/handlers/base.py +++ b/jupyterhub/handlers/base.py @@ -24,7 +24,12 @@ from tornado.log import app_log from tornado.web import RequestHandler, addslash from .. import __version__, orm, roles, scopes -from .._xsrf_utils import _anonymous_xsrf_id, check_xsrf_cookie, get_xsrf_token +from .._xsrf_utils import ( + _anonymous_xsrf_id, + _set_xsrf_cookie, + check_xsrf_cookie, + get_xsrf_token, +) from ..metrics import ( PROXY_ADD_DURATION_SECONDS, PROXY_DELETE_DURATION_SECONDS, @@ -730,6 +735,11 @@ class BaseHandler(RequestHandler): if not self.get_current_user_cookie(): self.set_hub_cookie(user) + # make sure xsrf cookie is updated + # this avoids needing a second request to set the right xsrf cookie + self._jupyterhub_user = user + _set_xsrf_cookie(self, self._xsrf_token_id, cookie_path=self.hub.base_url) + def authenticate(self, data): return maybe_future(self.authenticator.get_authenticated_user(self, data)) diff --git a/jupyterhub/services/auth.py b/jupyterhub/services/auth.py index 98ba335a..999b9f3b 100644 --- a/jupyterhub/services/auth.py +++ b/jupyterhub/services/auth.py @@ -59,7 +59,12 @@ from traitlets import ( ) from traitlets.config import SingletonConfigurable -from .._xsrf_utils import _anonymous_xsrf_id, check_xsrf_cookie, get_xsrf_token +from .._xsrf_utils import ( + _anonymous_xsrf_id, + _set_xsrf_cookie, + check_xsrf_cookie, + get_xsrf_token, +) from ..scopes import _intersect_expanded_scopes from ..utils import _bool_env, get_browser_protocol, url_path_join @@ -851,6 +856,8 @@ class HubOAuth(HubAuth): def _get_token_cookie(self, handler): """Base class doesn't store tokens in cookies""" + if hasattr(handler, "_hub_auth_token_cookie"): + return handler._hub_auth_token_cookie fetch_mode = handler.request.headers.get("Sec-Fetch-Mode", "unset") if fetch_mode == "websocket" and not self.allow_websocket_cookie_auth: @@ -962,8 +969,8 @@ class HubOAuth(HubAuth): try: handler.check_xsrf_cookie() except HTTPError as e: - self.log.error( - f"Not accepting cookie auth on {handler.request.method} {handler.request.path}: {e}" + self.log.debug( + f"Not accepting cookie auth on {handler.request.method} {handler.request.path}: {e.log_message}" ) # don't proceed with cookie auth unless xsrf is okay # don't raise either, because that makes a mess @@ -1187,6 +1194,10 @@ class HubOAuth(HubAuth): kwargs, ) handler.set_secure_cookie(self.cookie_name, access_token, **kwargs) + # set updated xsrf token cookie, + # which changes after login + handler._hub_auth_token_cookie = access_token + _set_xsrf_cookie(handler, handler._xsrf_token_id, cookie_path=self.base_url) def clear_cookie(self, handler): """Clear the OAuth cookie""" diff --git a/jupyterhub/tests/browser/test_browser.py b/jupyterhub/tests/browser/test_browser.py index c29ffb14..33269d35 100644 --- a/jupyterhub/tests/browser/test_browser.py +++ b/jupyterhub/tests/browser/test_browser.py @@ -12,6 +12,7 @@ from tornado.escape import url_escape from tornado.httputil import url_concat from jupyterhub import orm, roles, scopes +from jupyterhub.tests.test_named_servers import named_servers # noqa from jupyterhub.tests.utils import async_requests, public_host, public_url, ujoin from jupyterhub.utils import url_escape_path, url_path_join @@ -1127,6 +1128,7 @@ async def test_start_stop_server_on_admin_page( "fresh", "invalid", "valid-prefix-invalid-root", + "valid-prefix-invalid-other-prefix", ], ) async def test_login_xsrf_initial_cookies(app, browser, case, username): @@ -1136,6 +1138,7 @@ async def test_login_xsrf_initial_cookies(app, browser, case, username): """ hub_root = public_host(app) hub_url = url_path_join(public_host(app), app.hub.base_url) + hub_parent = hub_url.rstrip("/").rsplit("/", 1)[0] + "/" login_url = url_path_join( hub_url, url_concat("login", {"next": url_path_join(app.base_url, "/hub/home")}) ) @@ -1145,7 +1148,11 @@ async def test_login_xsrf_initial_cookies(app, browser, case, username): await browser.context.add_cookies( [{"name": "_xsrf", "value": "invalid-hub-prefix", "url": hub_url}] ) - elif case == "valid-prefix-invalid-root": + elif case.startswith("valid-prefix"): + if "invalid-root" in case: + invalid_url = hub_root + else: + invalid_url = hub_parent await browser.goto(login_url) # first visit sets valid xsrf cookie cookies = await browser.context.cookies() @@ -1157,7 +1164,7 @@ async def test_login_xsrf_initial_cookies(app, browser, case, username): # currently, this test assumes the observed behavior, # which is that the invalid cookie on `/` has _higher_ priority await browser.context.add_cookies( - [{"name": "_xsrf", "value": "invalid-root", "url": hub_root}] + [{"name": "_xsrf", "value": "invalid-root", "url": invalid_url}] ) cookies = await browser.context.cookies() assert len(cookies) == 2 @@ -1190,7 +1197,9 @@ def _cookie_dict(cookie_list): return cookie_dict -async def test_singleuser_xsrf(app, browser, user, create_user_with_scopes, full_spawn): +async def test_singleuser_xsrf( + app, browser, user, create_user_with_scopes, full_spawn, named_servers # noqa: F811 +): # full login process, checking XSRF handling # start two servers target_user = user @@ -1311,33 +1320,61 @@ async def test_singleuser_xsrf(app, browser, user, create_user_with_scopes, full # check that server page can still connect to its own kernels token = target_user.new_api_token(scopes=["access:servers!user"]) - url = url_path_join(public_url(app, target_user), "/api/kernels") - headers = {"Authorization": f"Bearer {token}"} - r = await async_requests.post(url, headers=headers) - r.raise_for_status() - kernel = r.json() - kernel_id = kernel["id"] - kernel_url = url_path_join(url, kernel_id) - kernel_ws_url = "ws" + url_path_join(kernel_url, "channels")[4:] - try: - result = await browser.evaluate( - """ - async (ws_url) => { - ws = new WebSocket(ws_url); - finished = await new Promise((resolve, reject) => { - ws.onerror = (err) => { - reject(err); - }; - ws.onopen = () => { - resolve("ok"); - }; - }); - return finished; - } - """, - kernel_ws_url, - ) - finally: - r = await async_requests.delete(kernel_url, headers=headers) + + async def test_kernel(kernels_url): + headers = {"Authorization": f"Bearer {token}"} + r = await async_requests.post(kernels_url, headers=headers) r.raise_for_status() - assert result == "ok" + kernel = r.json() + kernel_id = kernel["id"] + kernel_url = url_path_join(kernels_url, kernel_id) + kernel_ws_url = "ws" + url_path_join(kernel_url, "channels")[4:] + try: + result = await browser.evaluate( + """ + async (ws_url) => { + ws = new WebSocket(ws_url); + finished = await new Promise((resolve, reject) => { + ws.onerror = (err) => { + reject(err); + }; + ws.onopen = () => { + resolve("ok"); + }; + }); + return finished; + } + """, + kernel_ws_url, + ) + finally: + r = await async_requests.delete(kernel_url, headers=headers) + r.raise_for_status() + assert result == "ok" + + kernels_url = url_path_join(public_url(app, target_user), "/api/kernels") + await test_kernel(kernels_url) + + # final check: make sure named servers work. + # first, visit spawn page to launch server, + # will issue cookies, etc. + server_name = "named" + url = url_path_join( + public_host(app), + url_path_join(app.base_url, f"hub/spawn/{browser_user.name}/{server_name}"), + ) + await browser.goto(url) + await expect(browser).to_have_url( + re.compile(rf".*/user/{browser_user.name}/{server_name}/.*") + ) + # from named server URL, make sure we can talk to a kernel + token = browser_user.new_api_token(scopes=["access:servers!user"]) + # named-server URL + kernels_url = url_path_join( + public_url(app, browser_user), server_name, "api/kernels" + ) + await test_kernel(kernels_url) + # go back to user's own page, test again + # make sure we didn't break anything + await browser.goto(public_url(app, browser_user)) + await test_kernel(url_path_join(public_url(app, browser_user), "api/kernels")) diff --git a/jupyterhub/tests/conftest.py b/jupyterhub/tests/conftest.py index 31664605..0f01528b 100644 --- a/jupyterhub/tests/conftest.py +++ b/jupyterhub/tests/conftest.py @@ -448,8 +448,6 @@ def create_user_with_scopes(app, create_temp_role): return app.users[orm_user.id] yield temp_user_creator - for user in temp_users: - app.users.delete(user) @fixture diff --git a/jupyterhub/tests/test_services_auth.py b/jupyterhub/tests/test_services_auth.py index de72fdba..04d91916 100644 --- a/jupyterhub/tests/test_services_auth.py +++ b/jupyterhub/tests/test_services_auth.py @@ -528,7 +528,7 @@ async def test_oauth_cookie_collision(app, mockservice_url, create_user_with_sco print(url) s = AsyncSession() name = 'mypha' - user = create_user_with_scopes("access:services", name=name) + create_user_with_scopes("access:services", name=name) s.cookies = await app.login_user(name) state_cookie_name = 'service-%s-oauth-state' % service.name service_cookie_name = 'service-%s' % service.name @@ -551,10 +551,9 @@ async def test_oauth_cookie_collision(app, mockservice_url, create_user_with_sco assert s.cookies[state_cookie_name] == state_1 # finish oauth 2 + hub_xsrf = s.cookies.get("_xsrf", path=app.hub.base_url) # submit the oauth form to complete authorization - r = await s.post( - oauth_2.url, data={'scopes': ['identify'], "_xsrf": s.cookies["_xsrf"]} - ) + r = await s.post(oauth_2.url, data={'scopes': ['identify'], "_xsrf": hub_xsrf}) r.raise_for_status() assert r.url == url # after finishing, state cookie is cleared @@ -564,9 +563,7 @@ async def test_oauth_cookie_collision(app, mockservice_url, create_user_with_sco service_cookie_2 = s.cookies[service_cookie_name] # finish oauth 1 - r = await s.post( - oauth_1.url, data={'scopes': ['identify'], "_xsrf": s.cookies["_xsrf"]} - ) + r = await s.post(oauth_1.url, data={'scopes': ['identify'], "_xsrf": hub_xsrf}) r.raise_for_status() assert r.url == url @@ -635,7 +632,7 @@ async def test_oauth_logout(app, mockservice_url, create_user_with_scopes): r = await s.get(public_url(app, path='hub/logout')) r.raise_for_status() # verify that all cookies other than the service cookie are cleared - assert sorted(s.cookies.keys()) == ["_xsrf", service_cookie_name] + assert sorted(set(s.cookies.keys())) == ["_xsrf", service_cookie_name] # verify that clearing session id invalidates service cookie # i.e. redirect back to login page r = await s.get(url) From 87c2aebb5cb36a4801bd9d1a58201299f411732c Mon Sep 17 00:00:00 2001 From: Min RK Date: Mon, 25 Mar 2024 13:18:54 +0100 Subject: [PATCH 2/2] avoid cycle on current_user in _set_xsrf_cookie pass authenticated explicitly --- jupyterhub/_xsrf_utils.py | 13 ++++++++++--- jupyterhub/handlers/base.py | 4 +++- jupyterhub/services/auth.py | 12 +++++++++++- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/jupyterhub/_xsrf_utils.py b/jupyterhub/_xsrf_utils.py index 07fb3139..2517bea6 100644 --- a/jupyterhub/_xsrf_utils.py +++ b/jupyterhub/_xsrf_utils.py @@ -104,13 +104,20 @@ def _get_xsrf_token_cookie(handler): return (None, None) -def _set_xsrf_cookie(handler, xsrf_id, cookie_path=""): +def _set_xsrf_cookie(handler, xsrf_id, *, cookie_path="", authenticated=None): """Set xsrf token cookie""" xsrf_token = _create_signed_value_urlsafe(handler, "_xsrf", xsrf_id) xsrf_cookie_kwargs = {} xsrf_cookie_kwargs.update(handler.settings.get('xsrf_cookie_kwargs', {})) xsrf_cookie_kwargs.setdefault("path", cookie_path) - if not handler.current_user: + if authenticated is None: + try: + current_user = handler.current_user + except Exception: + authenticated = False + else: + authenticated = bool(current_user) + if not authenticated: # limit anonymous xsrf cookies to one hour xsrf_cookie_kwargs.pop("expires", None) xsrf_cookie_kwargs.pop("expires_days", None) @@ -166,7 +173,7 @@ def get_xsrf_token(handler, cookie_path=""): ) if _set_cookie: - _set_xsrf_cookie(handler, xsrf_id, cookie_path) + _set_xsrf_cookie(handler, xsrf_id, cookie_path=cookie_path) handler._xsrf_token = xsrf_token return xsrf_token diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py index 2f6b8033..77a13892 100644 --- a/jupyterhub/handlers/base.py +++ b/jupyterhub/handlers/base.py @@ -738,7 +738,9 @@ class BaseHandler(RequestHandler): # make sure xsrf cookie is updated # this avoids needing a second request to set the right xsrf cookie self._jupyterhub_user = user - _set_xsrf_cookie(self, self._xsrf_token_id, cookie_path=self.hub.base_url) + _set_xsrf_cookie( + self, self._xsrf_token_id, cookie_path=self.hub.base_url, authenticated=True + ) def authenticate(self, data): return maybe_future(self.authenticator.get_authenticated_user(self, data)) diff --git a/jupyterhub/services/auth.py b/jupyterhub/services/auth.py index 999b9f3b..b3694055 100644 --- a/jupyterhub/services/auth.py +++ b/jupyterhub/services/auth.py @@ -45,6 +45,7 @@ from tornado.httpclient import AsyncHTTPClient, HTTPRequest from tornado.httputil import url_concat from tornado.log import app_log from tornado.web import HTTPError, RequestHandler +from tornado.websocket import WebSocketHandler from traitlets import ( Any, Bool, @@ -805,6 +806,10 @@ class HubAuth(SingletonConfigurable): if not hasattr(self, 'set_cookie'): # only HubOAuth can persist cookies return + fetch_mode = handler.request.headers.get("Sec-Fetch-Mode", "navigate") + if isinstance(handler, WebSocketHandler) or fetch_mode != "navigate": + # don't do this on websockets or non-navigate requests + return self.log.info( "Storing token from url in cookie for %s", handler.request.remote_ip, @@ -1197,7 +1202,12 @@ class HubOAuth(HubAuth): # set updated xsrf token cookie, # which changes after login handler._hub_auth_token_cookie = access_token - _set_xsrf_cookie(handler, handler._xsrf_token_id, cookie_path=self.base_url) + _set_xsrf_cookie( + handler, + handler._xsrf_token_id, + cookie_path=self.base_url, + authenticated=True, + ) def clear_cookie(self, handler): """Clear the OAuth cookie"""