diff --git a/docs/source/conf.py b/docs/source/conf.py index c3eca146..c4db7833 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -182,6 +182,7 @@ html_context = { linkcheck_ignore = [ r"(.*)github\.com(.*)#", # javascript based anchors r"(.*)/#%21(.*)/(.*)", # /#!forum/jupyter - encoded anchor edge case + r"https?://(.*\.)?example\.(org|com)(/.*)?", # example links r"https://github.com/[^/]*$", # too many github usernames / searches in changelog "https://github.com/jupyterhub/jupyterhub/pull/", # too many PRs in changelog "https://github.com/jupyterhub/jupyterhub/compare/", # too many comparisons in changelog diff --git a/docs/source/reference/services.md b/docs/source/reference/services.md index c2fd7f0e..1103ea8a 100644 --- a/docs/source/reference/services.md +++ b/docs/source/reference/services.md @@ -138,6 +138,14 @@ JUPYTERHUB_OAUTH_SCOPES: JSON-serialized list of scopes to use for allowing ac (deprecated in 3.0, use JUPYTERHUB_OAUTH_ACCESS_SCOPES). JUPYTERHUB_OAUTH_ACCESS_SCOPES: JSON-serialized list of scopes to use for allowing access to the service (new in 3.0). JUPYTERHUB_OAUTH_CLIENT_ALLOWED_SCOPES: JSON-serialized list of scopes that can be requested by the oauth client on behalf of users (new in 3.0). +JUPYTERHUB_PUBLIC_URL: the public URL of the service, + e.g. `https://jupyterhub.example.org/services/name/`. + Empty if no public URL is specified (default). + Will be available if subdomains are configured. +JUPYTERHUB_PUBLIC_HUB_URL: the public URL of JupyterHub as a whole, + e.g. `https://jupyterhub.example.org/`. + Empty if no public URL is specified (default). + Will be available if subdomains are configured. ``` For the previous 'cull idle' Service example, these environment variables diff --git a/docs/source/reference/spawners.md b/docs/source/reference/spawners.md index 28b21012..5183247f 100644 --- a/docs/source/reference/spawners.md +++ b/docs/source/reference/spawners.md @@ -315,6 +315,14 @@ The process environment is returned by `Spawner.get_env`, which specifies the fo - `JUPYTERHUB_OAUTH_ACCESS_SCOPES` - the scopes required to access the server (called `JUPYTERHUB_OAUTH_SCOPES` prior to 3.0) - `JUPYTERHUB_OAUTH_CLIENT_ALLOWED_SCOPES` - the scopes the service is allowed to request. If no scopes are requested explicitly, these scopes will be requested. +- `JUPYTERHUB_PUBLIC_URL` - the public URL of the server, + e.g. `https://jupyterhub.example.org/user/name/`. + Empty if no public URL is specified (default). + Will be available if subdomains are configured. +- `JUPYTERHUB_PUBLIC_HUB_URL` - the public URL of JupyterHub as a whole, + e.g. `https://jupyterhub.example.org/`. + Empty if no public URL is specified (default). + Will be available if subdomains are configured. Optional environment variables, depending on configuration: diff --git a/jupyterhub/apihandlers/auth.py b/jupyterhub/apihandlers/auth.py index 6a47bf0f..84cde4f1 100644 --- a/jupyterhub/apihandlers/auth.py +++ b/jupyterhub/apihandlers/auth.py @@ -109,14 +109,20 @@ class OAuthHandler: redirect_uri = self.get_argument('redirect_uri') if not redirect_uri or not redirect_uri.startswith('/'): return uri + # make absolute local redirects full URLs # to satisfy oauthlib's absolute URI requirement - redirect_uri = ( - get_browser_protocol(self.request) - + "://" - + self.request.host - + redirect_uri - ) + + public_url = self.settings.get("public_url") + if public_url: + proto = public_url.scheme + host = public_url.netloc + else: + # guess from request + proto = get_browser_protocol(self.request) + host = self.request.host + redirect_uri = f"{proto}://{host}{redirect_uri}" + parsed_url = urlparse(uri) query_list = parse_qsl(parsed_url.query, keep_blank_values=True) for idx, item in enumerate(query_list): diff --git a/jupyterhub/app.py b/jupyterhub/app.py index 64be37c9..05bccb34 100644 --- a/jupyterhub/app.py +++ b/jupyterhub/app.py @@ -698,6 +698,61 @@ class JupyterHub(Application): proto = 'https' if self.ssl_cert else 'http' return proto + '://:8000' + public_url = Unicode( + "", + config=True, + help="""Set the public URL of JupyterHub + + This will skip any detection of URL and protocol from requests, + which isn't always correct when JupyterHub is behind + multiple layers of proxies, etc. + Usually the failure is detecting http when it's really https. + + Should include the full, public URL of JupyterHub, + including the public-facing base_url prefix + (i.e. it should include a trailing slash), e.g. + https://jupyterhub.example.org/prefix/ + """, + ) + + @default("public_url") + def _default_public_url(self): + if self.subdomain_host: + # if subdomain_host is specified, use it by default + return self.subdomain_host + self.base_url + else: + return "" + + @validate("public_url") + def _validate_public_url(self, proposal): + url = proposal.value + if not url: + # explicitly empty (default) + return url + if not url.endswith("/"): + # ensure we have a trailing slash + # for consistency with base_url + url = url + "/" + if not url.endswith(self.base_url): + if not urlparse(url).path.strip("/"): + # no path specified, add base_url and warn + url = url.rstrip("/") + self.base_url + self.log.warning( + f"Adding missing base_url {self.base_url!r} to JupyterHub.public_url = {url!r}" + ) + else: + # path specified but it doesn't match, raise + raise ValueError( + f"JupyterHub.public_url = {url!r} must include base_url: {self.base_url!r}" + ) + if "://" not in url: + # https by default; should be specified + url = 'https://' + url + self.log.warning( + f"Adding missing protocol 'https://' to JupyterHub.public_url = {url!r}" + ) + return url + subdomain_host = Unicode( '', help="""Run single-user servers on subdomains of this host. @@ -721,15 +776,18 @@ class JupyterHub(Application): # host should include '://' # if not specified, assume https: You have to be really explicit about HTTP! new = 'https://' + new + self.log.warning( + f"Adding missing protocol 'https://' to JupyterHub.subdomain_host = {new!r}" + ) return new domain = Unicode(help="domain name, e.g. 'example.com' (excludes protocol, port)") @default('domain') def _domain_default(self): - if not self.subdomain_host: + if not (self.public_url or self.subdomain_host): return '' - return urlparse(self.subdomain_host).hostname + return urlparse(self.public_url or self.subdomain_host).hostname subdomain_hook = Union( [Callable(), Unicode()], @@ -1941,10 +1999,15 @@ class JupyterHub(Application): def init_hub(self): """Load the Hub URL config""" + if self.public_url: + # host = scheme://hostname:port (no path) + public_host = urlunparse(urlparse(self.public_url)._replace(path="")) + else: + public_host = self.subdomain_host hub_args = dict( base_url=self.hub_prefix, routespec=self.hub_routespec, - public_host=self.subdomain_host, + public_host=public_host, certfile=self.internal_ssl_cert, keyfile=self.internal_ssl_key, cafile=self.internal_ssl_ca, @@ -2462,9 +2525,9 @@ class JupyterHub(Application): """ name = orm_service.name - if self.domain: + if self.subdomain_host: parsed_host = urlparse(self.subdomain_host) - domain = self.subdomain_hook(name, self.domain, kind="service") + domain = self.subdomain_hook(name, parsed_host.hostname, kind="service") host = f"{parsed_host.scheme}://{domain}" if parsed_host.port: host = f"{host}:{parsed_host.port}" @@ -2526,9 +2589,9 @@ class JupyterHub(Application): name = spec['name'] - if self.domain: + if self.subdomain_host: parsed_host = urlparse(self.subdomain_host) - domain = self.subdomain_hook(name, self.domain, kind="service") + domain = self.subdomain_hook(name, parsed_host.hostname, kind="service") host = f"{parsed_host.scheme}://{domain}" if parsed_host.port: host = f"{host}:{parsed_host.port}" @@ -2974,6 +3037,7 @@ class JupyterHub(Application): spawner_class=self.spawner_class, base_url=self.base_url, default_url=self.default_url, + public_url=urlparse(self.public_url) if self.public_url else "", cookie_secret=self.cookie_secret, cookie_max_age_days=self.cookie_max_age_days, redirect_to_server=self.redirect_to_server, diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py index ed86068f..c2785ea8 100644 --- a/jupyterhub/handlers/base.py +++ b/jupyterhub/handlers/base.py @@ -138,6 +138,10 @@ class BaseHandler(RequestHandler): def domain(self): return self.settings['domain'] + @property + def public_url(self): + return self.settings['public_url'] + @property def db(self): return self.settings['db'] @@ -577,8 +581,13 @@ class BaseHandler(RequestHandler): # tornado <4.2 have a bug that consider secure==True as soon as # 'secure' kwarg is passed to set_secure_cookie kwargs = {'httponly': True} - if self.request.protocol == 'https': - kwargs['secure'] = True + public_url = self.settings.get("public_url") + if public_url: + if public_url.scheme == 'https': + kwargs['secure'] = True + else: + if self.request.protocol == 'https': + kwargs['secure'] = True kwargs.update(self.settings.get('cookie_options', {})) kwargs.update(overrides) @@ -670,8 +679,15 @@ class BaseHandler(RequestHandler): next_url = self.get_argument('next', default='') # protect against some browsers' buggy handling of backslash as slash next_url = next_url.replace('\\', '%5C') - proto = get_browser_protocol(self.request) - host = self.request.host + public_url = self.settings.get("public_url") + if public_url: + proto = public_url.scheme + host = public_url.netloc + else: + # guess from request + proto = get_browser_protocol(self.request) + host = self.request.host + if next_url.startswith("///"): # strip more than 2 leading // down to 2 # because urlparse treats that as empty netloc, diff --git a/jupyterhub/services/auth.py b/jupyterhub/services/auth.py index 6bc57fc5..6473b2ad 100644 --- a/jupyterhub/services/auth.py +++ b/jupyterhub/services/auth.py @@ -37,7 +37,7 @@ import uuid import warnings from http import HTTPStatus from unittest import mock -from urllib.parse import urlencode +from urllib.parse import urlencode, urlparse from tornado.httpclient import AsyncHTTPClient, HTTPRequest from tornado.httputil import url_concat @@ -924,8 +924,13 @@ class HubOAuth(HubAuth): # OAuth that doesn't complete shouldn't linger too long. 'max_age': 600, } - if get_browser_protocol(handler.request) == 'https': - kwargs['secure'] = True + public_url = os.getenv("JUPYTERHUB_PUBLIC_URL") + if public_url: + if urlparse(public_url).scheme == 'https': + kwargs['secure'] = True + else: + if get_browser_protocol(handler.request) == 'https': + kwargs['secure'] = True # load user cookie overrides kwargs.update(self.cookie_options) handler.set_secure_cookie(cookie_name, b64_state, **kwargs) diff --git a/jupyterhub/spawner.py b/jupyterhub/spawner.py index 719ecb88..3d74572b 100644 --- a/jupyterhub/spawner.py +++ b/jupyterhub/spawner.py @@ -162,6 +162,8 @@ class Spawner(LoggingConfigurable): hub = Any() orm_spawner = Any() cookie_options = Dict() + public_url = Unicode(help="Public URL of this spawner's server") + public_hub_url = Unicode(help="Public URL of the Hub itself") db = Any() @@ -1047,6 +1049,10 @@ class Spawner(LoggingConfigurable): bind_url = f"{proto}://{self.ip}:{self.port}{base_url}" env["JUPYTERHUB_SERVICE_URL"] = bind_url + # the public URLs of this server and the Hub + env["JUPYTERHUB_PUBLIC_URL"] = self.public_url + env["JUPYTERHUB_PUBLIC_HUB_URL"] = self.public_hub_url + # Put in limit and guarantee info if they exist. # Note that this is for use by the humans / notebook extensions in the # single-user notebook server, and not for direct usage by the spawners diff --git a/jupyterhub/tests/test_user.py b/jupyterhub/tests/test_user.py index 4877a333..a9eaf10b 100644 --- a/jupyterhub/tests/test_user.py +++ b/jupyterhub/tests/test_user.py @@ -1,3 +1,6 @@ +from unittest import mock +from urllib.parse import urlparse + import pytest from .. import orm @@ -66,3 +69,42 @@ def test_sync_groups(app, user, group_names): def test_server_url(app, user, server_name, path): user_url = user.url assert user.server_url(server_name) == user_url + path + + +@pytest.mark.parametrize( + "server_name, public_url, subdomain_host, expected_url", + [ + ("", "", "", ""), + ("name", "", "", ""), + ("", "https://hub.tld/PREFIX/", "", "https://hub.tld/PREFIX/user/USERNAME/"), + ( + "name", + "https://hub.tld/PREFIX/", + "", + "https://hub.tld/PREFIX/user/USERNAME/name/", + ), + ( + "name", + "", + "https://hub.tld:123", + "https://USERNAME.hub.tld:123/PREFIX/user/USERNAME/name/", + ), + ], +) +def test_public_url(app, user, server_name, public_url, subdomain_host, expected_url): + expected_url = expected_url.replace("USERNAME", user.escaped_name).replace( + "PREFIX", app.base_url.strip("/") + ) + if public_url: + public_url = public_url.replace("PREFIX", app.base_url.strip("/")) + public_url = urlparse(public_url) + with mock.patch.dict( + user.settings, + { + "subdomain_host": subdomain_host, + "domain": urlparse(subdomain_host).hostname, + "public_url": public_url, + }, + ): + public_server_url = user.public_url(server_name) + assert public_server_url == expected_url diff --git a/jupyterhub/user.py b/jupyterhub/user.py index ea0eca66..ac2094f9 100644 --- a/jupyterhub/user.py +++ b/jupyterhub/user.py @@ -4,7 +4,7 @@ import json import warnings from collections import defaultdict from datetime import timedelta -from urllib.parse import quote, urlparse +from urllib.parse import quote, urlparse, urlunparse from sqlalchemy import inspect from tornado import gen, web @@ -438,6 +438,20 @@ class User: ) spawn_kwargs.update(ssl_kwargs) + # public URLs + if self.settings.get("public_url"): + public_url = self.settings["public_url"] + hub = self.settings.get('hub') + if hub is None: + # only in mock tests + hub_path = "/hub/" + else: + hub_path = hub.base_url + spawn_kwargs["public_hub_url"] = urlunparse( + public_url._replace(path=hub_path) + ) + spawn_kwargs["public_url"] = self.public_url(server_name) + # update with kwargs. Mainly for testing. spawn_kwargs.update(kwargs) spawner = spawner_class(**spawn_kwargs) @@ -541,12 +555,19 @@ class User: @property def host(self): """Get the *host* for my server (proto://domain[:port])""" - # FIXME: escaped_name probably isn't escaped enough in general for a domain fragment - parsed = urlparse(self.settings['subdomain_host']) - h = f'{parsed.scheme}://{self.domain}' - if parsed.port: - h += ':%i' % parsed.port - return h + # if subdomains are used, use our domain + + if self.settings.get('subdomain_host'): + parsed = urlparse(self.settings['subdomain_host']) + h = f"{parsed.scheme}://{self.domain}" + if parsed.port: + h = f"{h}:{parsed.port}" + return h + elif self.settings.get("public_url"): + # no subdomain, use public host url without path + return urlunparse(self.settings["public_url"]._replace(path="")) + else: + return "" @property def url(self): @@ -554,8 +575,8 @@ class User: Full name.domain/path if using subdomains, otherwise just my /base/url """ - if self.settings.get('subdomain_host'): - return f'{self.host}{self.base_url}' + if self.settings.get("subdomain_host"): + return f"{self.host}{self.base_url}" else: return self.base_url @@ -566,6 +587,24 @@ class User: else: return url_path_join(self.url, url_escape_path(server_name), "/") + def public_url(self, server_name=''): + """Get the public URL of a server by name + + Like server_url, but empty if no public URL is specified + """ + # server_url will be a full URL if using subdomains + url = self.server_url(server_name) + if "://" not in url: + # not using subdomains, public URL may be specified + if self.settings.get("public_url"): + # add server's base_url path prefix to public host + url = urlunparse(self.settings["public_url"]._replace(path=url)) + else: + # no public url (from subdomain or host), + # leave unspecified + url = "" + return url + def progress_url(self, server_name=''): """API URL for progress endpoint for a server with a given name""" url_parts = [self.settings['hub'].base_url, 'api/users', self.escaped_name]