Merge pull request #4479 from minrk/jupyterhub-public-url

add JupyterHub.public_url config
This commit is contained in:
Erik Sundell
2024-01-24 23:56:58 +01:00
committed by GitHub
10 changed files with 224 additions and 29 deletions

View File

@@ -182,6 +182,7 @@ html_context = {
linkcheck_ignore = [ linkcheck_ignore = [
r"(.*)github\.com(.*)#", # javascript based anchors r"(.*)github\.com(.*)#", # javascript based anchors
r"(.*)/#%21(.*)/(.*)", # /#!forum/jupyter - encoded anchor edge case r"(.*)/#%21(.*)/(.*)", # /#!forum/jupyter - encoded anchor edge case
r"https?://(.*\.)?example\.(org|com)(/.*)?", # example links
r"https://github.com/[^/]*$", # too many github usernames / searches in changelog r"https://github.com/[^/]*$", # too many github usernames / searches in changelog
"https://github.com/jupyterhub/jupyterhub/pull/", # too many PRs in changelog "https://github.com/jupyterhub/jupyterhub/pull/", # too many PRs in changelog
"https://github.com/jupyterhub/jupyterhub/compare/", # too many comparisons in changelog "https://github.com/jupyterhub/jupyterhub/compare/", # too many comparisons in changelog

View File

@@ -138,6 +138,14 @@ JUPYTERHUB_OAUTH_SCOPES: JSON-serialized list of scopes to use for allowing ac
(deprecated in 3.0, use JUPYTERHUB_OAUTH_ACCESS_SCOPES). (deprecated in 3.0, use JUPYTERHUB_OAUTH_ACCESS_SCOPES).
JUPYTERHUB_OAUTH_ACCESS_SCOPES: JSON-serialized list of scopes to use for allowing access to the service (new in 3.0). JUPYTERHUB_OAUTH_ACCESS_SCOPES: JSON-serialized list of scopes to use for allowing access to the service (new in 3.0).
JUPYTERHUB_OAUTH_CLIENT_ALLOWED_SCOPES: JSON-serialized list of scopes that can be requested by the oauth client on behalf of users (new in 3.0). JUPYTERHUB_OAUTH_CLIENT_ALLOWED_SCOPES: JSON-serialized list of scopes that can be requested by the oauth client on behalf of users (new in 3.0).
JUPYTERHUB_PUBLIC_URL: the public URL of the service,
e.g. `https://jupyterhub.example.org/services/name/`.
Empty if no public URL is specified (default).
Will be available if subdomains are configured.
JUPYTERHUB_PUBLIC_HUB_URL: the public URL of JupyterHub as a whole,
e.g. `https://jupyterhub.example.org/`.
Empty if no public URL is specified (default).
Will be available if subdomains are configured.
``` ```
For the previous 'cull idle' Service example, these environment variables For the previous 'cull idle' Service example, these environment variables

View File

@@ -315,6 +315,14 @@ The process environment is returned by `Spawner.get_env`, which specifies the fo
- `JUPYTERHUB_OAUTH_ACCESS_SCOPES` - the scopes required to access the server (called `JUPYTERHUB_OAUTH_SCOPES` prior to 3.0) - `JUPYTERHUB_OAUTH_ACCESS_SCOPES` - the scopes required to access the server (called `JUPYTERHUB_OAUTH_SCOPES` prior to 3.0)
- `JUPYTERHUB_OAUTH_CLIENT_ALLOWED_SCOPES` - the scopes the service is allowed to request. - `JUPYTERHUB_OAUTH_CLIENT_ALLOWED_SCOPES` - the scopes the service is allowed to request.
If no scopes are requested explicitly, these scopes will be requested. If no scopes are requested explicitly, these scopes will be requested.
- `JUPYTERHUB_PUBLIC_URL` - the public URL of the server,
e.g. `https://jupyterhub.example.org/user/name/`.
Empty if no public URL is specified (default).
Also set when subdomains are configured (`JupyterHub.subdomain_host`), even if `JupyterHub.public_url` is not set explicitly.
- `JUPYTERHUB_PUBLIC_HUB_URL` - the public URL of JupyterHub as a whole,
e.g. `https://jupyterhub.example.org/`.
Empty if no public URL is specified (default).
Also set when subdomains are configured (`JupyterHub.subdomain_host`), even if `JupyterHub.public_url` is not set explicitly.
Optional environment variables, depending on configuration: Optional environment variables, depending on configuration:

View File

@@ -109,14 +109,20 @@ class OAuthHandler:
redirect_uri = self.get_argument('redirect_uri') redirect_uri = self.get_argument('redirect_uri')
if not redirect_uri or not redirect_uri.startswith('/'): if not redirect_uri or not redirect_uri.startswith('/'):
return uri return uri
# make absolute local redirects full URLs # make absolute local redirects full URLs
# to satisfy oauthlib's absolute URI requirement # to satisfy oauthlib's absolute URI requirement
redirect_uri = (
get_browser_protocol(self.request) public_url = self.settings.get("public_url")
+ "://" if public_url:
+ self.request.host proto = public_url.scheme
+ redirect_uri host = public_url.netloc
) else:
# guess from request
proto = get_browser_protocol(self.request)
host = self.request.host
redirect_uri = f"{proto}://{host}{redirect_uri}"
parsed_url = urlparse(uri) parsed_url = urlparse(uri)
query_list = parse_qsl(parsed_url.query, keep_blank_values=True) query_list = parse_qsl(parsed_url.query, keep_blank_values=True)
for idx, item in enumerate(query_list): for idx, item in enumerate(query_list):

View File

@@ -698,6 +698,61 @@ class JupyterHub(Application):
proto = 'https' if self.ssl_cert else 'http' proto = 'https' if self.ssl_cert else 'http'
return proto + '://:8000' return proto + '://:8000'
# Configurable public URL of the Hub (scheme, host and base_url prefix).
# The literal default is empty; when non-empty, request handlers take the
# scheme and host from this value instead of guessing them from the request.
public_url = Unicode(
"",
config=True,
help="""Set the public URL of JupyterHub
This will skip any detection of URL and protocol from requests,
which isn't always correct when JupyterHub is behind
multiple layers of proxies, etc.
Usually the failure is detecting http when it's really https.
Should include the full, public URL of JupyterHub,
including the public-facing base_url prefix
(i.e. it should include a trailing slash), e.g.
https://jupyterhub.example.org/prefix/
""",
)
@default("public_url")
def _default_public_url(self):
    """Compute the default for ``public_url``.

    Returns ``subdomain_host + base_url`` when ``subdomain_host`` is set,
    otherwise the empty string (meaning "no public URL known").
    """
    subdomain_host = self.subdomain_host
    if not subdomain_host:
        return ""
    # subdomains are in use: derive the public URL from the subdomain host
    return subdomain_host + self.base_url
@validate("public_url")
def _validate_public_url(self, proposal):
    """Normalize and validate a proposed ``JupyterHub.public_url``.

    Normalization steps, in order:

    1. An empty value is returned unchanged (no public URL configured).
    2. A missing protocol is filled in as ``https://`` (with a warning).
    3. A trailing slash is appended if missing, for consistency with
       ``base_url``.
    4. If the URL has no path at all, ``base_url`` is appended (with a
       warning).

    Returns:
        The normalized URL string.

    Raises:
        ValueError: if the URL has a path that does not end with
            ``base_url``.
    """
    url = proposal.value
    if not url:
        # explicitly empty (default)
        return url

    # Resolve the protocol *before* inspecting the path: without a scheme,
    # urlparse() reports the hostname as part of .path, so a bare
    # "hub.example.org" would otherwise be rejected as a mismatched path
    # instead of having base_url appended.
    if "://" not in url:
        # https by default; should be specified
        url = 'https://' + url
        self.log.warning(
            f"Adding missing protocol 'https://' to JupyterHub.public_url = {url!r}"
        )

    if not url.endswith("/"):
        # ensure we have a trailing slash
        # for consistency with base_url
        url = url + "/"

    if not url.endswith(self.base_url):
        if not urlparse(url).path.strip("/"):
            # no path specified, add base_url and warn
            url = url.rstrip("/") + self.base_url
            self.log.warning(
                f"Adding missing base_url {self.base_url!r} to JupyterHub.public_url = {url!r}"
            )
        else:
            # path specified but it doesn't match, raise
            raise ValueError(
                f"JupyterHub.public_url = {url!r} must include base_url: {self.base_url!r}"
            )
    return url
subdomain_host = Unicode( subdomain_host = Unicode(
'', '',
help="""Run single-user servers on subdomains of this host. help="""Run single-user servers on subdomains of this host.
@@ -721,15 +776,18 @@ class JupyterHub(Application):
# host should include '://' # host should include '://'
# if not specified, assume https: You have to be really explicit about HTTP! # if not specified, assume https: You have to be really explicit about HTTP!
new = 'https://' + new new = 'https://' + new
self.log.warning(
f"Adding missing protocol 'https://' to JupyterHub.subdomain_host = {new!r}"
)
return new return new
domain = Unicode(help="domain name, e.g. 'example.com' (excludes protocol, port)") domain = Unicode(help="domain name, e.g. 'example.com' (excludes protocol, port)")
@default('domain') @default('domain')
def _domain_default(self): def _domain_default(self):
if not self.subdomain_host: if not (self.public_url or self.subdomain_host):
return '' return ''
return urlparse(self.subdomain_host).hostname return urlparse(self.public_url or self.subdomain_host).hostname
subdomain_hook = Union( subdomain_hook = Union(
[Callable(), Unicode()], [Callable(), Unicode()],
@@ -1941,10 +1999,15 @@ class JupyterHub(Application):
def init_hub(self): def init_hub(self):
"""Load the Hub URL config""" """Load the Hub URL config"""
if self.public_url:
# host = scheme://hostname:port (no path)
public_host = urlunparse(urlparse(self.public_url)._replace(path=""))
else:
public_host = self.subdomain_host
hub_args = dict( hub_args = dict(
base_url=self.hub_prefix, base_url=self.hub_prefix,
routespec=self.hub_routespec, routespec=self.hub_routespec,
public_host=self.subdomain_host, public_host=public_host,
certfile=self.internal_ssl_cert, certfile=self.internal_ssl_cert,
keyfile=self.internal_ssl_key, keyfile=self.internal_ssl_key,
cafile=self.internal_ssl_ca, cafile=self.internal_ssl_ca,
@@ -2462,9 +2525,9 @@ class JupyterHub(Application):
""" """
name = orm_service.name name = orm_service.name
if self.domain: if self.subdomain_host:
parsed_host = urlparse(self.subdomain_host) parsed_host = urlparse(self.subdomain_host)
domain = self.subdomain_hook(name, self.domain, kind="service") domain = self.subdomain_hook(name, parsed_host.hostname, kind="service")
host = f"{parsed_host.scheme}://{domain}" host = f"{parsed_host.scheme}://{domain}"
if parsed_host.port: if parsed_host.port:
host = f"{host}:{parsed_host.port}" host = f"{host}:{parsed_host.port}"
@@ -2526,9 +2589,9 @@ class JupyterHub(Application):
name = spec['name'] name = spec['name']
if self.domain: if self.subdomain_host:
parsed_host = urlparse(self.subdomain_host) parsed_host = urlparse(self.subdomain_host)
domain = self.subdomain_hook(name, self.domain, kind="service") domain = self.subdomain_hook(name, parsed_host.hostname, kind="service")
host = f"{parsed_host.scheme}://{domain}" host = f"{parsed_host.scheme}://{domain}"
if parsed_host.port: if parsed_host.port:
host = f"{host}:{parsed_host.port}" host = f"{host}:{parsed_host.port}"
@@ -2974,6 +3037,7 @@ class JupyterHub(Application):
spawner_class=self.spawner_class, spawner_class=self.spawner_class,
base_url=self.base_url, base_url=self.base_url,
default_url=self.default_url, default_url=self.default_url,
public_url=urlparse(self.public_url) if self.public_url else "",
cookie_secret=self.cookie_secret, cookie_secret=self.cookie_secret,
cookie_max_age_days=self.cookie_max_age_days, cookie_max_age_days=self.cookie_max_age_days,
redirect_to_server=self.redirect_to_server, redirect_to_server=self.redirect_to_server,

View File

@@ -138,6 +138,10 @@ class BaseHandler(RequestHandler):
def domain(self): def domain(self):
return self.settings['domain'] return self.settings['domain']
# The Hub's public URL as stored in the application settings:
# a parsed URL (result of urlparse) when JupyterHub.public_url is set,
# otherwise an empty string.
@property
def public_url(self):
return self.settings['public_url']
@property @property
def db(self): def db(self):
return self.settings['db'] return self.settings['db']
@@ -577,8 +581,13 @@ class BaseHandler(RequestHandler):
# tornado <4.2 have a bug that consider secure==True as soon as # tornado <4.2 have a bug that consider secure==True as soon as
# 'secure' kwarg is passed to set_secure_cookie # 'secure' kwarg is passed to set_secure_cookie
kwargs = {'httponly': True} kwargs = {'httponly': True}
if self.request.protocol == 'https': public_url = self.settings.get("public_url")
kwargs['secure'] = True if public_url:
if public_url.scheme == 'https':
kwargs['secure'] = True
else:
if self.request.protocol == 'https':
kwargs['secure'] = True
kwargs.update(self.settings.get('cookie_options', {})) kwargs.update(self.settings.get('cookie_options', {}))
kwargs.update(overrides) kwargs.update(overrides)
@@ -670,8 +679,15 @@ class BaseHandler(RequestHandler):
next_url = self.get_argument('next', default='') next_url = self.get_argument('next', default='')
# protect against some browsers' buggy handling of backslash as slash # protect against some browsers' buggy handling of backslash as slash
next_url = next_url.replace('\\', '%5C') next_url = next_url.replace('\\', '%5C')
proto = get_browser_protocol(self.request) public_url = self.settings.get("public_url")
host = self.request.host if public_url:
proto = public_url.scheme
host = public_url.netloc
else:
# guess from request
proto = get_browser_protocol(self.request)
host = self.request.host
if next_url.startswith("///"): if next_url.startswith("///"):
# strip more than 2 leading // down to 2 # strip more than 2 leading // down to 2
# because urlparse treats that as empty netloc, # because urlparse treats that as empty netloc,

View File

@@ -37,7 +37,7 @@ import uuid
import warnings import warnings
from http import HTTPStatus from http import HTTPStatus
from unittest import mock from unittest import mock
from urllib.parse import urlencode from urllib.parse import urlencode, urlparse
from tornado.httpclient import AsyncHTTPClient, HTTPRequest from tornado.httpclient import AsyncHTTPClient, HTTPRequest
from tornado.httputil import url_concat from tornado.httputil import url_concat
@@ -924,8 +924,13 @@ class HubOAuth(HubAuth):
# OAuth that doesn't complete shouldn't linger too long. # OAuth that doesn't complete shouldn't linger too long.
'max_age': 600, 'max_age': 600,
} }
if get_browser_protocol(handler.request) == 'https': public_url = os.getenv("JUPYTERHUB_PUBLIC_URL")
kwargs['secure'] = True if public_url:
if urlparse(public_url).scheme == 'https':
kwargs['secure'] = True
else:
if get_browser_protocol(handler.request) == 'https':
kwargs['secure'] = True
# load user cookie overrides # load user cookie overrides
kwargs.update(self.cookie_options) kwargs.update(self.cookie_options)
handler.set_secure_cookie(cookie_name, b64_state, **kwargs) handler.set_secure_cookie(cookie_name, b64_state, **kwargs)

View File

@@ -162,6 +162,8 @@ class Spawner(LoggingConfigurable):
hub = Any() hub = Any()
orm_spawner = Any() orm_spawner = Any()
cookie_options = Dict() cookie_options = Dict()
public_url = Unicode(help="Public URL of this spawner's server")
public_hub_url = Unicode(help="Public URL of the Hub itself")
db = Any() db = Any()
@@ -1047,6 +1049,10 @@ class Spawner(LoggingConfigurable):
bind_url = f"{proto}://{self.ip}:{self.port}{base_url}" bind_url = f"{proto}://{self.ip}:{self.port}{base_url}"
env["JUPYTERHUB_SERVICE_URL"] = bind_url env["JUPYTERHUB_SERVICE_URL"] = bind_url
# the public URLs of this server and the Hub
env["JUPYTERHUB_PUBLIC_URL"] = self.public_url
env["JUPYTERHUB_PUBLIC_HUB_URL"] = self.public_hub_url
# Put in limit and guarantee info if they exist. # Put in limit and guarantee info if they exist.
# Note that this is for use by the humans / notebook extensions in the # Note that this is for use by the humans / notebook extensions in the
# single-user notebook server, and not for direct usage by the spawners # single-user notebook server, and not for direct usage by the spawners

View File

@@ -1,3 +1,6 @@
from unittest import mock
from urllib.parse import urlparse
import pytest import pytest
from .. import orm from .. import orm
@@ -66,3 +69,42 @@ def test_sync_groups(app, user, group_names):
def test_server_url(app, user, server_name, path): def test_server_url(app, user, server_name, path):
user_url = user.url user_url = user.url
assert user.server_url(server_name) == user_url + path assert user.server_url(server_name) == user_url + path
@pytest.mark.parametrize(
    "server_name, public_url, subdomain_host, expected_url",
    [
        ("", "", "", ""),
        ("name", "", "", ""),
        ("", "https://hub.tld/PREFIX/", "", "https://hub.tld/PREFIX/user/USERNAME/"),
        (
            "name",
            "https://hub.tld/PREFIX/",
            "",
            "https://hub.tld/PREFIX/user/USERNAME/name/",
        ),
        (
            "name",
            "",
            "https://hub.tld:123",
            "https://USERNAME.hub.tld:123/PREFIX/user/USERNAME/name/",
        ),
    ],
)
def test_public_url(app, user, server_name, public_url, subdomain_host, expected_url):
    """Check User.public_url() with and without public_url / subdomain_host.

    The PREFIX and USERNAME placeholders in the parameters are filled in
    from the running app's base_url and the user's escaped name.
    """
    prefix = app.base_url.strip("/")
    expected_url = expected_url.replace("USERNAME", user.escaped_name)
    expected_url = expected_url.replace("PREFIX", prefix)

    # settings hold public_url as a parsed URL when set, "" otherwise
    if public_url:
        public_url = urlparse(public_url.replace("PREFIX", prefix))

    settings_overrides = {
        "subdomain_host": subdomain_host,
        "domain": urlparse(subdomain_host).hostname,
        "public_url": public_url,
    }
    with mock.patch.dict(user.settings, settings_overrides):
        public_server_url = user.public_url(server_name)
    assert public_server_url == expected_url

View File

@@ -4,7 +4,7 @@ import json
import warnings import warnings
from collections import defaultdict from collections import defaultdict
from datetime import timedelta from datetime import timedelta
from urllib.parse import quote, urlparse from urllib.parse import quote, urlparse, urlunparse
from sqlalchemy import inspect from sqlalchemy import inspect
from tornado import gen, web from tornado import gen, web
@@ -438,6 +438,20 @@ class User:
) )
spawn_kwargs.update(ssl_kwargs) spawn_kwargs.update(ssl_kwargs)
# public URLs
if self.settings.get("public_url"):
public_url = self.settings["public_url"]
hub = self.settings.get('hub')
if hub is None:
# only in mock tests
hub_path = "/hub/"
else:
hub_path = hub.base_url
spawn_kwargs["public_hub_url"] = urlunparse(
public_url._replace(path=hub_path)
)
spawn_kwargs["public_url"] = self.public_url(server_name)
# update with kwargs. Mainly for testing. # update with kwargs. Mainly for testing.
spawn_kwargs.update(kwargs) spawn_kwargs.update(kwargs)
spawner = spawner_class(**spawn_kwargs) spawner = spawner_class(**spawn_kwargs)
@@ -541,12 +555,19 @@ class User:
@property @property
def host(self): def host(self):
"""Get the *host* for my server (proto://domain[:port])""" """Get the *host* for my server (proto://domain[:port])"""
# FIXME: escaped_name probably isn't escaped enough in general for a domain fragment # if subdomains are used, use our domain
parsed = urlparse(self.settings['subdomain_host'])
h = f'{parsed.scheme}://{self.domain}' if self.settings.get('subdomain_host'):
if parsed.port: parsed = urlparse(self.settings['subdomain_host'])
h += ':%i' % parsed.port h = f"{parsed.scheme}://{self.domain}"
return h if parsed.port:
h = f"{h}:{parsed.port}"
return h
elif self.settings.get("public_url"):
# no subdomain, use public host url without path
return urlunparse(self.settings["public_url"]._replace(path=""))
else:
return ""
@property @property
def url(self): def url(self):
@@ -554,8 +575,8 @@ class User:
Full name.domain/path if using subdomains, otherwise just my /base/url Full name.domain/path if using subdomains, otherwise just my /base/url
""" """
if self.settings.get('subdomain_host'): if self.settings.get("subdomain_host"):
return f'{self.host}{self.base_url}' return f"{self.host}{self.base_url}"
else: else:
return self.base_url return self.base_url
@@ -566,6 +587,24 @@ class User:
else: else:
return url_path_join(self.url, url_escape_path(server_name), "/") return url_path_join(self.url, url_escape_path(server_name), "/")
def public_url(self, server_name=''):
    """Return the public URL of the named server, or '' if none is known.

    Same as ``server_url``, except that a path-only result is turned into
    a full URL using the Hub's ``public_url`` setting; when that setting is
    empty or absent, the empty string is returned.
    """
    server_url = self.server_url(server_name)
    if "://" in server_url:
        # already a full URL (subdomains in use), nothing to add
        return server_url

    hub_public_url = self.settings.get("public_url")
    if not hub_public_url:
        # no public url (from subdomain or host), leave unspecified
        return ""

    # keep the public scheme/host, swap in this server's path prefix
    return urlunparse(hub_public_url._replace(path=server_url))
def progress_url(self, server_name=''): def progress_url(self, server_name=''):
"""API URL for progress endpoint for a server with a given name""" """API URL for progress endpoint for a server with a given name"""
url_parts = [self.settings['hub'].base_url, 'api/users', self.escaped_name] url_parts = [self.settings['hub'].base_url, 'api/users', self.escaped_name]