Merge pull request #4479 from minrk/jupyterhub-public-url

add JupyterHub.public_url config
This commit is contained in:
Erik Sundell
2024-01-24 23:56:58 +01:00
committed by GitHub
10 changed files with 224 additions and 29 deletions

View File

@@ -182,6 +182,7 @@ html_context = {
linkcheck_ignore = [
r"(.*)github\.com(.*)#", # javascript based anchors
r"(.*)/#%21(.*)/(.*)", # /#!forum/jupyter - encoded anchor edge case
r"https?://(.*\.)?example\.(org|com)(/.*)?", # example links
r"https://github.com/[^/]*$", # too many github usernames / searches in changelog
"https://github.com/jupyterhub/jupyterhub/pull/", # too many PRs in changelog
"https://github.com/jupyterhub/jupyterhub/compare/", # too many comparisons in changelog

View File

@@ -138,6 +138,14 @@ JUPYTERHUB_OAUTH_SCOPES: JSON-serialized list of scopes to use for allowing ac
(deprecated in 3.0, use JUPYTERHUB_OAUTH_ACCESS_SCOPES).
JUPYTERHUB_OAUTH_ACCESS_SCOPES: JSON-serialized list of scopes to use for allowing access to the service (new in 3.0).
JUPYTERHUB_OAUTH_CLIENT_ALLOWED_SCOPES: JSON-serialized list of scopes that can be requested by the oauth client on behalf of users (new in 3.0).
JUPYTERHUB_PUBLIC_URL: the public URL of the service,
e.g. `https://jupyterhub.example.org/services/name/`.
Empty by default, i.e. when no public URL is specified,
unless subdomains are configured, in which case it is always set.
JUPYTERHUB_PUBLIC_HUB_URL: the public URL of JupyterHub as a whole,
e.g. `https://jupyterhub.example.org/`.
Empty by default, i.e. when no public URL is specified,
unless subdomains are configured, in which case it is always set.
```
For the previous 'cull idle' Service example, these environment variables

View File

@@ -315,6 +315,14 @@ The process environment is returned by `Spawner.get_env`, which specifies the fo
- `JUPYTERHUB_OAUTH_ACCESS_SCOPES` - the scopes required to access the server (called `JUPYTERHUB_OAUTH_SCOPES` prior to 3.0)
- `JUPYTERHUB_OAUTH_CLIENT_ALLOWED_SCOPES` - the scopes the service is allowed to request.
If no scopes are requested explicitly, these scopes will be requested.
- `JUPYTERHUB_PUBLIC_URL` - the public URL of the server,
e.g. `https://jupyterhub.example.org/user/name/`.
Empty by default, i.e. when no public URL is specified,
unless subdomains are configured, in which case it is always set.
- `JUPYTERHUB_PUBLIC_HUB_URL` - the public URL of JupyterHub as a whole,
e.g. `https://jupyterhub.example.org/`.
Empty by default, i.e. when no public URL is specified,
unless subdomains are configured, in which case it is always set.
Optional environment variables, depending on configuration:

View File

@@ -109,14 +109,20 @@ class OAuthHandler:
redirect_uri = self.get_argument('redirect_uri')
if not redirect_uri or not redirect_uri.startswith('/'):
return uri
# make absolute local redirects full URLs
# to satisfy oauthlib's absolute URI requirement
redirect_uri = (
get_browser_protocol(self.request)
+ "://"
+ self.request.host
+ redirect_uri
)
public_url = self.settings.get("public_url")
if public_url:
proto = public_url.scheme
host = public_url.netloc
else:
# guess from request
proto = get_browser_protocol(self.request)
host = self.request.host
redirect_uri = f"{proto}://{host}{redirect_uri}"
parsed_url = urlparse(uri)
query_list = parse_qsl(parsed_url.query, keep_blank_values=True)
for idx, item in enumerate(query_list):

View File

@@ -698,6 +698,61 @@ class JupyterHub(Application):
proto = 'https' if self.ssl_cert else 'http'
return proto + '://:8000'
# Configurable public-facing URL of the whole JupyterHub deployment.
# Empty string means "not configured"; the default and the normalization
# of user-supplied values are handled by the `public_url` default/validate
# hooks declared alongside this trait.
public_url = Unicode(
"",
config=True,
help="""Set the public URL of JupyterHub
This will skip any detection of URL and protocol from requests,
which isn't always correct when JupyterHub is behind
multiple layers of proxies, etc.
Usually the failure is detecting http when it's really https.
Should include the full, public URL of JupyterHub,
including the public-facing base_url prefix
(i.e. it should include a trailing slash), e.g.
https://jupyterhub.example.org/prefix/
""",
)
@default("public_url")
def _default_public_url(self):
    """Compute the default value of ``public_url``.

    Returns ``subdomain_host + base_url`` when ``subdomain_host`` is set,
    otherwise the empty string (no public URL known).
    """
    if not self.subdomain_host:
        return ""
    # subdomain_host is specified: use it, plus the base_url prefix, by default
    return self.subdomain_host + self.base_url
@validate("public_url")
def _validate_public_url(self, proposal):
    """Normalize and validate a proposed ``JupyterHub.public_url``.

    Normalization steps, in order:

    - an empty value is returned unchanged (no public URL configured)
    - a missing protocol is filled in as ``https://`` (with a warning)
    - a trailing slash is appended if absent, for consistency with base_url
    - if the URL has no path at all, ``base_url`` is appended (with a warning)

    Returns:
        The normalized URL string.

    Raises:
        ValueError: if the URL has a path that does not end with ``base_url``.
    """
    url = proposal.value
    if not url:
        # explicitly empty (default)
        return url
    if "://" not in url:
        # https by default; should be specified.
        # This must run before the path is inspected below: without a
        # scheme, urlparse treats the hostname as part of the path, so a
        # bare 'hub.example.org' would be rejected as "path doesn't match
        # base_url" instead of having base_url appended.
        url = 'https://' + url
        self.log.warning(
            f"Adding missing protocol 'https://' to JupyterHub.public_url = {url!r}"
        )
    if not url.endswith("/"):
        # ensure we have a trailing slash
        # for consistency with base_url
        url = url + "/"
    if not url.endswith(self.base_url):
        if not urlparse(url).path.strip("/"):
            # no path specified, add base_url and warn
            url = url.rstrip("/") + self.base_url
            self.log.warning(
                f"Adding missing base_url {self.base_url!r} to JupyterHub.public_url = {url!r}"
            )
        else:
            # path specified but it doesn't match, raise
            raise ValueError(
                f"JupyterHub.public_url = {url!r} must include base_url: {self.base_url!r}"
            )
    return url
subdomain_host = Unicode(
'',
help="""Run single-user servers on subdomains of this host.
@@ -721,15 +776,18 @@ class JupyterHub(Application):
# host should include '://'
# if not specified, assume https: You have to be really explicit about HTTP!
new = 'https://' + new
self.log.warning(
f"Adding missing protocol 'https://' to JupyterHub.subdomain_host = {new!r}"
)
return new
domain = Unicode(help="domain name, e.g. 'example.com' (excludes protocol, port)")
@default('domain')
def _domain_default(self):
if not self.subdomain_host:
if not (self.public_url or self.subdomain_host):
return ''
return urlparse(self.subdomain_host).hostname
return urlparse(self.public_url or self.subdomain_host).hostname
subdomain_hook = Union(
[Callable(), Unicode()],
@@ -1941,10 +1999,15 @@ class JupyterHub(Application):
def init_hub(self):
"""Load the Hub URL config"""
if self.public_url:
# host = scheme://hostname:port (no path)
public_host = urlunparse(urlparse(self.public_url)._replace(path=""))
else:
public_host = self.subdomain_host
hub_args = dict(
base_url=self.hub_prefix,
routespec=self.hub_routespec,
public_host=self.subdomain_host,
public_host=public_host,
certfile=self.internal_ssl_cert,
keyfile=self.internal_ssl_key,
cafile=self.internal_ssl_ca,
@@ -2462,9 +2525,9 @@ class JupyterHub(Application):
"""
name = orm_service.name
if self.domain:
if self.subdomain_host:
parsed_host = urlparse(self.subdomain_host)
domain = self.subdomain_hook(name, self.domain, kind="service")
domain = self.subdomain_hook(name, parsed_host.hostname, kind="service")
host = f"{parsed_host.scheme}://{domain}"
if parsed_host.port:
host = f"{host}:{parsed_host.port}"
@@ -2526,9 +2589,9 @@ class JupyterHub(Application):
name = spec['name']
if self.domain:
if self.subdomain_host:
parsed_host = urlparse(self.subdomain_host)
domain = self.subdomain_hook(name, self.domain, kind="service")
domain = self.subdomain_hook(name, parsed_host.hostname, kind="service")
host = f"{parsed_host.scheme}://{domain}"
if parsed_host.port:
host = f"{host}:{parsed_host.port}"
@@ -2974,6 +3037,7 @@ class JupyterHub(Application):
spawner_class=self.spawner_class,
base_url=self.base_url,
default_url=self.default_url,
public_url=urlparse(self.public_url) if self.public_url else "",
cookie_secret=self.cookie_secret,
cookie_max_age_days=self.cookie_max_age_days,
redirect_to_server=self.redirect_to_server,

View File

@@ -138,6 +138,10 @@ class BaseHandler(RequestHandler):
def domain(self):
return self.settings['domain']
@property
def public_url(self):
    """The ``public_url`` entry of the application settings."""
    app_settings = self.settings
    return app_settings['public_url']
@property
def db(self):
    """The ``db`` entry of the application settings."""
    app_settings = self.settings
    return app_settings['db']
@@ -577,8 +581,13 @@ class BaseHandler(RequestHandler):
# tornado <4.2 have a bug that consider secure==True as soon as
# 'secure' kwarg is passed to set_secure_cookie
kwargs = {'httponly': True}
if self.request.protocol == 'https':
kwargs['secure'] = True
public_url = self.settings.get("public_url")
if public_url:
if public_url.scheme == 'https':
kwargs['secure'] = True
else:
if self.request.protocol == 'https':
kwargs['secure'] = True
kwargs.update(self.settings.get('cookie_options', {}))
kwargs.update(overrides)
@@ -670,8 +679,15 @@ class BaseHandler(RequestHandler):
next_url = self.get_argument('next', default='')
# protect against some browsers' buggy handling of backslash as slash
next_url = next_url.replace('\\', '%5C')
proto = get_browser_protocol(self.request)
host = self.request.host
public_url = self.settings.get("public_url")
if public_url:
proto = public_url.scheme
host = public_url.netloc
else:
# guess from request
proto = get_browser_protocol(self.request)
host = self.request.host
if next_url.startswith("///"):
# strip more than 2 leading // down to 2
# because urlparse treats that as empty netloc,

View File

@@ -37,7 +37,7 @@ import uuid
import warnings
from http import HTTPStatus
from unittest import mock
from urllib.parse import urlencode
from urllib.parse import urlencode, urlparse
from tornado.httpclient import AsyncHTTPClient, HTTPRequest
from tornado.httputil import url_concat
@@ -924,8 +924,13 @@ class HubOAuth(HubAuth):
# OAuth that doesn't complete shouldn't linger too long.
'max_age': 600,
}
if get_browser_protocol(handler.request) == 'https':
kwargs['secure'] = True
public_url = os.getenv("JUPYTERHUB_PUBLIC_URL")
if public_url:
if urlparse(public_url).scheme == 'https':
kwargs['secure'] = True
else:
if get_browser_protocol(handler.request) == 'https':
kwargs['secure'] = True
# load user cookie overrides
kwargs.update(self.cookie_options)
handler.set_secure_cookie(cookie_name, b64_state, **kwargs)

View File

@@ -162,6 +162,8 @@ class Spawner(LoggingConfigurable):
hub = Any()
orm_spawner = Any()
cookie_options = Dict()
public_url = Unicode(help="Public URL of this spawner's server")
public_hub_url = Unicode(help="Public URL of the Hub itself")
db = Any()
@@ -1047,6 +1049,10 @@ class Spawner(LoggingConfigurable):
bind_url = f"{proto}://{self.ip}:{self.port}{base_url}"
env["JUPYTERHUB_SERVICE_URL"] = bind_url
# the public URLs of this server and the Hub
env["JUPYTERHUB_PUBLIC_URL"] = self.public_url
env["JUPYTERHUB_PUBLIC_HUB_URL"] = self.public_hub_url
# Put in limit and guarantee info if they exist.
# Note that this is for use by the humans / notebook extensions in the
# single-user notebook server, and not for direct usage by the spawners

View File

@@ -1,3 +1,6 @@
from unittest import mock
from urllib.parse import urlparse
import pytest
from .. import orm
@@ -66,3 +69,42 @@ def test_sync_groups(app, user, group_names):
def test_server_url(app, user, server_name, path):
user_url = user.url
assert user.server_url(server_name) == user_url + path
@pytest.mark.parametrize(
    "server_name, public_url, subdomain_host, expected_url",
    [
        ("", "", "", ""),
        ("name", "", "", ""),
        ("", "https://hub.tld/PREFIX/", "", "https://hub.tld/PREFIX/user/USERNAME/"),
        (
            "name",
            "https://hub.tld/PREFIX/",
            "",
            "https://hub.tld/PREFIX/user/USERNAME/name/",
        ),
        (
            "name",
            "",
            "https://hub.tld:123",
            "https://USERNAME.hub.tld:123/PREFIX/user/USERNAME/name/",
        ),
    ],
)
def test_public_url(app, user, server_name, public_url, subdomain_host, expected_url):
    """Check ``user.public_url(server_name)`` for each settings combination.

    ``USERNAME`` and ``PREFIX`` in the parametrized URLs are placeholders,
    filled in from the user's escaped name and the app's base_url.
    """
    prefix = app.base_url.strip("/")
    expected_url = expected_url.replace("USERNAME", user.escaped_name)
    expected_url = expected_url.replace("PREFIX", prefix)

    # settings hold public_url as a parsed URL when set, '' otherwise
    if public_url:
        public_url = urlparse(public_url.replace("PREFIX", prefix))

    settings_overrides = {
        "subdomain_host": subdomain_host,
        "domain": urlparse(subdomain_host).hostname,
        "public_url": public_url,
    }
    with mock.patch.dict(user.settings, settings_overrides):
        public_server_url = user.public_url(server_name)
    assert public_server_url == expected_url

View File

@@ -4,7 +4,7 @@ import json
import warnings
from collections import defaultdict
from datetime import timedelta
from urllib.parse import quote, urlparse
from urllib.parse import quote, urlparse, urlunparse
from sqlalchemy import inspect
from tornado import gen, web
@@ -438,6 +438,20 @@ class User:
)
spawn_kwargs.update(ssl_kwargs)
# public URLs
if self.settings.get("public_url"):
public_url = self.settings["public_url"]
hub = self.settings.get('hub')
if hub is None:
# only in mock tests
hub_path = "/hub/"
else:
hub_path = hub.base_url
spawn_kwargs["public_hub_url"] = urlunparse(
public_url._replace(path=hub_path)
)
spawn_kwargs["public_url"] = self.public_url(server_name)
# update with kwargs. Mainly for testing.
spawn_kwargs.update(kwargs)
spawner = spawner_class(**spawn_kwargs)
@@ -541,12 +555,19 @@ class User:
@property
def host(self):
"""Get the *host* for my server (proto://domain[:port])"""
# FIXME: escaped_name probably isn't escaped enough in general for a domain fragment
parsed = urlparse(self.settings['subdomain_host'])
h = f'{parsed.scheme}://{self.domain}'
if parsed.port:
h += ':%i' % parsed.port
return h
# if subdomains are used, use our domain
if self.settings.get('subdomain_host'):
parsed = urlparse(self.settings['subdomain_host'])
h = f"{parsed.scheme}://{self.domain}"
if parsed.port:
h = f"{h}:{parsed.port}"
return h
elif self.settings.get("public_url"):
# no subdomain, use public host url without path
return urlunparse(self.settings["public_url"]._replace(path=""))
else:
return ""
@property
def url(self):
@@ -554,8 +575,8 @@ class User:
Full name.domain/path if using subdomains, otherwise just my /base/url
"""
if self.settings.get('subdomain_host'):
return f'{self.host}{self.base_url}'
if self.settings.get("subdomain_host"):
return f"{self.host}{self.base_url}"
else:
return self.base_url
@@ -566,6 +587,24 @@ class User:
else:
return url_path_join(self.url, url_escape_path(server_name), "/")
def public_url(self, server_name=''):
    """Return the public URL of the named server, or '' if none is known.

    Starts from ``server_url(server_name)``. If that is already a full URL
    it is returned as-is; if it is only a path, it is grafted onto the
    ``public_url`` setting, and when that setting is empty or missing the
    result is the empty string.
    """
    server_url = self.server_url(server_name)
    if "://" in server_url:
        # server_url is a full URL when using subdomains: nothing to add
        return server_url

    # not using subdomains, public URL may be specified
    hub_public_url = self.settings.get("public_url")
    if not hub_public_url:
        # no public url (from subdomain or host), leave unspecified
        return ""
    # put the server's base_url path prefix on the public host
    return urlunparse(hub_public_url._replace(path=server_url))
def progress_url(self, server_name=''):
"""API URL for progress endpoint for a server with a given name"""
url_parts = [self.settings['hub'].base_url, 'api/users', self.escaped_name]