From c6ed41e322a1c7ddca714c6ff6cd13b92aaeb2bb Mon Sep 17 00:00:00 2001
From: Min RK <benjaminrk@gmail.com>
Date: Thu, 28 Apr 2022 13:35:37 +0200
Subject: [PATCH] don't confuse :// in next_url query params for a redirect
 hostname

---
 jupyterhub/handlers/base.py    | 31 +++++++++++++++++--------------
 jupyterhub/tests/test_pages.py |  8 +++++++-
 2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py
index 06d77b74..bc4ca548 100644
--- a/jupyterhub/handlers/base.py
+++ b/jupyterhub/handlers/base.py
@@ -635,29 +635,32 @@ class BaseHandler(RequestHandler):
         next_url = next_url.replace('\\', '%5C')
         proto = get_browser_protocol(self.request)
         host = self.request.host
+        if next_url.startswith("///"):
+            # strip more than 2 leading // down to 2
+            # because urlparse treats that as empty netloc,
+            # whereas browsers treat more than two leading // the same as //,
+            # so netloc is the first non-/ bit
+            next_url = "//" + next_url.lstrip("/")
+        parsed_next_url = urlparse(next_url)
 
         if (next_url + '/').startswith((f'{proto}://{host}/', f'//{host}/',)) or (
             self.subdomain_host
-            and urlparse(next_url).netloc
-            and ("." + urlparse(next_url).netloc).endswith(
+            and parsed_next_url.netloc
+            and ("." + parsed_next_url.netloc).endswith(
                 "." + urlparse(self.subdomain_host).netloc
             )
         ):
             # treat absolute URLs for our host as absolute paths:
-            # below, redirects that aren't strictly paths
-            parsed = urlparse(next_url)
-            next_url = parsed.path
-            if parsed.query:
-                next_url = next_url + '?' + parsed.query
-            if parsed.fragment:
-                next_url = next_url + '#' + parsed.fragment
+            # below, redirects that aren't strictly paths are rejected
+            next_url = parsed_next_url.path
+            if parsed_next_url.query:
+                next_url = next_url + '?' + parsed_next_url.query
+            if parsed_next_url.fragment:
+                next_url = next_url + '#' + parsed_next_url.fragment
+            parsed_next_url = urlparse(next_url)
 
         # if it still has host info, it didn't match our above check for *this* host
-        if next_url and (
-            '://' in next_url
-            or next_url.startswith('//')
-            or not next_url.startswith('/')
-        ):
+        if next_url and (parsed_next_url.netloc or not next_url.startswith('/')):
             self.log.warning("Disallowing redirect outside JupyterHub: %r", next_url)
             next_url = ''
 
diff --git a/jupyterhub/tests/test_pages.py b/jupyterhub/tests/test_pages.py
index 5d3911bb..5293051c 100644
--- a/jupyterhub/tests/test_pages.py
+++ b/jupyterhub/tests/test_pages.py
@@ -768,6 +768,10 @@ async def test_login_strip(app):
         (False, '/user/other', '/hub/user/other', None),
         (False, '/absolute', '/absolute', None),
         (False, '/has?query#andhash', '/has?query#andhash', None),
+        # :// in query string or fragment
+        (False, '/has?repo=https/host.git', '/has?repo=https/host.git', None),
+        (False, '/has?repo=https://host.git', '/has?repo=https://host.git', None),
+        (False, '/has#repo=https://host.git', '/has#repo=https://host.git', None),
         # next_url outside is not allowed
         (False, 'relative/path', '', None),
         (False, 'https://other.domain', '', None),
@@ -807,7 +811,9 @@ async def test_login_redirect(app, running, next_url, location, params):
     if params:
         url = url_concat(url, params)
     if next_url:
-        if '//' not in next_url and next_url.startswith('/'):
+        if next_url.startswith('/') and not (
+            next_url.startswith("//") or urlparse(next_url).netloc
+        ):
             next_url = ujoin(app.base_url, next_url, '')
         url = url_concat(url, dict(next=next_url))