0.8.0b5

Merge pull request #1393 from minrk/spawn-future
improve reporting of spawn failure
2025-10-07 18:14:10 +00:00 · 2017-09-08 11:19:25 +02:00 · 2017-09-07 10:20:38 -07:00 · 2017-09-07 09:59:58 -07:00 · 2017-09-07 16:48:13 +02:00 · 2017-09-07 15:10:48 +02:00
11 changed files with 156 additions and 36 deletions
--- a/docs/source/reference/config-examples.md
+++ b/docs/source/reference/config-examples.md
@@ -49,9 +49,6 @@ c.JupyterHub.cookie_secret_file = pjoin(runtime_dir, 'cookie_secret')
 c.JupyterHub.db_url = pjoin(runtime_dir, 'jupyterhub.sqlite')
 # or `--db=/path/to/jupyterhub.sqlite` on the command-line

-# put the log file in /var/log
-c.JupyterHub.extra_log_file = '/var/log/jupyterhub.log'
-
 # use GitHub OAuthenticator for local users
 c.JupyterHub.authenticator_class = 'oauthenticator.LocalGitHubOAuthenticator'
 c.GitHubOAuthenticator.oauth_callback_url = os.environ['OAUTH_CALLBACK_URL']
@@ -79,7 +76,8 @@ export GITHUB_CLIENT_ID=github_id
 export GITHUB_CLIENT_SECRET=github_secret
 export OAUTH_CALLBACK_URL=https://example.com/hub/oauth_callback
 export CONFIGPROXY_AUTH_TOKEN=super-secret
-jupyterhub -f /etc/jupyterhub/jupyterhub_config.py
+# append log output to log file /var/log/jupyterhub.log
+jupyterhub -f /etc/jupyterhub/jupyterhub_config.py &>> /var/log/jupyterhub.log
 ```

 ## Using nginx reverse proxy
--- a/examples/cull-idle/cull_idle_servers.py
+++ b/examples/cull-idle/cull_idle_servers.py
@@ -40,8 +40,11 @@ from tornado.options import define, options, parse_command_line


@coroutine
-def cull_idle(url, api_token, timeout):
-    """cull idle single-user servers"""
+def cull_idle(url, api_token, timeout, cull_users=False):
+    """Shutdown idle single-user servers
+
+    If cull_users, inactive *users* will be deleted as well.
+    """
    auth_header = {
            'Authorization': 'token %s' % api_token
        }
@@ -54,26 +57,50 @@ def cull_idle(url, api_token, timeout):
    resp = yield client.fetch(req)
    users = json.loads(resp.body.decode('utf8', 'replace'))
    futures = []
-    for user in users:
-        last_activity = parse_date(user['last_activity'])
-        if user['server'] and last_activity < cull_limit:
-            app_log.info("Culling %s (inactive since %s)", user['name'], last_activity)
+
+    @coroutine
+    def cull_one(user, last_activity):
+        """cull one user"""
+
+        # shutdown server first. Hub doesn't allow deleting users with running servers.
+        if user['server']:
+            app_log.info("Culling server for %s (inactive since %s)", user['name'], last_activity)
            req = HTTPRequest(url=url + '/users/%s/server' % user['name'],
                method='DELETE',
                headers=auth_header,
            )
-            futures.append((user['name'], client.fetch(req)))
-        elif user['server'] and last_activity > cull_limit:
+            yield client.fetch(req)
+        if cull_users:
+            app_log.info("Culling user %s (inactive since %s)", user['name'], last_activity)
+            req = HTTPRequest(url=url + '/users/%s' % user['name'],
+                method='DELETE',
+                headers=auth_header,
+            )
+            yield client.fetch(req)
+
+    for user in users:
+        if not user['server'] and not cull_users:
+            # server not running and not culling users, nothing to do
+            continue
+        last_activity = parse_date(user['last_activity'])
+        if last_activity < cull_limit:
+            futures.append((user['name'], cull_one(user, last_activity)))
+        else:
            app_log.debug("Not culling %s (active since %s)", user['name'], last_activity)
    
    for (name, f) in futures:
        yield f
        app_log.debug("Finished culling %s", name)

+
 if __name__ == '__main__':
    define('url', default=os.environ.get('JUPYTERHUB_API_URL'), help="The JupyterHub API URL")
    define('timeout', default=600, help="The idle timeout (in seconds)")
    define('cull_every', default=0, help="The interval (in seconds) for checking for idle servers to cull")
+    define('cull_users', default=False,
+        help="""Cull users in addition to servers.
+                This is for use in temporary-user cases such as tmpnb.""",
+    )
    
    parse_command_line()
    if not options.cull_every:
@@ -82,7 +109,7 @@ if __name__ == '__main__':
    api_token = os.environ['JUPYTERHUB_API_TOKEN']
    
    loop = IOLoop.current()
-    cull = lambda : cull_idle(options.url, api_token, options.timeout)
+    cull = lambda : cull_idle(options.url, api_token, options.timeout, options.cull_users)
    # run once before scheduling periodic call
    loop.run_sync(cull)
    # schedule periodic cull
--- a/jupyterhub/_version.py
+++ b/jupyterhub/_version.py
@@ -7,7 +7,7 @@ version_info = (
    0,
    8,
    0,
-    'b4',
+    'b5',
 )

 __version__ = '.'.join(map(str, version_info))
--- a/jupyterhub/app.py
+++ b/jupyterhub/app.py
@@ -801,12 +801,10 @@ class JupyterHub(Application):
        self.handlers = self.add_url_prefix(self.hub_prefix, h)
        # some extra handlers, outside hub_prefix
        self.handlers.extend([
-            (r"%s" % self.hub_prefix.rstrip('/'), web.RedirectHandler,
-                {
-                    "url": self.hub_prefix,
-                    "permanent": False,
-                }
-            ),
+            # add trailing / to `/hub`
+            (self.hub_prefix.rstrip('/'), handlers.AddSlashHandler),
+            # add trailing / to `/user|services/:name`
+            (r"%s(user|services)/([^/]+)" % self.base_url, handlers.AddSlashHandler),
            (r"(?!%s).*" % self.hub_prefix, handlers.PrefixRedirectHandler),
            (r'(.*)', handlers.Template404),
        ])
--- a/jupyterhub/handlers/base.py
+++ b/jupyterhub/handlers/base.py
@@ -376,6 +376,9 @@ class BaseHandler(RequestHandler):

    @gen.coroutine
    def spawn_single_user(self, user, server_name='', options=None):
+        # in case of error, include 'try again from /hub/home' message
+        self.extra_error_html = self.spawn_home_error
+
        user_server_name = user.name
        if self.allow_named_servers and not server_name:
            server_name = default_server_name(user)
@@ -440,11 +443,7 @@ class BaseHandler(RequestHandler):
            otherwise it is called immediately.
            """
            # wait for spawn Future
-            try:
-                yield spawn_future
-            except Exception:
-                spawner._spawn_pending = False
-                raise
+            yield spawn_future
            toc = IOLoop.current().time()
            self.log.info("User %s took %.3f seconds to start", user_server_name, toc-tic)
            self.statsd.timing('spawner.success', (toc - tic) * 1000)
@@ -459,10 +458,22 @@ class BaseHandler(RequestHandler):
                spawner.add_poll_callback(self.user_stopped, user, server_name)
            finally:
                spawner._proxy_pending = False
-                spawner._spawn_pending = False
+
+        # hook up spawner._spawn_future so that other requests can await
+        # this result
+        finish_spawn_future = spawner._spawn_future = finish_user_spawn()
+        def _clear_spawn_future(f):
+            # clear spawner._spawn_future when it's done
+            # keep an exception around, though, to prevent repeated implicit spawns
+            # if spawn is failing
+            if f.exception() is None:
+                spawner._spawn_future = None
+            # Now we're all done. clear _spawn_pending flag
+            spawner._spawn_pending = False
+        finish_spawn_future.add_done_callback(_clear_spawn_future)

        try:
-            yield gen.with_timeout(timedelta(seconds=self.slow_spawn_timeout), finish_user_spawn())
+            yield gen.with_timeout(timedelta(seconds=self.slow_spawn_timeout), finish_spawn_future)
        except gen.TimeoutError:
            # waiting_for_response indicates server process has started,
            # but is yet to become responsive.
@@ -479,7 +490,8 @@ class BaseHandler(RequestHandler):
            if status is not None:
                toc = IOLoop.current().time()
                self.statsd.timing('spawner.failure', (toc - tic) * 1000)
-                raise web.HTTPError(500, "Spawner failed to start [status=%s]" % status)
+                raise web.HTTPError(500, "Spawner failed to start [status=%s]. The logs for %s may contain details." % (
+                    status, spawner._log_name))

            if spawner._waiting_for_response:
                # hit timeout waiting for response, but server's running.
@@ -549,6 +561,19 @@ class BaseHandler(RequestHandler):
    # template rendering
    #---------------------------------------------------------------

+    @property
+    def spawn_home_error(self):
+        """Extra message pointing users to try spawning again from /hub/home.
+
+        Should be added to `self.extra_error_html` for any handler
+        that could serve a failed spawn message.
+        """
+        home = url_path_join(self.hub.base_url, 'home')
+        return (
+            "You can try restarting your server from the "
+            "<a href='{home}'>home page</a>.".format(home=home)
+        )
+
    def get_template(self, name):
        """Return the jinja template object for a given name"""
        return self.settings['jinja2_env'].get_template(name)
@@ -596,6 +621,7 @@ class BaseHandler(RequestHandler):
            status_code=status_code,
            status_message=status_message,
            message=message,
+            extra_error_html=getattr(self, 'extra_error_html', ''),
            exception=exception,
        )

@@ -649,10 +675,13 @@ class UserSpawnHandler(BaseHandler):
        current_user = self.get_current_user()

        if current_user and current_user.name == name:
+            # if spawning fails for any reason, point users to /hub/home to retry
+            self.extra_error_html = self.spawn_home_error
+
            # If people visit /user/:name directly on the Hub,
            # the redirects will just loop, because the proxy is bypassed.
            # Try to check for that and warn,
-            # though the user-facing behavior is unchainged
+            # though the user-facing behavior is unchanged
            host_info = urlparse(self.request.full_url())
            port = host_info.port
            if not port:
@@ -664,8 +693,34 @@ class UserSpawnHandler(BaseHandler):
                    Make sure to connect to the proxied public URL %s
                    """, self.request.full_url(), self.proxy.public_url)

-            # logged in as correct user, spawn the server
+            # logged in as correct user, check for pending spawn
            spawner = current_user.spawner
+
+            # First, check for previous failure.
+            if (
+                not spawner.active
+                and spawner._spawn_future
+                and spawner._spawn_future.done()
+                and spawner._spawn_future.exception()
+            ):
+                # Condition: spawner not active and _spawn_future exists and contains an Exception
+                # Implicit spawn on /user/:name is not allowed if the user's last spawn failed.
+                # We should point the user to Home if the most recent spawn failed.
+                self.log.error("Preventing implicit spawn for %s because last spawn failed: %s",
+                    spawner._log_name, spawner._spawn_future.exception())
+                raise spawner._spawn_future.exception()
+
+            # check for pending spawn
+            if spawner.pending and spawner._spawn_future:
+                # wait on the pending spawn
+                self.log.debug("Waiting for %s pending %s", spawner._log_name, spawner.pending)
+                try:
+                    yield gen.with_timeout(timedelta(seconds=self.slow_spawn_timeout), spawner._spawn_future)
+                except gen.TimeoutError:
+                    self.log.info("Pending spawn for %s didn't finish in %.1f seconds", spawner._log_name, self.slow_spawn_timeout)
+                    pass
+
+            # we may have waited above, check pending again:
            if spawner.pending:
                self.log.info("%s is pending %s", spawner._log_name, spawner.pending)
                # spawn has started, but not finished
@@ -679,6 +734,8 @@ class UserSpawnHandler(BaseHandler):
                status = yield spawner.poll()
            else:
                status = 0
+
+            # server is not running, trigger spawn
            if status is not None:
                if spawner.options_form:
                    self.redirect(url_concat(url_path_join(self.hub.base_url, 'spawn'),
@@ -687,6 +744,15 @@ class UserSpawnHandler(BaseHandler):
                else:
                    yield self.spawn_single_user(current_user)

+            # spawn didn't finish, show pending page
+            if spawner.pending:
+                self.log.info("%s is pending %s", spawner._log_name, spawner.pending)
+                # spawn has started, but not finished
+                self.statsd.incr('redirects.user_spawn_pending', 1)
+                html = self.render_template("spawn_pending.html", user=current_user)
+                self.finish(html)
+                return
+
            # We do exponential backoff here - since otherwise we can get stuck in a redirect loop!
            # This is important in many distributed proxy implementations - those are often eventually
            # consistent and can take upto a couple of seconds to actually apply throughout the cluster.
@@ -786,6 +852,13 @@ class CSPReportHandler(BaseHandler):
        self.statsd.incr('csp_report')


+class AddSlashHandler(BaseHandler):
+    """Handler for adding trailing slash to URLs that need them"""
+    def get(self, *args):
+        src = urlparse(self.request.uri)
+        dest = src._replace(path=src.path + '/')
+        self.redirect(urlunparse(dest))
+
 default_handlers = [
    (r'/user/([^/]+)(/.*)?', UserSpawnHandler),
    (r'/user-redirect/(.*)?', UserRedirectHandler),
--- a/jupyterhub/handlers/pages.py
+++ b/jupyterhub/handlers/pages.py
@@ -67,9 +67,13 @@ class HomeHandler(BaseHandler):
        if user.running:
            # trigger poll_and_notify event in case of a server that died
            yield user.spawner.poll_and_notify()
+        # send the user to /spawn if they aren't running,
+        # to establish that this is an explicit spawn request rather
+        # than an implicit one, which can be caused by any link to `/user/:name`
+        url = user.url if user.running else url_path_join(self.hub.base_url, 'spawn')
        html = self.render_template('home.html',
            user=user,
-            url=user.url,
+            url=url,
        )
        self.finish(html)

@@ -92,7 +96,10 @@ class SpawnHandler(BaseHandler):

    @web.authenticated
    def get(self):
-        """GET renders form for spawning with user-specified options"""
+        """GET renders form for spawning with user-specified options
+
+        or triggers spawn via redirect if there is no form.
+        """
        user = self.get_current_user()
        if not self.allow_named_servers and user.running:
            url = user.url
@@ -102,7 +109,12 @@ class SpawnHandler(BaseHandler):
        if user.spawner.options_form:
            self.finish(self._render_form())
        else:
-            # not running, no form. Trigger spawn.
+            # Explicit spawn request: clear _spawn_future
+            # which may have been saved to prevent implicit spawns
+            # after a failure.
+            if user.spawner._spawn_future and user.spawner._spawn_future.done():
+                user.spawner._spawn_future = None
+            # not running, no form. Trigger spawn by redirecting to /user/:name
            self.redirect(user.url)

    @web.authenticated
--- a/jupyterhub/spawner.py
+++ b/jupyterhub/spawner.py
@@ -54,6 +54,7 @@ class Spawner(LoggingConfigurable):
    _proxy_pending = False
    _waiting_for_response = False
    _jupyterhub_version = None
+    _spawn_future = None

    @property
    def _log_name(self):
--- a/jupyterhub/tests/test_pages.py
+++ b/jupyterhub/tests/test_pages.py
@@ -126,7 +126,7 @@ def test_spawn_redirect(app):
    # should have started server
    status = yield u.spawner.poll()
    assert status is None
-    
+
    # test spawn page when server is already running (just redirect)
    r = yield get_page('spawn', app, cookies=cookies)
    r.raise_for_status()
@@ -134,6 +134,12 @@ def test_spawn_redirect(app):
    path = urlparse(r.url).path
    assert path == ujoin(app.base_url, '/user/%s/' % name)

+    # test handling of trailing slash on `/user/name`
+    r = yield get_page('user/' + name, app, cookies=cookies)
+    r.raise_for_status()
+    path = urlparse(r.url).path
+    assert path == ujoin(app.base_url, '/user/%s/' % name)
+

@pytest.mark.gen_test
 def test_spawn_page(app):
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,5 +4,5 @@ tornado>=4.1
 jinja2
 pamela
 python-oauth2>=1.0
-SQLAlchemy>=1.0
+SQLAlchemy>=1.1
 requests
--- a/share/jupyter/hub/templates/error.html
+++ b/share/jupyter/hub/templates/error.html
@@ -22,6 +22,11 @@
    {{message_html | safe}}
  </p>
  {% endif %}
+  {% if extra_error_html %}
+  <p>
+    {{extra_error_html | safe}}
+  </p>
+  {% endif %}
  {% endblock error_detail %}
 </div>

--- a/share/jupyter/hub/templates/login.html
+++ b/share/jupyter/hub/templates/login.html
@@ -8,7 +8,7 @@
 {% block login %}
 <div id="login-main" class="container">
 {% if custom_html %}
-{{ custom_html }}
+{{ custom_html | safe }}
 {% elif login_service %}
 <div class="service-login">
  <a role="button" class='btn btn-jupyter btn-lg' href='{{authenticator_login_url}}'>
Author	SHA1	Message	Date
Min RK	6c89de082f	0.8.0b5	2017-09-08 11:19:25 +02:00
Carol Willing	6fb31cc613	Merge pull request #1393 from minrk/spawn-future improve reporting of spawn failure	2017-09-07 10:20:38 -07:00
Carol Willing	cfb22baf05	Merge pull request #1399 from minrk/trailing-slash add trailing slash on /user/name	2017-09-07 09:59:58 -07:00
Min RK	2d0c1ff0a8	Merge pull request #1404 from minrk/sqla-11 we require sqlalchemy 1.1	2017-09-07 16:48:13 +02:00
Min RK	7789e13879	we require sqlalchemy 1.1 for enum support [ref](http://docs.sqlalchemy.org/en/latest/changelog/changelog_11.html#change-9d6d98d7acabc8564b8eebb11c28a624)	2017-09-07 15:10:48 +02:00
Yuvi Panda	f7b90e2c09	Merge pull request #1400 from minrk/auth-custom-html allow Authenticator.custom_html to be HTML	2017-09-06 11:56:14 -07:00
Carol Willing	ccb29167dd	Merge pull request #1392 from minrk/rm-extra-log update docs to preferred method of writing to log file	2017-09-06 07:32:25 -07:00
Min RK	4ef1eca3c9	allow Authenticator.custom_html to be HTML	2017-09-06 15:14:26 +02:00
Min RK	c26ede30b9	Point users to /hub/home to retry spawn on spawn failure	2017-09-06 15:03:26 +02:00
Min RK	64c69a3164	update docs to preferred method of writing to log file extra_log_files config is unreliable and doesn't capture all output. Piping output is much more robust and reliable.	2017-09-06 14:38:33 +02:00
Min RK	ad7867ff11	add trailing slash on /user/name proxies may not route `/user/name` correctly, only `/user/name/...`, so make sure that `/user/name` is redirected to `/user/name/` this manifests as a redirect loop between /user/name and /hub/user/name when a route exists but /user/name is still being routed to the Hub	2017-09-06 12:37:22 +02:00
Yuvi Panda	14fc1588f8	Merge pull request #1380 from minrk/cull-idle-users add --cull-users to cull_idle_servers	2017-09-05 12:48:24 -07:00
Min RK	7e5a925f4f	raise original spawn failure on implicit spawn so the error message is the same, however it was arrived at. potential downside: it could look like the current request is spawning and failing, rather than the reality that a previous spawn failed and we are just re-presenting the earlier error. It's possible for there to have been a long time in between spawn and error.	2017-09-04 14:27:01 +02:00
Min RK	3c61e422da	prevent implicit spawn on `/user/:name` if previous spawn failed require users to visit /hub/home and click 'Start My Server' to get a new server Visits to /hub/user/:name will get an error if the previous spawn failed, rather than triggering a new spawn. This should guarantee that a user sees an error if their spawn failed, regardless of when the failure occurred and how long it took. Some cases of slow errors could result in triggering a new spawn indefinitely without the user seeing an error message. /hub/spawn was a simple redirect to /user/:name in the absence of a spawn form, but now clears the `_spawn_future` prior to redirect to signal that a new spawn has been explicitly requested in the case of a prior failure.	2017-09-04 14:17:24 +02:00
Min RK	0e2cf37981	point to single-user logs when spawner fails to start	2017-09-04 13:14:07 +02:00
Min RK	503d5e389f	render pending page if triggered spawn doesn't finish instead of redirecting, which starts redirect loop counter	2017-09-04 12:02:40 +02:00
Min RK	7b1e61ab2c	allow waiting for pending spawn via spawner._spawn_future avoids losing errors when visiting `/hub/user/:name` during a pending spawn	2017-09-04 11:53:42 +02:00
Min RK	f9a90d2494	add --cull-users to cull_idle_servers allows deleting idle users in addition to servers for temp-user cases such as binder/tmpnb	2017-08-30 10:31:44 +02:00