From 7b1e61ab2ce95d558a86d5a457ba28176f38a9ba Mon Sep 17 00:00:00 2001 From: Min RK Date: Mon, 4 Sep 2017 11:53:42 +0200 Subject: [PATCH 1/6] allow waiting for pending spawn via spawner._spawn_future avoids losing errors when visiting `/hub/user/:name` during a pending spawn --- jupyterhub/handlers/base.py | 27 ++++++++++++++++++++++++--- jupyterhub/spawner.py | 1 + 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py index a9575ee0..97446b49 100644 --- a/jupyterhub/handlers/base.py +++ b/jupyterhub/handlers/base.py @@ -461,8 +461,17 @@ class BaseHandler(RequestHandler): spawner._proxy_pending = False spawner._spawn_pending = False + # hook up spawner._spawn_future so that other requests can await + # this result + finish_spawn_future = spawner._spawn_future = finish_user_spawn() + def _clear_spawn_future(f): + # clear spawner._spawn_future when it's done + spawner._spawn_future = None + finish_spawn_future.add_done_callback(_clear_spawn_future) + + try: - yield gen.with_timeout(timedelta(seconds=self.slow_spawn_timeout), finish_user_spawn()) + yield gen.with_timeout(timedelta(seconds=self.slow_spawn_timeout), finish_spawn_future) except gen.TimeoutError: # waiting_for_response indicates server process has started, # but is yet to become responsive. @@ -652,7 +661,7 @@ class UserSpawnHandler(BaseHandler): # If people visit /user/:name directly on the Hub, # the redirects will just loop, because the proxy is bypassed. # Try to check for that and warn, - # though the user-facing behavior is unchainged + # though the user-facing behavior is unchanged host_info = urlparse(self.request.full_url()) port = host_info.port if not port: @@ -664,8 +673,18 @@ class UserSpawnHandler(BaseHandler): Make sure to connect to the proxied public URL %s """, self.request.full_url(), self.proxy.public_url) - # logged in as correct user, spawn the server + # logged in as correct user, check for pending spawn spawner = current_user.spawner + if spawner.pending and spawner._spawn_future: + # wait on the pending spawn + self.log.debug("Waiting for %s pending %s", spawner._log_name, spawner.pending) + try: + yield gen.with_timeout(timedelta(seconds=self.slow_spawn_timeout), spawner._spawn_future) + except gen.TimeoutError: + self.log.info("Pending spawn for %s didn't finish in %.1f seconds", spawner._log_name, self.slow_spawn_timeout) + pass + + # we may have waited above, check pending again: if spawner.pending: self.log.info("%s is pending %s", spawner._log_name, spawner.pending) # spawn has started, but not finished @@ -679,6 +698,8 @@ class UserSpawnHandler(BaseHandler): status = yield spawner.poll() else: status = 0 + + # server is not running, trigger spawn if status is not None: if spawner.options_form: self.redirect(url_concat(url_path_join(self.hub.base_url, 'spawn'), diff --git a/jupyterhub/spawner.py b/jupyterhub/spawner.py index d124e3f3..60b322ed 100644 --- a/jupyterhub/spawner.py +++ b/jupyterhub/spawner.py @@ -54,6 +54,7 @@ class Spawner(LoggingConfigurable): _proxy_pending = False _waiting_for_response = False _jupyterhub_version = None + _spawn_future = None @property def _log_name(self): From 503d5e389fe94e8a6f89dc43069d25e5b477524b Mon Sep 17 00:00:00 2001 From: Min RK Date: Mon, 4 Sep 2017 12:02:40 +0200 Subject: [PATCH 2/6] render pending page if triggered spawn doesn't finish instead of redirecting, which starts redirect loop counter --- jupyterhub/handlers/base.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py index 97446b49..0cbf6a99 100644 --- a/jupyterhub/handlers/base.py +++ b/jupyterhub/handlers/base.py @@ -708,6 +708,15 @@ class UserSpawnHandler(BaseHandler): else: yield self.spawn_single_user(current_user) + # spawn didn't finish, show pending page + if spawner.pending: + self.log.info("%s is pending %s", spawner._log_name, spawner.pending) + # spawn has started, but not finished + self.statsd.incr('redirects.user_spawn_pending', 1) + html = self.render_template("spawn_pending.html", user=current_user) + self.finish(html) + return + # We do exponential backoff here - since otherwise we can get stuck in a redirect loop! # This is important in many distributed proxy implementations - those are often eventually # consistent and can take upto a couple of seconds to actually apply throughout the cluster. From 0e2cf37981957257b68175f0f9d63ce1de2b494e Mon Sep 17 00:00:00 2001 From: Min RK Date: Mon, 4 Sep 2017 13:02:56 +0200 Subject: [PATCH 3/6] point to single-user logs when spawner fails to start --- jupyterhub/handlers/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py index 0cbf6a99..c116fd51 100644 --- a/jupyterhub/handlers/base.py +++ b/jupyterhub/handlers/base.py @@ -488,7 +488,8 @@ class BaseHandler(RequestHandler): if status is not None: toc = IOLoop.current().time() self.statsd.timing('spawner.failure', (toc - tic) * 1000) - raise web.HTTPError(500, "Spawner failed to start [status=%s]" % status) + raise web.HTTPError(500, "Spawner failed to start [status=%s]. The logs for %s may contain details." % ( + status, spawner._log_name)) if spawner._waiting_for_response: # hit timeout waiting for response, but server's running. From 3c61e422da87a980cf170e09349eb74814441ddf Mon Sep 17 00:00:00 2001 From: Min RK Date: Mon, 4 Sep 2017 14:17:24 +0200 Subject: [PATCH 4/6] prevent implicit spawn on `/user/:name` if previous spawn failed require users to visit /hub/home and click 'Start My Server' to get a new server Visits to /hub/user/:name will get an error if the previous spawn failed, rather than triggering a new spawn. This should guarantee that a user sees an error if their spawn failed, regardless of when the failure occurred and how long it took. Some cases of slow errors could result in triggering a new spawn indefinitely without the user seeing an error message. /hub/spawn was a simple redirect to /user/:name in the absence of a spawn form, but now clears the `_spawn_future` prior to redirect to signal that a new spawn has been explicitly requested in the case of a prior failure. --- jupyterhub/handlers/base.py | 32 ++++++++++++++++++++++++-------- jupyterhub/handlers/pages.py | 18 +++++++++++++++--- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py index c116fd51..741076f3 100644 --- a/jupyterhub/handlers/base.py +++ b/jupyterhub/handlers/base.py @@ -440,11 +440,7 @@ class BaseHandler(RequestHandler): otherwise it is called immediately. """ # wait for spawn Future - try: - yield spawn_future - except Exception: - spawner._spawn_pending = False - raise + yield spawn_future toc = IOLoop.current().time() self.log.info("User %s took %.3f seconds to start", user_server_name, toc-tic) self.statsd.timing('spawner.success', (toc - tic) * 1000) @@ -459,16 +455,19 @@ class BaseHandler(RequestHandler): spawner.add_poll_callback(self.user_stopped, user, server_name) finally: spawner._proxy_pending = False - spawner._spawn_pending = False # hook up spawner._spawn_future so that other requests can await # this result finish_spawn_future = spawner._spawn_future = finish_user_spawn() def _clear_spawn_future(f): # clear spawner._spawn_future when it's done - spawner._spawn_future = None + # keep an exception around, though, to prevent repeated implicit spawns + # if spawn is failing + if f.exception() is None: + spawner._spawn_future = None + # Now we're all done. clear _spawn_pending flag + spawner._spawn_pending = False finish_spawn_future.add_done_callback(_clear_spawn_future) - try: yield gen.with_timeout(timedelta(seconds=self.slow_spawn_timeout), finish_spawn_future) @@ -676,6 +675,23 @@ class UserSpawnHandler(BaseHandler): # logged in as correct user, check for pending spawn spawner = current_user.spawner + + # First, check for previous failure. + if ( + not spawner.active + and spawner._spawn_future + and spawner._spawn_future.done() + and spawner._spawn_future.exception() + ): + # Condition: spawner not active and _spawn_future exists and contains an Exception + # Implicit spawn on /user/:name is not allowed if the user's last spawn failed. + # Point the user to Home + # if the most recent spawn + self.log.error("Preventing implicit spawn for %s because last spawn failed: %s", + spawner._log_name, spawner._spawn_future.exception()) + raise web.HTTPError(500, "Last spawn failed. Try spawning again from the Home page.") + + # check for pending spawn if spawner.pending and spawner._spawn_future: # wait on the pending spawn self.log.debug("Waiting for %s pending %s", spawner._log_name, spawner.pending) diff --git a/jupyterhub/handlers/pages.py b/jupyterhub/handlers/pages.py index 7e51fb23..410e51dd 100644 --- a/jupyterhub/handlers/pages.py +++ b/jupyterhub/handlers/pages.py @@ -67,9 +67,13 @@ class HomeHandler(BaseHandler): if user.running: # trigger poll_and_notify event in case of a server that died yield user.spawner.poll_and_notify() + # send the user to /spawn if they aren't running, + # to establish that this is an explicit spawn request rather + # than an implicit one, which can be caused by any link to `/user/:name` + url = user.url if user.running else url_path_join(self.hub.base_url, 'spawn') html = self.render_template('home.html', user=user, - url=user.url, + url=url, ) self.finish(html) @@ -92,7 +96,10 @@ class SpawnHandler(BaseHandler): @web.authenticated def get(self): - """GET renders form for spawning with user-specified options""" + """GET renders form for spawning with user-specified options + + or triggers spawn via redirect if there is no form. + """ user = self.get_current_user() if not self.allow_named_servers and user.running: url = user.url @@ -102,7 +109,12 @@ class SpawnHandler(BaseHandler): if user.spawner.options_form: self.finish(self._render_form()) else: - # not running, no form. Trigger spawn. + # Explicit spawn request: clear _spawn_future + # which may have been saved to prevent implicit spawns + # after a failure. + if user.spawner._spawn_future and user.spawner._spawn_future.done(): + user.spawner._spawn_future = None + # not running, no form. Trigger spawn by redirecting to /user/:name self.redirect(user.url) @web.authenticated From 7e5a925f4fad61cd54725fa562f5ff8eb13ea386 Mon Sep 17 00:00:00 2001 From: Min RK Date: Mon, 4 Sep 2017 14:27:01 +0200 Subject: [PATCH 5/6] raise original spawn failure on implicit spawn so the error message is the same, however it was arrived at. potential downside: it could look like the current request is spawning and failing, rather than the reality that a previous spawn failed and we are just re-presenting the earlier error. It's possible for there to have been a long time in between spawn and error. --- jupyterhub/handlers/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py index 741076f3..a02ac5f3 100644 --- a/jupyterhub/handlers/base.py +++ b/jupyterhub/handlers/base.py @@ -689,7 +689,7 @@ class UserSpawnHandler(BaseHandler): # if the most recent spawn self.log.error("Preventing implicit spawn for %s because last spawn failed: %s", spawner._log_name, spawner._spawn_future.exception()) - raise web.HTTPError(500, "Last spawn failed. Try spawning again from the Home page.") + raise spawner._spawn_future.exception() # check for pending spawn if spawner.pending and spawner._spawn_future: From c26ede30b9796803b9616ab7588b2aa6a15fe00e Mon Sep 17 00:00:00 2001 From: Min RK Date: Wed, 6 Sep 2017 15:03:26 +0200 Subject: [PATCH 6/6] Point users to /hub/home to retry spawn on spawn failure --- jupyterhub/handlers/base.py | 23 +++++++++++++++++++++-- share/jupyter/hub/templates/error.html | 5 +++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py index a02ac5f3..b98631d6 100644 --- a/jupyterhub/handlers/base.py +++ b/jupyterhub/handlers/base.py @@ -376,6 +376,9 @@ class BaseHandler(RequestHandler): @gen.coroutine def spawn_single_user(self, user, server_name='', options=None): + # in case of error, include 'try again from /hub/home' message + self.extra_error_html = self.spawn_home_error + user_server_name = user.name if self.allow_named_servers and not server_name: server_name = default_server_name(user) @@ -558,6 +561,19 @@ class BaseHandler(RequestHandler): # template rendering #--------------------------------------------------------------- + @property + def spawn_home_error(self): + """Extra message pointing users to try spawning again from /hub/home. + + Should be added to `self.extra_error_html` for any handler + that could serve a failed spawn message. + """ + home = url_path_join(self.hub.base_url, 'home') + return ( + "You can try restarting your server from the " + "home page.".format(home=home) + ) + def get_template(self, name): """Return the jinja template object for a given name""" return self.settings['jinja2_env'].get_template(name) @@ -605,6 +621,7 @@ class BaseHandler(RequestHandler): status_code=status_code, status_message=status_message, message=message, + extra_error_html=getattr(self, 'extra_error_html', ''), exception=exception, ) @@ -658,6 +675,9 @@ class UserSpawnHandler(BaseHandler): current_user = self.get_current_user() if current_user and current_user.name == name: + # if spawning fails for any reason, point users to /hub/home to retry + self.extra_error_html = self.spawn_home_error + # If people visit /user/:name directly on the Hub, # the redirects will just loop, because the proxy is bypassed. # Try to check for that and warn, @@ -685,8 +705,7 @@ class UserSpawnHandler(BaseHandler): ): # Condition: spawner not active and _spawn_future exists and contains an Exception # Implicit spawn on /user/:name is not allowed if the user's last spawn failed. - # Point the user to Home - # if the most recent spawn + # We should point the user to Home if the most recent spawn failed. self.log.error("Preventing implicit spawn for %s because last spawn failed: %s", spawner._log_name, spawner._spawn_future.exception()) raise spawner._spawn_future.exception() diff --git a/share/jupyter/hub/templates/error.html b/share/jupyter/hub/templates/error.html index bef771a6..92fb5131 100644 --- a/share/jupyter/hub/templates/error.html +++ b/share/jupyter/hub/templates/error.html @@ -22,6 +22,11 @@ {{message_html | safe}}

{% endif %} + {% if extra_error_html %} +

+ {{extra_error_html | safe}} +

+ {% endif %} {% endblock error_detail %}