diff --git a/docs/source/changelog.md b/docs/source/changelog.md index fd2be09a..3793dce3 100644 --- a/docs/source/changelog.md +++ b/docs/source/changelog.md @@ -985,7 +985,7 @@ Bugfixes on 0.6: ### [0.6.0] - 2016-04-25 -- JupyterHub has moved to a new `jupyterhub` namespace on GitHub and Docker. What was `juptyer/jupyterhub` is now `jupyterhub/jupyterhub`, etc. +- JupyterHub has moved to a new `jupyterhub` namespace on GitHub and Docker. What was `jupyter/jupyterhub` is now `jupyterhub/jupyterhub`, etc. - `jupyterhub/jupyterhub` image on DockerHub no longer loads the jupyterhub_config.py in an ONBUILD step. A new `jupyterhub/jupyterhub-onbuild` image does this - Add statsd support, via `c.JupyterHub.statsd_{host,port,prefix}` - Update to traitlets 4.1 `@default`, `@observe` APIs for traits diff --git a/docs/source/reference/spawners.md b/docs/source/reference/spawners.md index b3fa15c5..ce39b598 100644 --- a/docs/source/reference/spawners.md +++ b/docs/source/reference/spawners.md @@ -37,14 +37,13 @@ Some examples include: Information about the user can be retrieved from `self.user`, an object encapsulating the user's name, authentication, and server info. -The return value of `Spawner.start` should be the (ip, port) of the running server. - -**NOTE:** When writing coroutines, _never_ `yield` in between a database change and a commit. +The return value of `Spawner.start` should be the `(ip, port)` of the running server, +or a full URL as a string. Most `Spawner.start` functions will look similar to this example: ```python -def start(self): +async def start(self): self.ip = '127.0.0.1' self.port = random_port() # get environment variables, @@ -56,8 +55,10 @@ def start(self): cmd.extend(self.cmd) cmd.extend(self.get_args()) - yield self._actually_start_server_somehow(cmd, env) - return (self.ip, self.port) + await self._actually_start_server_somehow(cmd, env) + # url may not match self.ip:self.port, but it could! 
+ url = self._get_connectable_url() + return url ``` When `Spawner.start` returns, the single-user server process should actually be running, @@ -65,6 +66,48 @@ not just requested. JupyterHub can handle `Spawner.start` being very slow (such as PBS-style batch queues, or instantiating whole AWS instances) via relaxing the `Spawner.start_timeout` config value. +#### Note on IPs and ports + +`Spawner.ip` and `Spawner.port` attributes set the _bind_ url, +which the single-user server should listen on +(passed to the single-user process via the `JUPYTERHUB_SERVICE_URL` environment variable). +The _return_ value is the ip and port (or full url) the Hub should _connect to_. +These are not necessarily the same, and usually won't be in any Spawner that works with remote resources or containers. + +The default for Spawner.ip, and Spawner.port is `127.0.0.1:{random}`, +which is appropriate for Spawners that launch local processes, +where everything is on localhost and each server needs its own port. +For remote or container Spawners, it will often make sense to use a different value, +such as `ip = '0.0.0.0'` and a fixed port, e.g. `8888`. +The defaults can be changed in the class, +preserving configuration with traitlets: + +```python +from traitlets import default +from jupyterhub.spawner import Spawner + +class MySpawner(Spawner): + @default("ip") + def _default_ip(self): + return '0.0.0.0' + + @default("port") + def _default_port(self): + return 8888 + + async def start(self): + env = self.get_env() + cmd = [] + # get jupyterhub command to run, + # typically ['jupyterhub-singleuser'] + cmd.extend(self.cmd) + cmd.extend(self.get_args()) + + remote_server_info = await self._actually_start_server_somehow(cmd, env) + url = self.get_public_url_from(remote_server_info) + return url +``` + ### Spawner.poll `Spawner.poll` should check if the spawner is still running. 
@@ -207,6 +250,73 @@ Additionally, configurable attributes for your spawner will appear in jupyterhub help output and auto-generated configuration files via `jupyterhub --generate-config`. +## Environment variables and command-line arguments + +Spawners mainly do one thing: launch a command in an environment. + +The command-line is constructed from user configuration: + +- Spawner.cmd (default: `['jupyterhub-singleuser']`) +- Spawner.args (cli args to pass to the cmd, default: empty) + +where the configuration: + +```python +c.Spawner.cmd = ["my-singleuser-wrapper"] +c.Spawner.args = ["--debug", "--flag"] +``` + +would result in spawning the command: + +```bash +my-singleuser-wrapper --debug --flag +``` + +The `Spawner.get_args()` method is how Spawner.args is accessed, +and can be used by Spawners to customize/extend user-provided arguments. + +Prior to 2.0, JupyterHub unconditionally added certain options _if specified_ to the command-line, +such as `--ip={Spawner.ip}` and `--port={Spawner.port}`. +These have now all been moved to environment variables, +and from JupyterHub 2.0, +the command-line launched by JupyterHub is fully specified by overridable configuration `Spawner.cmd + Spawner.args`. + +Most process configuration is passed via environment variables. +Additional variables can be specified via the `Spawner.environment` configuration. + +The process environment is returned by `Spawner.get_env`, which specifies the following environment variables: + +- `JUPYTERHUB_SERVICE_URL` - the _bind_ url where the server should launch its http server (`http://127.0.0.1:12345`). + This includes Spawner.ip and Spawner.port; _new in 2.0, prior to 2.0 ip,port were on the command-line and only if specified_ +- JUPYTERHUB_SERVICE_PREFIX - the URL prefix the service will run on (e.g. 
`/user/name/`) +- JUPYTERHUB_USER - the JupyterHub user's username +- JUPYTERHUB_SERVER_NAME - the server's name, if using named servers (default server has an empty name) +- JUPYTERHUB_API_URL - the full url for the JupyterHub API (http://127.0.0.1:8001/hub/api) +- JUPYTERHUB_BASE_URL - the base url of the whole jupyterhub deployment, i.e. the bit before `hub/` or `user/`, + as set by c.JupyterHub.base_url (default: `/`) +- JUPYTERHUB_API_TOKEN - the API token the server can use to make requests to the Hub. + This is also the OAuth client secret. +- JUPYTERHUB_CLIENT_ID - the OAuth client ID for authenticating visitors. +- JUPYTERHUB_OAUTH_CALLBACK_URL - the callback URL to use in oauth, typically `/user/:name/oauth_callback` + +Optional environment variables, depending on configuration: + +- `JUPYTERHUB_SSL_[KEYFILE|CERTFILE|CLIENT_CA]` - SSL configuration, when internal_ssl is enabled +- JUPYTERHUB_ROOT_DIR - the root directory of the server (notebook directory), when Spawner.notebook_dir is defined (new in 2.0) +- JUPYTERHUB_DEFAULT_URL - the default URL for the server (for redirects from /user/:name/), + if Spawner.default_url is defined + (new in 2.0, previously passed via cli) +- JUPYTERHUB_DEBUG=1 - generic debug flag, sets maximum log level when Spawner.debug is True + (new in 2.0, previously passed via cli) +- JUPYTERHUB_DISABLE_USER_CONFIG=1 - disable loading user config, + when Spawner.disable_user_config is True (new in 2.0, + previously passed via cli) + +- `JUPYTERHUB_[MEM|CPU]_[LIMIT|GUARANTEE]` - the values of cpu and memory limits and guarantees. + These are not expected to be enforced by the process, + but are made available as a hint, + e.g. for resource monitoring extensions. 
+ ## Spawners, resource limits, and guarantees (Optional) Some spawners of the single-user notebook servers allow setting limits or diff --git a/jupyterhub/singleuser/mixins.py b/jupyterhub/singleuser/mixins.py index d6d66ba1..0e8ade0c 100755 --- a/jupyterhub/singleuser/mixins.py +++ b/jupyterhub/singleuser/mixins.py @@ -52,6 +52,17 @@ from ..utils import make_ssl_context from ..utils import url_path_join +def _bool_env(key): + """Cast an environment variable to bool + + 0, empty, or unset is False; All other values are True. + """ + if os.environ.get(key, "") in {"", "0"}: + return False + else: + return True + + # Authenticate requests with the Hub @@ -278,6 +289,10 @@ class SingleUserNotebookAppMixin(Configurable): def _user_changed(self, change): self.log.name = change.new + @default("default_url") + def _default_url(self): + return os.environ.get("JUPYTERHUB_DEFAULT_URL", "/tree/") + hub_host = Unicode().tag(config=True) hub_prefix = Unicode('/hub/').tag(config=True) @@ -360,7 +375,26 @@ class SingleUserNotebookAppMixin(Configurable): """, ).tag(config=True) - @validate('notebook_dir') + @default("disable_user_config") + def _default_disable_user_config(self): + return _bool_env("JUPYTERHUB_DISABLE_USER_CONFIG") + + @default("root_dir") + def _default_root_dir(self): + if os.environ.get("JUPYTERHUB_ROOT_DIR"): + proposal = {"value": os.environ["JUPYTERHUB_ROOT_DIR"]} + # explicitly call validator, not called on default values + return self._notebook_dir_validate(proposal) + else: + return os.getcwd() + + # notebook_dir is used by the classic notebook server + # root_dir is the future in jupyter server + @default("notebook_dir") + def _default_notebook_dir(self): + return self._default_root_dir() + + @validate("notebook_dir", "root_dir") def _notebook_dir_validate(self, proposal): value = os.path.expanduser(proposal['value']) # Strip any trailing slashes @@ -376,6 +410,13 @@ class SingleUserNotebookAppMixin(Configurable): raise TraitError("No such notebook dir: 
%r" % value) return value + @default('log_level') + def _log_level_default(self): + if _bool_env("JUPYTERHUB_DEBUG"): + return logging.DEBUG + else: + return logging.INFO + @default('log_datefmt') def _log_datefmt_default(self): """Exclude date from default date format""" diff --git a/jupyterhub/spawner.py b/jupyterhub/spawner.py index 0b417fc3..fd18386c 100644 --- a/jupyterhub/spawner.py +++ b/jupyterhub/spawner.py @@ -39,7 +39,6 @@ from .traitlets import ByteSpecification from .traitlets import Callable from .traitlets import Command from .utils import exponential_backoff -from .utils import iterate_until from .utils import maybe_future from .utils import random_port from .utils import url_path_join @@ -246,11 +245,22 @@ class Spawner(LoggingConfigurable): ) ip = Unicode( - '', + '127.0.0.1', help=""" The IP address (or hostname) the single-user server should listen on. + Usually either '127.0.0.1' (default) or '0.0.0.0'. + The JupyterHub proxy implementation should be able to send packets to this interface. + + Subclasses which launch remotely or in containers + should override the default to '0.0.0.0'. + + .. versionchanged:: 2.0 + Default changed to '127.0.0.1', from ''. + In most cases, this does not result in a change in behavior, + as '' was interpreted as 'unspecified', + which used the subprocesses' own default, itself usually '127.0.0.1'. """, ).tag(config=True) @@ -811,8 +821,20 @@ class Spawner(LoggingConfigurable): 'activity', ) env['JUPYTERHUB_BASE_URL'] = self.hub.base_url[:-4] + if self.server: + base_url = self.server.base_url + if self.ip or self.port: + self.server.ip = self.ip + self.server.port = self.port env['JUPYTERHUB_SERVICE_PREFIX'] = self.server.base_url + else: + # this should only occur in mock/testing scenarios + base_url = '/' + + proto = 'https' if self.internal_ssl else 'http' + bind_url = f"{proto}://{self.ip}:{self.port}{base_url}" + env["JUPYTERHUB_SERVICE_URL"] = bind_url # Put in limit and guarantee info if they exist. 
# Note that this is for use by the humans / notebook extensions in the @@ -832,6 +854,20 @@ class Spawner(LoggingConfigurable): env['JUPYTERHUB_SSL_CERTFILE'] = self.cert_paths['certfile'] env['JUPYTERHUB_SSL_CLIENT_CA'] = self.cert_paths['cafile'] + if self.notebook_dir: + notebook_dir = self.format_string(self.notebook_dir) + env["JUPYTERHUB_ROOT_DIR"] = notebook_dir + + if self.default_url: + default_url = self.format_string(self.default_url) + env["JUPYTERHUB_DEFAULT_URL"] = default_url + + if self.debug: + env["JUPYTERHUB_DEBUG"] = "1" + + if self.disable_user_config: + env["JUPYTERHUB_DISABLE_USER_CONFIG"] = "1" + # env overrides from config. If the value is a callable, it will be called with # one parameter - the current spawner instance - and the return value # will be assigned to the environment variable. This will be called at @@ -843,7 +879,6 @@ class Spawner(LoggingConfigurable): env[key] = value(self) else: env[key] = value - return env async def get_url(self): @@ -1010,35 +1045,16 @@ class Spawner(LoggingConfigurable): """Return the arguments to be passed after self.cmd Doesn't expect shell expansion to happen. + + .. versionchanged:: 2.0 + Prior to 2.0, JupyterHub passed some options such as + ip, port, and default_url to the command-line. + JupyterHub 2.0 no longer builds any CLI args + other than `Spawner.cmd` and `Spawner.args`. + All values that come from jupyterhub itself + will be passed via environment variables. """ - args = [] - - if self.ip: - args.append('--ip=%s' % _quote_safe(self.ip)) - - if self.port: - args.append('--port=%i' % self.port) - elif self.server and self.server.port: - self.log.warning( - "Setting port from user.server is deprecated as of JupyterHub 0.7." 
- ) - args.append('--port=%i' % self.server.port) - - if self.notebook_dir: - notebook_dir = self.format_string(self.notebook_dir) - args.append('--notebook-dir=%s' % _quote_safe(notebook_dir)) - if self.default_url: - default_url = self.format_string(self.default_url) - args.append( - '--SingleUserNotebookApp.default_url=%s' % _quote_safe(default_url) - ) - - if self.debug: - args.append('--debug') - if self.disable_user_config: - args.append('--disable-user-config') - args.extend(self.args) - return args + return self.args def run_pre_spawn_hook(self): """Run the pre_spawn_hook if defined""" @@ -1482,7 +1498,8 @@ class LocalProcessSpawner(Spawner): async def start(self): """Start the single-user server.""" - self.port = random_port() + if self.port == 0: + self.port = random_port() cmd = [] env = self.get_env() diff --git a/jupyterhub/tests/mocksu.py b/jupyterhub/tests/mocksu.py index c5714cff..c9f4b85e 100644 --- a/jupyterhub/tests/mocksu.py +++ b/jupyterhub/tests/mocksu.py @@ -11,10 +11,10 @@ Handlers and their purpose include: - ArgsHandler: allowing retrieval of `sys.argv`. 
""" -import argparse import json import os import sys +from urllib.parse import urlparse from tornado import httpserver from tornado import ioloop @@ -36,7 +36,8 @@ class ArgsHandler(web.RequestHandler): self.write(json.dumps(sys.argv)) -def main(args): +def main(): + url = urlparse(os.environ["JUPYTERHUB_SERVICE_URL"]) options.logging = 'debug' log.enable_pretty_logging() app = web.Application( @@ -50,10 +51,11 @@ def main(args): if key and cert and ca: ssl_context = make_ssl_context(key, cert, cafile=ca, check_hostname=False) + assert url.scheme == "https" server = httpserver.HTTPServer(app, ssl_options=ssl_context) - log.app_log.info("Starting mock singleuser server at 127.0.0.1:%s", args.port) - server.listen(args.port, '127.0.0.1') + log.app_log.info(f"Starting mock singleuser server at {url.hostname}:{url.port}") + server.listen(url.port, url.hostname) try: ioloop.IOLoop.instance().start() except KeyboardInterrupt: @@ -61,7 +63,4 @@ def main(args): if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--port', type=int) - args, extra = parser.parse_known_args() - main(args) + main() diff --git a/jupyterhub/tests/test_api.py b/jupyterhub/tests/test_api.py index 6c90c1bd..03f45bc6 100644 --- a/jupyterhub/tests/test_api.py +++ b/jupyterhub/tests/test_api.py @@ -643,10 +643,17 @@ async def test_spawn(app): r = await async_requests.get(ujoin(url, 'args'), **kwargs) assert r.status_code == 200 argv = r.json() - assert '--port' in ' '.join(argv) + assert '--port' not in ' '.join(argv) + # we pass no CLI args anymore: + assert len(argv) == 1 r = await async_requests.get(ujoin(url, 'env'), **kwargs) env = r.json() - for expected in ['JUPYTERHUB_USER', 'JUPYTERHUB_BASE_URL', 'JUPYTERHUB_API_TOKEN']: + for expected in [ + 'JUPYTERHUB_USER', + 'JUPYTERHUB_BASE_URL', + 'JUPYTERHUB_API_TOKEN', + 'JUPYTERHUB_SERVICE_URL', + ]: assert expected in env if app.subdomain_host: assert env['JUPYTERHUB_HOST'] == app.subdomain_host