diff --git a/docs/rest-api.yml b/docs/rest-api.yml index 7104dc99..a54c4745 100644 --- a/docs/rest-api.yml +++ b/docs/rest-api.yml @@ -312,6 +312,30 @@ paths: responses: '200': description: The users have been removed from the group + /services: + get: + summary: List services + responses: + '200': + description: The service list + schema: + type: array + items: + $ref: '#/definitions/Service' + /services/{name}: + get: + summary: Get a service by name + parameters: + - name: name + description: service name + in: path + required: true + type: string + responses: + '200': + description: The Service model + schema: + $ref: '#/definitions/Service' /proxy: get: summary: Get the proxy's routing table @@ -436,3 +460,26 @@ definitions: description: The names of users who are members of this group items: type: string + Service: + type: object + properties: + name: + type: string + description: The service's name + admin: + type: boolean + description: Whether the service is an admin + url: + type: string + description: The internal url where the service is running + prefix: + type: string + description: The proxied URL prefix to the service's url + pid: + type: number + description: The PID of the service process (if managed) + command: + type: array + description: The command used to start the service (if managed) + items: + type: string diff --git a/docs/source/services.md b/docs/source/services.md new file mode 100644 index 00000000..eafee5e9 --- /dev/null +++ b/docs/source/services.md @@ -0,0 +1,88 @@ +# JupyterHub services + +JupyterHub 0.7 adds the notion of Services. +A Service is a process that interacts with the Hub REST API. +Services may perform actions such as shutting down user servers that have been idle for some time, +or registering additional web servers that should also use the Hub's authentication +and be served behind the Hub's proxy. + +There are two main characteristics of services: + +1. Is it **managed** by JupyterHub? +2. 
Does it have a web server that should be added to the proxy? + +If a `command` is specified for launching the service, it will be started and managed by the Hub. +If a `url` is specified for where the service runs its own webserver, +it will be added to the Hub's proxy at `/services/:service-name`. + +## Managed services + +**Managed** services are services that the Hub starts and is responsible for. +These can only be local subprocesses of the Hub, +and the Hub will take care of starting these processes and restarting them if they stop. + +While there are similarities with notebook Spawners, +there are no plans to support the same spawning abstractions as notebook servers. +If you want to run these services in docker or other environments, +you can register them as external services, as described below. + +A managed service is characterized by the `command` specified for launching the service. + + +```python +c.JupyterHub.services = [ + { + 'name': 'cull-idle', + 'admin': True, + 'command': ['python', '/path/to/cull-idle.py', '--interval'] + } +] +``` + +In addition to `command`, managed services can take additional optional parameters, +to describe the environment in which to start the process: + +- `environment: dict` additional environment variables for the service. +- `user: str` name of the user to run the service as, if different from the Hub. + Requires Hub to be root. +- `cwd: path` directory in which to run the service, if different from the Hub directory. 
+ +When the service starts, the Hub will pass the following environment variables: + +``` +JUPYTERHUB_SERVICE_NAME: the name of the service ('cull-idle' above) +JUPYTERHUB_API_TOKEN: API token assigned to the service +JUPYTERHUB_API_URL: URL for the JupyterHub API (http://127.0.0.1:8081/hub/api) +JUPYTERHUB_BASE_URL: Base URL of the Hub (https://mydomain[:port]/) +JUPYTERHUB_SERVICE_PREFIX: URL path prefix of this service (/services/cull-idle/) +``` + +## External services + +You can use your own service management tools, such as docker or systemd, to manage JupyterHub services. +These are not subprocesses of the Hub, and you must tell JupyterHub what API token the service is using to perform its API requests. +Each service will need a unique API token because the Hub authenticates each API request, +identifying the originating service or user. + +An example of an externally managed service with admin access and running its own web server: + +```python +c.JupyterHub.services = [ + { + 'name': 'my-web-service', + 'url': 'https://10.0.1.1:1984', + 'api_token': 'super-secret', + } +] +``` + + +## Writing your own services + +TODO + +### Authenticating with the Hub + +TODO + +JupyterHub 0.7 introduces some utilities that allow you to use the Hub's authentication mechanism. 
\ No newline at end of file diff --git a/examples/cull-idle/cull_idle_servers.py b/examples/cull-idle/cull_idle_servers.py index 82ac4b78..8a4dfbf7 100644 --- a/examples/cull-idle/cull_idle_servers.py +++ b/examples/cull-idle/cull_idle_servers.py @@ -9,10 +9,21 @@ so cull timeout should be greater than the sum of: - single-user websocket ping interval (default: 30s) - JupyterHub.last_activity_interval (default: 5 minutes) -Generate an API token and store it in `JPY_API_TOKEN`: +You can run this as a service managed by JupyterHub with this in your config:: - export JPY_API_TOKEN=`jupyterhub token` - python cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub] + + c.JupyterHub.services = [ + { + 'name': 'cull-idle', + 'admin': True, + 'command': 'python cull_idle_servers.py --timeout=3600'.split(), + } + ] + +Or run it manually by generating an API token and storing it in `JUPYTERHUB_API_TOKEN`: + + export JUPYTERHUB_API_TOKEN=`jupyterhub token` + python cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api] """ import datetime @@ -34,7 +45,7 @@ def cull_idle(url, api_token, timeout): auth_header = { 'Authorization': 'token %s' % api_token } - req = HTTPRequest(url=url + '/api/users', + req = HTTPRequest(url=url + '/users', headers=auth_header, ) now = datetime.datetime.utcnow() @@ -47,7 +58,7 @@ def cull_idle(url, api_token, timeout): last_activity = parse_date(user['last_activity']) if user['server'] and last_activity < cull_limit: app_log.info("Culling %s (inactive since %s)", user['name'], last_activity) - req = HTTPRequest(url=url + '/api/users/%s/server' % user['name'], + req = HTTPRequest(url=url + '/users/%s/server' % user['name'], method='DELETE', headers=auth_header, ) @@ -60,7 +71,7 @@ def cull_idle(url, api_token, timeout): app_log.debug("Finished culling %s", name) if __name__ == '__main__': - define('url', default='http://127.0.0.1:8081/hub', help="The JupyterHub API URL") + define('url', 
default=os.environ.get('JUPYTERHUB_API_URL'), help="The JupyterHub API URL") define('timeout', default=600, help="The idle timeout (in seconds)") define('cull_every', default=0, help="The interval (in seconds) for checking for idle servers to cull") @@ -68,7 +79,7 @@ if __name__ == '__main__': if not options.cull_every: options.cull_every = options.timeout // 2 - api_token = os.environ['JPY_API_TOKEN'] + api_token = os.environ['JUPYTERHUB_API_TOKEN'] loop = IOLoop.current() cull = lambda : cull_idle(options.url, api_token, options.timeout) diff --git a/examples/cull-idle/jupyterhub_config.py b/examples/cull-idle/jupyterhub_config.py new file mode 100644 index 00000000..2de8f424 --- /dev/null +++ b/examples/cull-idle/jupyterhub_config.py @@ -0,0 +1,8 @@ +# run cull-idle as a service +c.JupyterHub.services = [ + { + 'name': 'cull-idle', + 'admin': True, + 'command': 'python cull_idle_servers.py --timeout=3600'.split(), + } +] diff --git a/jupyterhub/apihandlers/__init__.py b/jupyterhub/apihandlers/__init__.py index 5ded7b7e..e28d5c45 100644 --- a/jupyterhub/apihandlers/__init__.py +++ b/jupyterhub/apihandlers/__init__.py @@ -1,11 +1,6 @@ from .base import * -from .auth import * -from .hub import * -from .proxy import * -from .users import * -from .groups import * -from . import auth, hub, proxy, users +from . 
import auth, hub, proxy, users, groups, services default_handlers = [] -for mod in (auth, hub, proxy, users, groups): +for mod in (auth, hub, proxy, users, groups, services): default_handlers.extend(mod.default_handlers) diff --git a/jupyterhub/apihandlers/proxy.py b/jupyterhub/apihandlers/proxy.py index 54d75500..a2b705f3 100644 --- a/jupyterhub/apihandlers/proxy.py +++ b/jupyterhub/apihandlers/proxy.py @@ -28,7 +28,7 @@ class ProxyAPIHandler(APIHandler): @gen.coroutine def post(self): """POST checks the proxy to ensure""" - yield self.proxy.check_routes(self.users) + yield self.proxy.check_routes(self.users, self.services) @admin_only @@ -59,7 +59,7 @@ class ProxyAPIHandler(APIHandler): self.proxy.auth_token = model['auth_token'] self.db.commit() self.log.info("Updated proxy at %s", server.bind_url) - yield self.proxy.check_routes(self.users) + yield self.proxy.check_routes(self.users, self.services) diff --git a/jupyterhub/apihandlers/services.py b/jupyterhub/apihandlers/services.py new file mode 100644 index 00000000..7c4bfb61 --- /dev/null +++ b/jupyterhub/apihandlers/services.py @@ -0,0 +1,64 @@ +"""Service handlers + +Currently GET-only, no actions can be taken to modify services. +""" + +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +import json + +from tornado import web + +from .. 
import orm +from ..utils import admin_only +from .base import APIHandler + +def service_model(service): + """Produce the model for a service""" + return { + 'name': service.name, + 'admin': service.admin, + 'url': service.url, + 'prefix': service.server.base_url if service.server else '', + 'command': service.command, + 'pid': service.proc.pid if service.proc else 0, + } + +class ServiceListAPIHandler(APIHandler): + @admin_only + def get(self): + data = {name: service_model(service) for name, service in self.services.items()} + self.write(json.dumps(data)) + + +def admin_or_self(method): + """Decorator for restricting access to either the target service or admin""" + def decorated_method(self, name): + current = self.get_current_user() + if current is None: + raise web.HTTPError(403) + if not current.admin: + # not admin, maybe self + if not isinstance(current, orm.Service): + raise web.HTTPError(403) + if current.name != name: + raise web.HTTPError(403) + # raise 404 if not found + if name not in self.services: + raise web.HTTPError(404) + return method(self, name) + return decorated_method + +class ServiceAPIHandler(APIHandler): + + @admin_or_self + def get(self, name): + service = self.services[name] + self.write(json.dumps(service_model(service))) + + +default_handlers = [ + (r"/api/services", ServiceListAPIHandler), + (r"/api/services/([^/]+)", ServiceAPIHandler), +] diff --git a/jupyterhub/app.py b/jupyterhub/app.py index f42ff689..667be204 100644 --- a/jupyterhub/app.py +++ b/jupyterhub/app.py @@ -44,6 +44,7 @@ here = os.path.dirname(__file__) import jupyterhub from . import handlers, apihandlers from .handlers.static import CacheControlStaticFilesHandler, LogoHandler +from .services.service import Service from . import dbutil, orm from .user import User, UserDict @@ -291,6 +292,15 @@ class JupyterHub(Application): # if not specified, assume https: You have to be really explicit about HTTP! 
self.subdomain_host = 'https://' + new + domain = Unicode( + help="domain name, e.g. 'example.com' (excludes protocol, port)" + ) + @default('domain') + def _domain_default(self): + if not self.subdomain_host: + return '' + return urlparse(self.subdomain_host).hostname + port = Integer(8000, help="The public facing port of the proxy" ).tag(config=True) @@ -408,6 +418,29 @@ class JupyterHub(Application): Allows ahead-of-time generation of API tokens for use by externally managed services. """ ).tag(config=True) + + services = List(Dict(), + help="""List of service specification dictionaries. + + A service + + For instance:: + + services = [ + { + 'name': 'cull_idle', + 'command': ['/path/to/cull_idle_servers.py'], + }, + { + 'name': 'formgrader', + 'url': 'http://127.0.0.1:1234', + 'token': 'super-secret', + 'env': + } + ] + """ + ).tag(config=True) + _service_map = Dict() authenticator_class = Type(PAMAuthenticator, Authenticator, help="""Class for authenticating users. @@ -933,6 +966,75 @@ class JupyterHub(Application): """Load predefined API tokens (for services) into database""" self._add_tokens(self.service_tokens, kind='service') self._add_tokens(self.api_tokens, kind='user') + + def init_services(self): + self._service_map.clear() + if self.domain: + domain = 'services.' 
+ self.domain + parsed = urlparse(self.subdomain_host) + host = '%s://services.%s' % (parsed.scheme, parsed.netloc) + else: + domain = host = '' + for spec in self.services: + if 'name' not in spec: + raise ValueError('service spec must have a name: %r' % spec) + name = spec['name'] + # get/create orm + orm_service = orm.Service.find(self.db, name=name) + if orm_service is None: + # not found, create a new one + orm_service = orm.Service(name=name) + self.db.add(orm_service) + orm_service.admin = spec.get('admin', False) + self.db.commit() + service = Service(parent=self, + base_url=self.base_url, + db=self.db, orm=orm_service, + domain=domain, host=host, + hub_api_url=self.hub.api_url, + ) + + traits = service.traits(input=True) + for key, value in spec.items(): + if key not in traits: + raise AttributeError("No such service field: %s" % key) + setattr(service, key, value) + + if service.url: + parsed = urlparse(service.url) + if parsed.port is not None: + port = parsed.port + elif parsed.scheme == 'http': + port = 80 + elif parsed.scheme == 'https': + port = 443 + server = service.orm.server = orm.Server( + proto=parsed.scheme, + ip=parsed.hostname, + port=port, + cookie_name='jupyterhub-services', + base_url=service.prefix, + ) + self.db.add(server) + else: + service.orm.server = None + + self._service_map[name] = service + if service.managed: + if not service.api_token: + # generate new token + service.api_token = service.orm.new_api_token() + else: + # ensure provided token is registered + self.service_tokens[service.api_token] = service.name + else: + self.service_tokens[service.api_token] = service.name + + # delete services from db not in service config: + for service in self.db.query(orm.Service): + if service.name not in self._service_map: + self.db.delete(service) + self.db.commit() @gen.coroutine def init_spawners(self): @@ -1102,6 +1204,7 @@ class JupyterHub(Application): yield self.start_proxy() self.log.info("Setting up routes on new proxy") yield 
self.proxy.add_all_users(self.users) + yield self.proxy.add_all_services(self.services) self.log.info("New proxy back up, and good to go") def init_tornado_settings(self): @@ -1127,8 +1230,6 @@ class JupyterHub(Application): else: version_hash=datetime.now().strftime("%Y%m%d%H%M%S"), - subdomain_host = self.subdomain_host - domain = urlparse(subdomain_host).hostname settings = dict( log_function=log_request, config=self.config, @@ -1151,8 +1252,8 @@ class JupyterHub(Application): template_path=self.template_paths, jinja2_env=jinja_env, version_hash=version_hash, - subdomain_host=subdomain_host, - domain=domain, + subdomain_host=self.subdomain_host, + domain=self.domain, statsd=self.statsd, ) # allow configured settings to have priority @@ -1160,6 +1261,7 @@ class JupyterHub(Application): self.tornado_settings = settings # constructing users requires access to tornado_settings self.tornado_settings['users'] = self.users + self.tornado_settings['services'] = self._service_map def init_tornado_application(self): """Instantiate the tornado Application object""" @@ -1197,6 +1299,7 @@ class JupyterHub(Application): self.init_proxy() yield self.init_users() self.init_groups() + self.init_services() self.init_api_tokens() self.init_tornado_settings() yield self.init_spawners() @@ -1300,7 +1403,7 @@ class JupyterHub(Application): self.statsd.gauge('users.active', active_users_count) self.db.commit() - yield self.proxy.check_routes(self.users, routes) + yield self.proxy.check_routes(self.users, self._service_map, routes) @gen.coroutine def start(self): @@ -1333,9 +1436,16 @@ class JupyterHub(Application): except Exception as e: self.log.critical("Failed to start proxy", exc_info=True) self.exit(1) - return + + for service_name, service in self._service_map.items(): + try: + yield service.start() + except Exception as e: + self.log.critical("Failed to start service %s", service_name, exc_info=True) + self.exit(1) loop.add_callback(self.proxy.add_all_users, self.users) + 
loop.add_callback(self.proxy.add_all_services, self._service_map) if self.proxy_process: # only check / restart the proxy if we started it in the first place. diff --git a/jupyterhub/handlers/base.py b/jupyterhub/handlers/base.py index 07651787..f03f817b 100644 --- a/jupyterhub/handlers/base.py +++ b/jupyterhub/handlers/base.py @@ -68,6 +68,9 @@ class BaseHandler(RequestHandler): return self.settings.setdefault('users', {}) @property + def services(self): + return self.settings.setdefault('services', {}) + @property def hub(self): return self.settings['hub'] @@ -236,6 +239,10 @@ class BaseHandler(RequestHandler): **kwargs ) + def set_service_cookie(self, user): + """set the login cookie for services""" + self._set_user_cookie(user, self.service_server) + def set_server_cookie(self, user): """set the login cookie for the single-user server""" self._set_user_cookie(user, user.server) @@ -254,6 +261,10 @@ class BaseHandler(RequestHandler): if user.server: self.set_server_cookie(user) + # set single cookie for services + if self.db.query(orm.Service).first(): + self.set_service_cookie(user) + # create and set a new cookie token for the hub if not self.get_current_user_cookie(): self.set_hub_cookie(user) diff --git a/jupyterhub/orm.py b/jupyterhub/orm.py index f7241b9a..5e581e19 100644 --- a/jupyterhub/orm.py +++ b/jupyterhub/orm.py @@ -152,6 +152,35 @@ class Proxy(Base): return client.fetch(req) + @gen.coroutine + def add_service(self, service, client=None): + """Add a service's server to the proxy table.""" + if not service.server: + raise RuntimeError( + "Service %s does not have an http endpoint to add to the proxy.", service.name) + + self.log.info("Adding service %s to proxy %s => %s", + service.name, service.proxy_path, service.server.host, + ) + + yield self.api_request(service.proxy_path, + method='POST', + body=dict( + target=service.server.host, + service=service.name, + ), + client=client, + ) + + @gen.coroutine + def delete_service(self, service, 
client=None): + """Remove a service's server from the proxy table.""" + self.log.info("Removing service %s from proxy", service.name) + yield self.api_request(service.proxy_path, + method='DELETE', + client=client, + ) + @gen.coroutine def add_user(self, user, client=None): """Add a user's server to the proxy table.""" @@ -174,7 +203,7 @@ class Proxy(Base): @gen.coroutine def delete_user(self, user, client=None): - """Remove a user's server to the proxy table.""" + """Remove a user's server from the proxy table.""" self.log.info("Removing user %s from proxy", user.name) yield self.api_request(user.proxy_path, method='DELETE', @@ -182,10 +211,20 @@ class Proxy(Base): ) @gen.coroutine - def get_routes(self, client=None): - """Fetch the proxy's routes""" - resp = yield self.api_request('', client=client) - return json.loads(resp.body.decode('utf8', 'replace')) + def add_all_services(self, service_dict): + """Update the proxy table from the database. + + Used when loading up a new proxy. + """ + db = inspect(self).session + futures = [] + for orm_service in db.query(Service): + service = service_dict[orm_service.name] + if service.server: + futures.append(self.add_service(service)) + # wait after submitting them all + for f in futures: + yield f @gen.coroutine def add_all_users(self, user_dict): @@ -204,12 +243,18 @@ class Proxy(Base): yield f @gen.coroutine - def check_routes(self, user_dict, routes=None): + def get_routes(self, client=None): + """Fetch the proxy's routes""" + resp = yield self.api_request('', client=client) + return json.loads(resp.body.decode('utf8', 'replace')) + + @gen.coroutine + def check_routes(self, user_dict, service_dict, routes=None): """Check that all users are properly routed on the proxy""" if not routes: routes = yield self.get_routes() - have_routes = { r['user'] for r in routes.values() if 'user' in r } + user_routes = { r['user'] for r in routes.values() if 'user' in r } futures = [] db = inspect(self).session for orm_user in 
db.query(User).filter(User.server != None): @@ -222,9 +267,22 @@ class Proxy(Base): # catch filter bug, either in sqlalchemy or my understanding of its behavior self.log.error("User %s has no server, but wasn't filtered out.", user) continue - if user.name not in have_routes: + if user.name not in user_routes: self.log.warning("Adding missing route for %s (%s)", user.name, user.server) futures.append(self.add_user(user)) + + # check service routes + service_routes = { r['service'] for r in routes.values() if 'service' in r } + for orm_service in db.query(Service).filter(Service.server != None): + service = service_dict[orm_service.name] + if service.server is None: + # This should never be True, but seems to be on rare occasion. + # catch filter bug, either in sqlalchemy or my understanding of its behavior + self.log.error("Service %s has no server, but wasn't filtered out.", service) + continue + if service.name not in service_routes: + self.log.warning("Adding missing route for %s (%s)", service.name, service.server) + futures.append(self.add_service(service)) for f in futures: yield f @@ -351,13 +409,6 @@ class User(Base): return db.query(cls).filter(cls.name==name).first() -# service:server many:many mapping table -service_server_map = Table('service_server_map', Base.metadata, - Column('service_id', ForeignKey('services.id')), - Column('server_id', ForeignKey('servers.id')), -) - - class Service(Base): """A service run with JupyterHub @@ -369,10 +420,10 @@ class Service(Base): - name - admin - api tokens + - server (if proxied http endpoint) In addition to what it has in common with users, a Service has extra info: - - servers: list of HTTP endpoints for the service - pid: the process id (if managed) """ @@ -386,7 +437,8 @@ class Service(Base): api_tokens = relationship("APIToken", backref="service") # service-specific interface - servers = relationship('Server', secondary='service_server_map') + _server_id = Column(Integer, ForeignKey('servers.id')) + server 
= relationship(Server, primaryjoin=_server_id == Server.id) pid = Column(Integer) def new_api_token(self, token=None): diff --git a/jupyterhub/services/service.py b/jupyterhub/services/service.py new file mode 100644 index 00000000..bfa1736b --- /dev/null +++ b/jupyterhub/services/service.py @@ -0,0 +1,257 @@ +"""A service is a process that talks to JupyterHub + +Cases: + +Managed: + - managed by JupyterHub (always subprocess, no custom Spawners) + - always a long-running process + - managed services are restarted automatically if they exit unexpectedly +Unmanaged: + - managed by external service (docker, systemd, etc.) + - do not need to be long-running processes, or processes at all + + +URL: needs a route added to the proxy. + - Public route will always be /services/service-name + - url specified in config + - if port is 0, Hub will select a port + +API access: + - admin: tokens will have admin-access to the API + - not admin: tokens will only have non-admin access + (not much they can do other than defer to Hub for auth) + +An externally managed service running on a URL:: + + { + 'name': 'my-service', + 'url': 'https://host:8888', + 'admin': True, + 'api_token': 'super-secret', + } + +A hub-managed service with no URL: + + { + 'name': 'cull-idle', + 'command': ['python', '/path/to/cull-idle'], + 'admin': True, + } +""" + +from getpass import getuser +import pipes +import shutil +from subprocess import Popen +from urllib.parse import urlparse + +from tornado import gen + +from traitlets import ( + HasTraits, + Any, Bool, Dict, Unicode, Instance, + default, observe, +) +from traitlets.config import LoggingConfigurable + +from .. 
import orm +from ..traitlets import Command +from ..spawner import LocalProcessSpawner +from ..utils import url_path_join + +class _MockUser(HasTraits): + name = Unicode() + server = Instance(orm.Server, allow_none=True) + state = Dict() + service = Instance(__module__ + '.Service') + +# We probably shouldn't use a Spawner here, +# but there are too many concepts to share. + +class _ServiceSpawner(LocalProcessSpawner): + """Subclass of LocalProcessSpawner + + Removes notebook-specific-ness from LocalProcessSpawner. + """ + cwd = Unicode() + cmd = Command(minlen=0) + + def make_preexec_fn(self, name): + if not name or name == getuser(): + # no setuid if no name + return + return super().make_preexec_fn(name) + + def start(self): + """Start the process""" + env = self.get_env() + cmd = self.cmd + + self.log.info("Spawning %s", ' '.join(pipes.quote(s) for s in cmd)) + try: + self.proc = Popen(self.cmd, env=env, + preexec_fn=self.make_preexec_fn(self.user.name), + start_new_session=True, # don't forward signals + cwd=self.cwd or None, + ) + except PermissionError: + # use which to get abspath + script = shutil.which(cmd[0]) or cmd[0] + self.log.error("Permission denied trying to run %r. Does %s have access to this file?", + script, self.user.name, + ) + raise + + self.pid = self.proc.pid + +class Service(LoggingConfigurable): + """An object wrapping a service specification for Hub API consumers. + + A service has inputs: + + - name: str + the name of the service + - admin: bool(false) + whether the service should have administrative privileges + - url: str (None) + The URL where the service is/should be. + If specified, the service will be added to the proxy at /services/:name + + If a service is to be managed by the Hub, it has a few extra options: + + - command: (str/Popen list) + Command for JupyterHub to spawn the service. + Only use this if the service should be a subprocess. 
+ If command is not specified, it is assumed to be managed + by a + - env: dict + environment variables to add to the current env + - user: str + The name of a system user to become. + If unspecified, run as the same user as the Hub. + """ + + # inputs: + name = Unicode( + help="""The name of the service. + + If the service has an http endpoint, it + """ + ).tag(input=True) + admin = Bool(False, + help="Does the service need admin-access to the Hub API?" + ).tag(input=True) + url = Unicode( + help="""URL of the service. + + Only specify if the service runs an HTTP(s) endpoint that. + If managed, will be passed as JUPYTERHUB_SERVICE_URL env. + """ + ).tag(input=True) + api_token = Unicode( + help="""The API token to use for the service. + + If unspecified, an API token will be generated for managed services. + """ + ).tag(input=True) + # Managed service API: + + @property + def managed(self): + """Am I managed by the Hub?""" + return bool(self.command) + + command = Command(minlen=0, + help="Command to spawn this service, if managed." + ).tag(input=True) + cwd = Unicode( + help="""The working directory in which to run the service.""" + ).tag(input=True) + environment = Dict( + help="""Environment variables to pass to the service. + Only used if the Hub is spawning the service. + """ + ).tag(input=True) + user = Unicode(getuser(), + help="""The user to become when launching the service. + + If unspecified, run the service as the same user as the Hub. 
+ """ + ).tag(input=True) + + domain = Unicode() + host = Unicode() + proc = Any() + + # handles on globals: + proxy = Any() + base_url = Unicode() + db = Any() + orm = Any() + + @property + def server(self): + return self.orm.server + + @property + def prefix(self): + return url_path_join(self.base_url, 'services', self.name) + + @property + def proxy_path(self): + if not self.server: + return '' + if self.domain: + return url_path_join('/' + self.domain, self.server.base_url) + else: + return self.server.base_url + + def __repr__(self): + return "<{cls}(name={name}{managed})>".format( + cls=self.__class__.__name__, + name=self.name, + managed=' managed' if self.managed else '', + ) + + def start(self): + """Start a managed service""" + if not self.managed: + raise RuntimeError("Cannot start unmanaged service %s" % self) + self.log.info("Starting service %r: %r", self.name, self.command) + env = {} + env.update(self.environment) + + env['JUPYTERHUB_SERVICE_NAME'] = self.name + env['JUPYTERHUB_API_TOKEN'] = self.api_token + env['JUPYTERHUB_API_URL'] = self.hub_api_url + env['JUPYTERHUB_BASE_URL'] = self.base_url + env['JUPYTERHUB_SERVICE_PREFIX'] = self.server.base_url + env['JUPYTERHUB_SERVICE_URL'] = self.url + + self.spawner = _ServiceSpawner( + cmd=self.command, + environment=env, + api_token=self.api_token, + cwd=self.cwd, + user=_MockUser( + name=self.user, + service=self, + server=self.orm.server, + ), + ) + self.spawner.start() + self.proc = self.spawner.proc + self.spawner.add_poll_callback(self._proc_stopped) + self.spawner.start_polling() + + def _proc_stopped(self): + """Called when the service process unexpectedly exits""" + self.log.error("Service %s exited with status %i", self.name, self.proc.returncode) + self.start() + + def stop(self): + """Stop a managed service""" + if not self.managed: + raise RuntimeError("Cannot start unmanaged service %s" % self) + self.spawner.stop_polling() + return self.spawner.stop() diff --git a/jupyterhub/spawner.py 
b/jupyterhub/spawner.py index 775c41be..0ff15d16 100644 --- a/jupyterhub/spawner.py +++ b/jupyterhub/spawner.py @@ -571,4 +571,4 @@ class LocalProcessSpawner(Spawner): if status is None: # it all failed, zombie process self.log.warning("Process %i never died", self.pid) - + diff --git a/jupyterhub/tests/conftest.py b/jupyterhub/tests/conftest.py index 77c42ddd..0497dbb1 100644 --- a/jupyterhub/tests/conftest.py +++ b/jupyterhub/tests/conftest.py @@ -5,14 +5,19 @@ import logging from getpass import getuser - -from pytest import fixture +from subprocess import TimeoutExpired +import time +from unittest import mock +from pytest import fixture, yield_fixture, raises from tornado import ioloop from .. import orm +from ..utils import random_port from .mocking import MockHub +from .test_services import mockservice_cmd +import jupyterhub.services.service # global db session object _db = None @@ -53,3 +58,34 @@ def app(request): app.stop() request.addfinalizer(fin) return app + + +# mock services for testing. +# Shorter intervals, etc. 
+class MockServiceSpawner(jupyterhub.services.service._ServiceSpawner): + poll_interval = 1 + + +@yield_fixture +def mockservice(request, app): + name = 'mock-service' + with mock.patch.object(jupyterhub.services.service, '_ServiceSpawner', MockServiceSpawner): + app.services = [{ + 'name': name, + 'command': mockservice_cmd, + 'url': 'http://127.0.0.1:%i' % random_port(), + 'admin': True, + }] + app.init_services() + app.io_loop.add_callback(app.proxy.add_all_services, app._service_map) + assert name in app._service_map + service = app._service_map[name] + app.io_loop.add_callback(service.start) + request.addfinalizer(service.stop) + for i in range(20): + if not getattr(service, 'proc', False): + time.sleep(0.2) + # ensure process finishes starting + with raises(TimeoutExpired): + service.proc.wait(1) + yield service diff --git a/jupyterhub/tests/mocking.py b/jupyterhub/tests/mocking.py index 9f39a2f2..f6db4a92 100644 --- a/jupyterhub/tests/mocking.py +++ b/jupyterhub/tests/mocking.py @@ -209,25 +209,17 @@ def public_host(app): return app.proxy.public_server.host -def public_url(app): +def public_url(app, user_or_service=None): """Return the full, public base URL (including prefix) of the given JupyterHub instance.""" - return public_host(app) + app.proxy.public_server.base_url - - -def user_url(user, app): - """Return the full public URL for a given user. - - Args: - user: user object, as return by app.users['username'] - app: MockHub instance - Returns: - url (str): The public URL for user. 
- """ - if app.subdomain_host: - host = user.host + if user_or_service: + if app.subdomain_host: + host = user_or_service.host + else: + host = public_host(app) + return host + user_or_service.server.base_url else: - host = public_host(app) - return host + user.server.base_url + return public_host(app) + app.proxy.public_server.base_url + # single-user-server mocking: diff --git a/jupyterhub/tests/mockservice.py b/jupyterhub/tests/mockservice.py new file mode 100644 index 00000000..e989c1e5 --- /dev/null +++ b/jupyterhub/tests/mockservice.py @@ -0,0 +1,59 @@ +"""Mock service for testing + +basic HTTP Server that echos URLs back, +and allow retrieval of sys.argv. +""" + +import argparse +import json +import os +import sys +from urllib.parse import urlparse + +import requests +from tornado import web, httpserver, ioloop + + +class EchoHandler(web.RequestHandler): + def get(self): + self.write(self.request.path) + + +class EnvHandler(web.RequestHandler): + def get(self): + self.set_header('Content-Type', 'application/json') + self.write(json.dumps(dict(os.environ))) + + +class APIHandler(web.RequestHandler): + def get(self, path): + api_token = os.environ['JUPYTERHUB_API_TOKEN'] + api_url = os.environ['JUPYTERHUB_API_URL'] + r = requests.get(api_url + path, headers={ + 'Authorization': 'token %s' % api_token + }) + r.raise_for_status() + self.set_header('Content-Type', 'application/json') + self.write(r.text) + + +def main(): + if os.environ['JUPYTERHUB_SERVICE_URL']: + url = urlparse(os.environ['JUPYTERHUB_SERVICE_URL']) + app = web.Application([ + (r'.*/env', EnvHandler), + (r'.*/api/(.*)', APIHandler), + (r'.*', EchoHandler), + ]) + + server = httpserver.HTTPServer(app) + server.listen(url.port, url.hostname) + try: + ioloop.IOLoop.instance().start() + except KeyboardInterrupt: + print('\nInterrupted') + +if __name__ == '__main__': + from tornado.options import parse_command_line + parse_command_line() + main() diff --git a/jupyterhub/tests/test_api.py 
b/jupyterhub/tests/test_api.py index e14b0662..a82aeaa1 100644 --- a/jupyterhub/tests/test_api.py +++ b/jupyterhub/tests/test_api.py @@ -6,6 +6,7 @@ from queue import Queue import sys from urllib.parse import urlparse, quote +from pytest import mark import requests from tornado import gen @@ -15,7 +16,7 @@ from .. import orm from ..user import User from ..utils import url_path_join as ujoin from . import mocking -from .mocking import public_host, public_url, user_url +from .mocking import public_host, public_url def check_db_locks(func): @@ -155,6 +156,7 @@ def test_referer_check(app, io_loop): # user API tests +@mark.user def test_get_users(app): db = app.db r = api_request(app, 'users') @@ -185,6 +187,8 @@ def test_get_users(app): ) assert r.status_code == 403 + +@mark.user def test_add_user(app): db = app.db name = 'newuser' @@ -196,6 +200,7 @@ def test_add_user(app): assert not user.admin +@mark.user def test_get_user(app): name = 'user' r = api_request(app, 'users', name) @@ -211,6 +216,7 @@ def test_get_user(app): } +@mark.user def test_add_multi_user_bad(app): r = api_request(app, 'users', method='post') assert r.status_code == 400 @@ -220,6 +226,7 @@ def test_add_multi_user_bad(app): assert r.status_code == 400 +@mark.user def test_add_multi_user_invalid(app): app.authenticator.username_pattern = r'w.*' r = api_request(app, 'users', method='post', @@ -230,6 +237,7 @@ def test_add_multi_user_invalid(app): assert r.json()['message'] == 'Invalid usernames: andrew, tara' +@mark.user def test_add_multi_user(app): db = app.db names = ['a', 'b'] @@ -265,6 +273,7 @@ def test_add_multi_user(app): assert r_names == ['ab'] +@mark.user def test_add_multi_user_admin(app): db = app.db names = ['c', 'd'] @@ -283,6 +292,7 @@ def test_add_multi_user_admin(app): assert user.admin +@mark.user def test_add_user_bad(app): db = app.db name = 'dne_newuser' @@ -291,6 +301,8 @@ def test_add_user_bad(app): user = find_user(db, name) assert user is None + +@mark.user def 
test_add_admin(app): db = app.db name = 'newadmin' @@ -304,6 +316,7 @@ def test_add_admin(app): assert user.admin +@mark.user def test_delete_user(app): db = app.db mal = add_user(db, name='mal') @@ -311,6 +324,7 @@ def test_delete_user(app): assert r.status_code == 204 +@mark.user def test_make_admin(app): db = app.db name = 'admin2' @@ -366,7 +380,7 @@ def test_spawn(app, io_loop): assert status is None assert user.server.base_url == ujoin(app.base_url, 'user/%s' % name) - url = user_url(user, app) + url = public_url(app, user) print(url) r = requests.get(url) assert r.status_code == 200 @@ -542,6 +556,7 @@ def test_bad_get_token(app): # group API tests +@mark.group def test_groups_list(app): r = api_request(app, 'groups') r.raise_for_status() @@ -562,6 +577,7 @@ def test_groups_list(app): }] +@mark.group def test_group_get(app): group = orm.Group.find(app.db, name='alphaflight') user = add_user(app.db, app=app, name='sasquatch') @@ -580,6 +596,7 @@ def test_group_get(app): } +@mark.group def test_group_create_delete(app): db = app.db r = api_request(app, 'groups/runaways', method='delete') @@ -613,9 +630,9 @@ def test_group_create_delete(app): # delete nonexistant gives 404 r = api_request(app, 'groups/omegaflight', method='delete') assert r.status_code == 404 - +@mark.group def test_group_add_users(app): db = app.db # must specify users @@ -637,6 +654,7 @@ def test_group_add_users(app): assert sorted([ u.name for u in group.users ]) == sorted(names) +@mark.group def test_group_delete_users(app): db = app.db # must specify users @@ -659,6 +677,61 @@ def test_group_delete_users(app): assert sorted([ u.name for u in group.users ]) == sorted(names[2:]) +# service API +@mark.services +def test_get_services(app, mockservice): + db = app.db + r = api_request(app, 'services') + r.raise_for_status() + assert r.status_code == 200 + + services = r.json() + assert services == { + 'mock-service': { + 'name': 'mock-service', + 'admin': True, + 'command': mockservice.command, 
+ 'pid': mockservice.proc.pid, + 'prefix': mockservice.server.base_url, + 'url': mockservice.url, + } + } + + r = api_request(app, 'services', + headers=auth_header(db, 'user'), + ) + assert r.status_code == 403 + + +@mark.services +def test_get_service(app, mockservice): + db = app.db + r = api_request(app, 'services/%s' % mockservice.name) + r.raise_for_status() + assert r.status_code == 200 + + service = r.json() + assert service == { + 'name': 'mock-service', + 'admin': True, + 'command': mockservice.command, + 'pid': mockservice.proc.pid, + 'prefix': mockservice.server.base_url, + 'url': mockservice.url, + } + + r = api_request(app, 'services/%s' % mockservice.name, + headers={ + 'Authorization': 'token %s' % mockservice.api_token + } + ) + r.raise_for_status() + r = api_request(app, 'services/%s' % mockservice.name, + headers=auth_header(db, 'user'), + ) + assert r.status_code == 403 + + def test_root_api(app): base_url = app.hub.server.url url = ujoin(base_url, 'api') diff --git a/jupyterhub/tests/test_orm.py b/jupyterhub/tests/test_orm.py index 6a369ad5..182160e4 100644 --- a/jupyterhub/tests/test_orm.py +++ b/jupyterhub/tests/test_orm.py @@ -124,19 +124,17 @@ def test_service_tokens(db): assert service2.id != service.id -def test_service_servers(db): +def test_service_server(db): service = orm.Service(name='has_servers') db.add(service) db.commit() - assert service.servers == [] - servers = service.servers = [ - orm.Server(), - orm.Server(), - ] - assert [ s.id for s in servers ] == [ None, None ] + assert service.server is None + server = service.server = orm.Server() + assert service + assert server.id is None db.commit() - assert [ type(s.id) for s in servers ] == [ int, int ] + assert isinstance(server.id, int) def test_token_find(db): diff --git a/jupyterhub/tests/test_pages.py b/jupyterhub/tests/test_pages.py index 147e0a59..11e0c699 100644 --- a/jupyterhub/tests/test_pages.py +++ b/jupyterhub/tests/test_pages.py @@ -8,7 +8,7 @@ from ..utils import 
url_path_join as ujoin from .. import orm import mock -from .mocking import FormSpawner, public_url, public_host, user_url +from .mocking import FormSpawner, public_url, public_host from .test_api import api_request def get_page(path, app, hub=True, **kw): @@ -35,7 +35,7 @@ def test_root_auth(app): cookies = app.login_user('river') r = requests.get(public_url(app), cookies=cookies) r.raise_for_status() - assert r.url == user_url(app.users['river'], app) + assert r.url == public_url(app, app.users['river']) def test_root_redirect(app): name = 'wash' diff --git a/jupyterhub/tests/test_proxy.py b/jupyterhub/tests/test_proxy.py index 07e389a8..4ce1c1ef 100644 --- a/jupyterhub/tests/test_proxy.py +++ b/jupyterhub/tests/test_proxy.py @@ -137,11 +137,11 @@ def test_check_routes(app, io_loop): zoe = app.users[zoe] before = sorted(io_loop.run_sync(app.proxy.get_routes)) assert unquote(zoe.proxy_path) in before - io_loop.run_sync(lambda : app.proxy.check_routes(app.users)) + io_loop.run_sync(lambda : app.proxy.check_routes(app.users, app._service_map)) io_loop.run_sync(lambda : proxy.delete_user(zoe)) during = sorted(io_loop.run_sync(app.proxy.get_routes)) assert unquote(zoe.proxy_path) not in during - io_loop.run_sync(lambda : app.proxy.check_routes(app.users)) + io_loop.run_sync(lambda : app.proxy.check_routes(app.users, app._service_map)) after = sorted(io_loop.run_sync(app.proxy.get_routes)) assert unquote(zoe.proxy_path) in after assert before == after diff --git a/jupyterhub/tests/test_services.py b/jupyterhub/tests/test_services.py new file mode 100644 index 00000000..ff5a8ed1 --- /dev/null +++ b/jupyterhub/tests/test_services.py @@ -0,0 +1,101 @@ +"""Tests for services""" + +from binascii import hexlify +from contextlib import contextmanager +import os +from subprocess import Popen +import sys +from threading import Event +import time + +import requests +from tornado import gen +from tornado.ioloop import IOLoop + + +from .mocking import public_url +from ..utils 
import url_path_join, wait_for_http_server + +here = os.path.dirname(os.path.abspath(__file__)) +mockservice_py = os.path.join(here, 'mockservice.py') +mockservice_cmd = [sys.executable, mockservice_py] + +from ..utils import random_port + + +@contextmanager +def external_service(app, name='mockservice'): + env = { + 'JUPYTERHUB_API_TOKEN': hexlify(os.urandom(5)), + 'JUPYTERHUB_SERVICE_NAME': name, + 'JUPYTERHUB_API_URL': url_path_join(app.hub.server.url, 'api/'), + 'JUPYTERHUB_SERVICE_URL': 'http://127.0.0.1:%i' % random_port(), + } + p = Popen(mockservice_cmd, env=env) + IOLoop().run_sync(lambda : wait_for_http_server(env['JUPYTERHUB_SERVICE_URL'])) + try: + yield env + finally: + p.terminate() + + +def test_managed_service(app, mockservice): + service = mockservice + proc = service.proc + first_pid = proc.pid + assert proc.poll() is None + # shut it down: + proc.terminate() + proc.wait(10) + assert proc.poll() is not None + # ensure Hub notices and brings it back up: + for i in range(20): + if service.proc is not proc: + break + else: + time.sleep(0.2) + + assert service.proc.pid != first_pid + assert service.proc.poll() is None + + +def test_proxy_service(app, mockservice, io_loop): + name = mockservice.name + routes = io_loop.run_sync(app.proxy.get_routes) + url = public_url(app, mockservice) + '/foo' + r = requests.get(url, allow_redirects=False) + path = '/services/{}/foo'.format(name) + r.raise_for_status() + assert r.status_code == 200 + assert r.text.endswith(path) + + +def test_external_service(app, io_loop): + name = 'external' + with external_service(app, name=name) as env: + app.services = [{ + 'name': name, + 'admin': True, + 'url': env['JUPYTERHUB_SERVICE_URL'], + 'api_token': env['JUPYTERHUB_API_TOKEN'], + }] + app.init_services() + app.init_api_tokens() + evt = Event() + @gen.coroutine + def add_services(): + yield app.proxy.add_all_services(app._service_map) + evt.set() + app.io_loop.add_callback(add_services) + assert evt.wait(10) + service = 
app._service_map[name] + url = public_url(app, service) + '/api/users' + path = '/services/{}/api/users'.format(name) + r = requests.get(url, allow_redirects=False) + r.raise_for_status() + assert r.status_code == 200 + resp = r.json() + assert isinstance(resp, list) + assert len(resp) >= 1 + assert isinstance(resp[0], dict) + assert 'name' in resp[0] diff --git a/jupyterhub/tests/test_singleuser.py b/jupyterhub/tests/test_singleuser.py index fa82552c..155d7751 100644 --- a/jupyterhub/tests/test_singleuser.py +++ b/jupyterhub/tests/test_singleuser.py @@ -2,7 +2,7 @@ import requests -from .mocking import TestSingleUserSpawner, user_url +from .mocking import TestSingleUserSpawner, public_url from ..utils import url_path_join def test_singleuser_auth(app, io_loop): @@ -15,7 +15,7 @@ def test_singleuser_auth(app, io_loop): user = app.users['nandy'] if not user.running: io_loop.run_sync(user.spawn) - url = user_url(user, app) + url = public_url(app, user) # no cookies, redirects to login page r = requests.get(url) @@ -49,7 +49,7 @@ def test_disable_user_config(app, io_loop): io_loop.run_sync(user.spawn) io_loop.run_sync(lambda : app.proxy.add_user(user)) - url = user_url(user, app) + url = public_url(app, user) # with cookies, login successful r = requests.get(url, cookies=cookies) diff --git a/jupyterhub/user.py b/jupyterhub/user.py index 81d4cdaf..20f3aa8d 100644 --- a/jupyterhub/user.py +++ b/jupyterhub/user.py @@ -173,7 +173,7 @@ class User(HasTraits): @property def host(self): - """Get the *host* for my server (domain[:port])""" + """Get the *host* for my server (proto://domain[:port])""" # FIXME: escaped_name probably isn't escaped enough in general for a domain fragment parsed = urlparse(self.settings['subdomain_host']) h = '%s://%s.%s' % (parsed.scheme, self.escaped_name, parsed.netloc)