Files
jupyterhub/jupyterhub/app.py
Min RK e6335482c5 update some file names
default config file is now `jupyterhub_config.py`,
to be consistent with everything else

add config files, runtime files to .gitignore
2014-11-04 11:58:33 -08:00

833 lines
30 KiB
Python

#!/usr/bin/env python
"""The multi-user notebook application"""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
import binascii
import logging
import os
import socket
import sys
from datetime import datetime
from distutils.version import LooseVersion as V
from getpass import getuser
from subprocess import Popen
# Refuse to import on interpreters older than the minimum supported release.
if sys.version_info < (3, 3):
    raise ValueError("Python < 3.3 not supported: %s" % sys.version)
from jinja2 import Environment, FileSystemLoader
from sqlalchemy.exc import OperationalError
import tornado.httpserver
import tornado.options
from tornado.httpclient import HTTPError
from tornado.ioloop import IOLoop, PeriodicCallback
from tornado.log import LogFormatter, app_log, access_log, gen_log
from tornado import gen, web
import IPython
# The traitlets/config machinery used below requires IPython 3.0 or newer.
if V(IPython.__version__) < V('3.0'):
    raise ImportError(
        "JupyterHub Requires IPython >= 3.0, found %s" % IPython.__version__
    )
from IPython.utils.traitlets import (
Unicode, Integer, Dict, TraitError, List, Bool, Any,
Type, Set, Instance, Bytes,
)
from IPython.config import Application, catch_config_error
here = os.path.dirname(__file__)
from . import handlers, apihandlers
from . import orm
from ._data import DATA_FILES_PATH
from .utils import (
url_path_join,
ISO8601_ms, ISO8601_s,
)
# classes for config
from .auth import Authenticator, PAMAuthenticator
from .spawner import Spawner, LocalProcessSpawner
# Command-line aliases: each key is an option name, each value the
# `Class.trait` it sets (e.g. `--port 443` sets `JupyterHubApp.port`).
# 'f' and 'config' both map to the same config_file trait.
aliases = {
'log-level': 'Application.log_level',
'f': 'JupyterHubApp.config_file',
'config': 'JupyterHubApp.config_file',
'y': 'JupyterHubApp.answer_yes',
'ssl-key': 'JupyterHubApp.ssl_key',
'ssl-cert': 'JupyterHubApp.ssl_cert',
'ip': 'JupyterHubApp.ip',
'port': 'JupyterHubApp.port',
'db': 'JupyterHubApp.db_url',
'pid-file': 'JupyterHubApp.pid_file',
}
# Command-line flags: each key is a flag name, each value a
# (config-dict-to-apply, help text) pair.
flags = {
'debug': ({'Application' : {'log_level': logging.DEBUG}},
"set log level to logging.DEBUG (maximize logging output)"),
'generate-config': ({'JupyterHubApp': {'generate_config': True}},
"generate default config file"),
'no-db': ({'JupyterHubApp': {'db_url': 'sqlite:///:memory:'}},
"disable persisting state database to disk"
),
}
# Used by JupyterHubApp.init_secrets as the os.urandom() length
# when a new cookie secret has to be generated.
SECRET_BYTES = 2048 # the number of bytes to use when generating new secrets
# The Hub application object: holds every configurable setting as a trait,
# and wires together database, proxy, users and the tornado web application.
class JupyterHubApp(Application):
"""An Application for starting a Multi-User Jupyter Notebook server."""
name = 'jupyterhub'
description = """Start a multi-user Jupyter Notebook server
Spawns a configurable-http-proxy and multi-user Hub,
which authenticates users and spawns single-user Notebook servers
on behalf of users.
"""
# NOTE(review): the example below writes to `jupyterhub.py`, while the
# default `config_file` further down is 'jupyterhub_config.py' -- confirm
# which file name the example should advertise.
examples = """
generate default config file:
jupyterhub --generate-config -f /etc/jupyterhub/jupyterhub.py
spawn the server on 10.0.1.2:443 with https:
jupyterhub --ip 10.0.1.2 --port 443 --ssl-key my_ssl.key --ssl-cert my_ssl.cert
"""
# Wrap the module-level alias/flag tables as traits of the application.
aliases = Dict(aliases)
flags = Dict(flags)
# Other configurable classes listed on the application.
# NOTE(review): presumably consumed by the base Application for --help and
# config-file generation -- confirm against IPython.config.Application.
classes = List([
Spawner,
LocalProcessSpawner,
Authenticator,
PAMAuthenticator,
])
# --- config file handling -------------------------------------------------
# Loaded by initialize(); written by write_config_file() when
# generate_config is set.
config_file = Unicode('jupyterhub_config.py', config=True,
help="The config file to load",
)
generate_config = Bool(False, config=True,
help="Generate default config file",
)
# Skips the overwrite prompt in write_config_file().
answer_yes = Bool(False, config=True,
help="Answer yes to any questions (e.g. confirm overwrite)"
)
# Empty string disables the PID file (see write_pid_file / cleanup).
pid_file = Unicode('', config=True,
help="""File to write PID
Useful for daemonizing jupyterhub.
"""
)
# --- periodic callbacks (both scheduled in start()) -----------------------
last_activity_interval = Integer(600, config=True,
help="Interval (in seconds) at which to update last-activity timestamps."
)
proxy_check_interval = Integer(30, config=True,
help="Interval (in seconds) at which to check if the proxy is running."
)
# Root of the 'templates' and 'static' directories used in
# init_tornado_settings().
data_files_path = Unicode(DATA_FILES_PATH, config=True,
help="The location of jupyter data files (e.g. /usr/local/share/jupyter)"
)
# --- public-facing proxy settings -----------------------------------------
# ssl_key / ssl_cert are forwarded to the proxy command line in start_proxy().
ssl_key = Unicode('', config=True,
help="""Path to SSL key file for the public facing interface of the proxy
Use with ssl_cert
"""
)
ssl_cert = Unicode('', config=True,
help="""Path to SSL certificate file for the public facing interface of the proxy
Use with ssl_key
"""
)
# ip / port are copied onto the proxy's public server in init_proxy().
ip = Unicode('', config=True,
help="The public facing ip of the proxy"
)
port = Integer(8000, config=True,
help="The public facing port of the proxy"
)
base_url = Unicode('/', config=True,
help="The base URL of the entire application"
)
# Passed as keyword arguments to jinja2.Environment in init_tornado_settings().
jinja_environment_options = Dict(config=True,
help="Supply extra arguments that will be passed to Jinja environment."
)
# First element of the command line launched by start_proxy().
proxy_cmd = Unicode('configurable-http-proxy', config=True,
help="""The command to start the http proxy.
Only override if configurable-http-proxy is not on your PATH
"""
)
# No static default: the value comes from _proxy_auth_token_default().
proxy_auth_token = Unicode(config=True,
help="""The Proxy Auth token.
Loaded from the CONFIGPROXY_AUTH_TOKEN env variable by default.
"""
)
def _proxy_auth_token_default(self):
token = os.environ.get('CONFIGPROXY_AUTH_TOKEN', None)
if not token:
self.log.warn('\n'.join([
"",
"Generating CONFIGPROXY_AUTH_TOKEN. Restarting the Hub will require restarting the proxy.",
"Set CONFIGPROXY_AUTH_TOKEN env or JupyterHubApp.proxy_auth_token config to avoid this message.",
"",
]))
token = orm.new_token()
return token
# Address of the proxy's API (as opposed to its public interface); both
# values are copied onto the proxy's api_server record in init_proxy().
proxy_api_ip = Unicode('localhost', config=True,
help="The ip for the proxy API handlers"
)
# No static default: _proxy_api_port_default() derives it from `port`.
proxy_api_port = Integer(config=True,
help="The port for the proxy API handlers"
)
def _proxy_api_port_default(self):
return self.port + 1
# Where the Hub process itself is reachable; hub_ip and hub_port are stored
# on the Hub's server record in init_hub(), and start() listens on hub_port.
hub_port = Integer(8081, config=True,
help="The port for this process"
)
hub_ip = Unicode('localhost', config=True,
help="The ip for this process"
)
# URL prefix under which all Hub handlers are mounted (see init_handlers()).
# _hub_prefix_default() computes the default from base_url, and
# _hub_prefix_changed() normalizes any assigned value.
hub_prefix = Unicode('/hub/', config=True,
help="The prefix for the hub server. Must not be '/'"
)
def _hub_prefix_default(self):
    """Default hub prefix: '/hub/' joined onto the application's base URL."""
    application_root = self.base_url
    return url_path_join(application_root, '/hub/')
def _hub_prefix_changed(self, name, old, new):
if new == '/':
raise TraitError("'/' is not a valid hub prefix")
newnew = new
if not new.startswith('/'):
newnew = '/' + new
if not newnew.endswith('/'):
newnew = newnew + '/'
if not newnew.startswith(self.base_url):
newnew = url_path_join(self.base_url, newnew)
if newnew != new:
self.hub_prefix = newnew
# The `env` metadata names the environment variable that init_secrets()
# falls back to when no secret is configured.
cookie_secret = Bytes(config=True, env='JPY_COOKIE_SECRET',
help="""The cookie secret to use to encrypt cookies.
Loaded from the JPY_COOKIE_SECRET env variable by default.
"""
)
# init_secrets() reads the secret from this file, and writes a newly
# generated secret back to it.
cookie_secret_file = Unicode('jupyterhub_cookie_secret', config=True,
help="""File in which to store the cookie secret."""
)
# Class (PAMAuthenticator by default) instantiated by
# _authenticator_default() to build `authenticator`.
authenticator_class = Type(PAMAuthenticator, Authenticator,
config=True,
help="""Class for authenticating users.
This should be a class with the following form:
- constructor takes one kwarg: `config`, the IPython config object.
- is a tornado.gen.coroutine
- returns username on success, None on failure
- takes two arguments: (handler, data),
where `handler` is the calling web.RequestHandler,
and `data` is the POST form data from the login page.
"""
)
# The authenticator instance itself; not configurable (no config=True).
authenticator = Instance(Authenticator)
def _authenticator_default(self):
return self.authenticator_class(config=self.config)
# class for spawning single-user servers
# (instantiated per user in init_users(), and handed to tornado via settings)
spawner_class = Type(LocalProcessSpawner, Spawner,
config=True,
help="""The class to use for spawning single-user servers.
Should be a subclass of Spawner.
"""
)
# Database URL passed to orm.new_session_factory() in init_db().
# _db_url_changed() rewrites a plain filename into a sqlite:/// URL.
db_url = Unicode('sqlite:///jupyterhub.sqlite', config=True,
help="url for the database. e.g. `sqlite:///jupyterhub.sqlite`"
)
def _db_url_changed(self, name, old, new):
if '://' not in new:
# assume sqlite, if given as a plain filename
self.db_url = 'sqlite:///%s' % new
# Extra keyword arguments forwarded to orm.new_session_factory() in init_db().
db_kwargs = Dict(config=True,
help="""Include any kwargs to pass to the database connection.
See sqlalchemy.create_engine for details.
"""
)
# Passed as `reset=` to orm.new_session_factory() in init_db().
reset_db = Bool(False, config=True,
help="Purge and reset the database."
)
# Passed as `echo=` to orm.new_session_factory() in init_db().
debug_db = Bool(False, config=True,
help="log all database transactions. This has A LOT of output"
)
# Runtime state, not configurable: both are assigned in init_db().
db = Any()
session_factory = Any()
# Seeded into the database by init_users().
admin_users = Set(config=True,
help="""set of usernames of admin users
If unspecified, only the user that launches the server will be admin.
"""
)
# User-supplied tornado settings; init_tornado_settings() lets these
# override the settings it computes.
tornado_settings = Dict(config=True)
# URL routing table, filled in by init_handlers().
handlers = List()
# Use tornado's LogFormatter (imported from tornado.log) for log output.
_log_formatter_cls = LogFormatter
def _log_level_default(self):
return logging.INFO
def _log_datefmt_default(self):
"""Exclude date from default date format"""
return "%H:%M:%S"
def _log_format_default(self):
"""override default log format to include time"""
return "%(color)s[%(levelname)1.1s %(asctime)s.%(msecs).03d %(name)s]%(end_color)s %(message)s"
def init_logging(self):
    """Route tornado's log output through the application logger."""
    app_logger = self.log

    # Tornado uses a root logger that self.log is a child of. The logging
    # module dispatches each message to a logger and all of its ancestors
    # until propagate is False, so stop propagation here to avoid
    # double log messages.
    app_logger.propagate = False

    # hook up tornado 3's loggers to our app handlers
    for tornado_log in (app_log, access_log, gen_log):
        # ensure all log statements identify the application they come from
        tornado_log.name = app_logger.name

    # Re-parent the 'tornado' logger under the application logger and
    # make it follow the application's log level.
    tornado_root = logging.getLogger('tornado')
    tornado_root.propagate = True
    tornado_root.parent = app_logger
    tornado_root.setLevel(app_logger.level)
def init_ports(self):
    """Verify that the hub, the proxy and the proxy API use distinct ports.

    Raises:
        TraitError: if any two of the three ports coincide.
    """
    hub_port = self.hub_port
    if hub_port == self.port:
        raise TraitError("The hub and proxy cannot both listen on port %i" % self.port)

    api_port = self.proxy_api_port
    if hub_port == api_port:
        raise TraitError("The hub and proxy API cannot both listen on port %i" % hub_port)
    if api_port == self.port:
        raise TraitError("The proxy's public and API ports cannot both be %i" % self.port)
@staticmethod
def add_url_prefix(prefix, handlers):
    """Prepend `prefix` to the URL pattern of every handler spec.

    `handlers` is a list of tuples whose first element is the URL
    pattern. The list is modified in place and also returned.
    """
    for index, spec in enumerate(handlers):
        prefixed_pattern = url_path_join(prefix, spec[0])
        handlers[index] = (prefixed_pattern,) + tuple(spec[1:])
    return handlers
def init_handlers(self):
    """Assemble the tornado URL routing table in `self.handlers`."""
    collected = []
    handler_sources = (
        handlers.default_handlers,
        apihandlers.default_handlers,
        # load handlers from the authenticator
        self.authenticator.get_handlers(self),
    )
    for source in handler_sources:
        collected.extend(source)

    # everything gathered so far lives under the hub prefix
    self.handlers = self.add_url_prefix(self.hub_prefix, collected)

    # some extra handlers, outside hub_prefix
    hub_prefix = self.hub_prefix
    prefix_without_slash = (
        r"%s" % hub_prefix.rstrip('/'),
        web.RedirectHandler,
        {
            "url": hub_prefix,
            "permanent": False,
        },
    )
    outside_prefix = (r"(?!%s).*" % hub_prefix, handlers.PrefixRedirectHandler)
    catch_all = (r'(.*)', handlers.Template404)
    self.handlers.extend([prefix_without_slash, outside_prefix, catch_all])
def _check_db_path(self, path):
"""More informative log messages for failed filesystem access"""
path = os.path.abspath(path)
parent, fname = os.path.split(path)
user = getuser()
if not os.path.isdir(parent):
self.log.error("Directory %s does not exist", parent)
if os.path.exists(parent) and not os.access(parent, os.W_OK):
self.log.error("%s cannot create files in %s", user, parent)
if os.path.exists(path) and not os.access(path, os.W_OK):
self.log.error("%s cannot edit %s", user, path)
def init_secrets(self):
    """Load the cookie secret, generating and persisting one if needed.

    Load priority: 1. config, 2. env, 3. file. The environment variable
    name comes from the `env` metadata of the `cookie_secret` trait and
    is expected to hold a hex string. The secret file holds base64 text
    and is ignored (with an error logged) when group or others have any
    permission on it.

    When no secret can be loaded, a new random one is generated and
    written to `cookie_secret_file`. The resulting secret is stored on
    `self.cookie_secret`.
    """
    trait_name = 'cookie_secret'
    trait = self.traits()[trait_name]
    env_name = trait.get_metadata('env')
    secret_file = os.path.abspath(
        os.path.expanduser(self.cookie_secret_file)
    )
    secret = self.cookie_secret
    secret_from = 'config'

    # load priority: 1. config, 2. env, 3. file
    if not secret and os.environ.get(env_name):
        secret_from = 'env'
        self.log.info("Loading %s from env[%s]", trait_name, env_name)
        secret = binascii.a2b_hex(os.environ[env_name])

    if not secret and os.path.exists(secret_file):
        secret_from = 'file'
        perm = os.stat(secret_file).st_mode
        if perm & 0o077:
            # readable/writable by group or others: do not trust it
            self.log.error("Bad permissions on %s", secret_file)
        else:
            self.log.info("Loading %s from %s", trait_name, secret_file)
            with open(secret_file) as f:
                b64_secret = f.read()
            try:
                secret = binascii.a2b_base64(b64_secret)
            except ValueError as e:
                # binascii.Error is a ValueError subclass; non-ASCII text
                # raises a plain ValueError
                self.log.error("%s does not contain b64 key: %s", secret_file, e)

    if not secret:
        secret_from = 'new'
        self.log.debug("Generating new %s", trait_name)
        secret = os.urandom(SECRET_BYTES)

    if secret_file and secret_from == 'new':
        # if we generated a new secret, store it in the secret_file
        self.log.info("Writing %s to %s", trait_name, secret_file)
        b64_secret = binascii.b2a_base64(secret).decode('ascii')
        # Create the file with owner-only permissions from the start, so
        # the secret is never on disk with default-umask permissions.
        fd = os.open(secret_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, 'w') as f:
            f.write(b64_secret)
        # The mode given to os.open() only applies to newly created files;
        # a pre-existing file (e.g. one rejected above) still needs chmod.
        try:
            os.chmod(secret_file, 0o600)
        except OSError:
            self.log.warn("Failed to set permissions on %s", secret_file)

    # store the loaded trait value
    self.cookie_secret = secret
def init_db(self):
    """Open the database session, exiting with status 1 on connection failure."""
    db_url = self.db_url
    self.log.debug("Connecting to db: %s", db_url)
    try:
        self.session_factory = orm.new_session_factory(
            db_url,
            reset=self.reset_db,
            echo=self.debug_db,
            **self.db_kwargs
        )
        self.db = self.session_factory()
    except OperationalError:
        self.log.error("Failed to connect to db: %s", db_url)
        self.log.debug("Database error was:", exc_info=True)
        if db_url.startswith('sqlite:///'):
            # for sqlite files, explain the probable filesystem problem
            sqlite_path = db_url.split(':///', 1)[1]
            self._check_db_path(sqlite_path)
        self.exit(1)
def init_hub(self):
    """Create or refresh the Hub record in the database from the config."""
    self.hub = hub = self.db.query(orm.Hub).first()
    if hub is not None:
        # a Hub row already exists: bring its server up to date
        hub_server = hub.server
        hub_server.ip = self.hub_ip
        hub_server.port = self.hub_port
        hub_server.base_url = self.hub_prefix
    else:
        hub_server = orm.Server(
            ip=self.hub_ip,
            port=self.hub_port,
            base_url=self.hub_prefix,
            cookie_name='jupyter-hub-token',
        )
        self.hub = orm.Hub(server=hub_server)
        self.db.add(self.hub)
    self.db.commit()
def init_users(self):
    """Synchronise admins, the whitelist and spawner state with the database."""
    db = self.db

    if not self.admin_users:
        # add current user as admin if there aren't any others
        existing_admins = db.query(orm.User).filter(orm.User.admin==True)
        if existing_admins.first() is None:
            self.admin_users.add(getuser())

    # ensure anyone specified as admin in config is admin in db
    for admin_name in self.admin_users:
        admin = orm.User.find(db, admin_name)
        if admin is not None:
            admin.admin = True
        else:
            db.add(orm.User(name=admin_name, admin=True))

    # the admin_users config variable will never be used after this point.
    # only the database values will be referenced.

    whitelist = self.authenticator.whitelist
    if not whitelist:
        self.log.info("Not using whitelist. Any authenticated user will be allowed.")

    # add whitelisted users to the db
    for allowed_name in whitelist:
        if orm.User.find(db, allowed_name) is None:
            db.add(orm.User(name=allowed_name))

    if whitelist:
        # fill the whitelist with any users loaded from the db,
        # so we are consistent in both directions.
        # This lets whitelist be used to set up initial list,
        # but changes to the whitelist can occur in the database,
        # and persist across sessions.
        for known_user in db.query(orm.User):
            whitelist.add(known_user.name)

    # The whitelist set and the users in the db are now the same.
    # From this point on, any user changes should be done simultaneously
    # to the whitelist set and user db, unless the whitelist is empty (all users allowed).
    db.commit()

    # load any still-active spawners from the state stored in the db
    run_sync = IOLoop().run_sync
    summaries = ['']

    def describe(user):
        """One-line summary of a user for the debug log."""
        fields = ['{0: >8}'.format(user.name)]
        if user.admin:
            fields.append('admin')
        if user.server:
            fields.append('running at %s' % user.server)
        return ' '.join(fields)

    @gen.coroutine
    def user_stopped(user):
        """Poll callback: the user's server exited, so drop its route and stop it."""
        status = yield user.spawner.poll()
        self.log.warn("User %s server stopped with exit code: %s",
            user.name, status,
        )
        yield self.proxy.delete_user(user)
        yield user.stop()

    for user in db.query(orm.User):
        if user.state:
            self.log.debug("Loading state for %s from db", user.name)
            user.spawner = spawner = self.spawner_class(
                user=user, hub=self.hub, config=self.config,
            )
            status = run_sync(spawner.poll)
            if status is None:
                self.log.info("%s still running", user.name)
                spawner.add_poll_callback(user_stopped, user)
                spawner.start_polling()
            else:
                # user not running. This is expected if server is None,
                # but indicates the user's server died while the Hub wasn't running
                # if user.server is defined.
                report = self.log.warn if user.server else self.log.debug
                report("%s not running.", user.name)
                user.server = None
        else:
            # without spawner state, server isn't valid
            user.server = None
        summaries.append(describe(user))

    self.log.debug("Loaded users: %s", '\n'.join(summaries))
    db.commit()
def init_proxy(self):
    """Create or refresh the Proxy record in the database from the config."""
    self.proxy = proxy = self.db.query(orm.Proxy).first()
    if proxy is None:
        self.proxy = proxy = orm.Proxy(
            public_server=orm.Server(),
            api_server=orm.Server(),
        )
        self.db.add(proxy)
        self.db.commit()

    proxy.auth_token = self.proxy_auth_token  # not persisted
    proxy.log = self.log

    public_server = proxy.public_server
    public_server.ip = self.ip
    public_server.port = self.port

    api_server = proxy.api_server
    api_server.ip = self.proxy_api_ip
    api_server.port = self.proxy_api_port
    api_server.base_url = '/api/routes/'

    self.db.commit()
# Coroutine that makes sure a configurable-http-proxy is available:
# it either adopts a proxy that is already reachable, or launches a new
# subprocess and waits for both of its servers to come up.
# NOTE(review): this listing has lost its indentation, so the nesting of some
# statements (e.g. the last `wait_up` call) cannot be read off the text here.
@gen.coroutine
def start_proxy(self):
"""Actually start the configurable-http-proxy"""
# check for proxy
if self.proxy.public_server.is_up() or self.proxy.api_server.is_up():
# check for *authenticated* access to the proxy (auth token can change)
try:
yield self.proxy.get_routes()
except (HTTPError, OSError, socket.error) as e:
# A 403 means something answered but rejected our token; any other
# failure suggests a different service occupies the address.
if isinstance(e, HTTPError) and e.code == 403:
msg = "Did CONFIGPROXY_AUTH_TOKEN change?"
else:
msg = "Is something else using %s?" % self.proxy.public_server.url
self.log.error("Proxy appears to be running at %s, but I can't access it (%s)\n%s",
self.proxy.public_server.url, e, msg)
self.exit(1)
return
else:
self.log.info("Proxy already running at: %s", self.proxy.public_server.url)
# No subprocess of our own: start() only schedules check_proxy when
# proxy_process is set, so an adopted proxy is not monitored.
self.proxy_process = None
return
# Hand the auth token to the child process through its environment.
env = os.environ.copy()
env['CONFIGPROXY_AUTH_TOKEN'] = self.proxy.auth_token
cmd = [self.proxy_cmd,
'--ip', self.proxy.public_server.ip,
'--port', str(self.proxy.public_server.port),
'--api-ip', self.proxy.api_server.ip,
'--api-port', str(self.proxy.api_server.port),
'--default-target', self.hub.server.host,
]
# Debug logging of the proxy is deliberately switched off (`if False`);
# the intended condition is kept in the comment below.
if False:
# if self.log_level == logging.DEBUG:
cmd.extend(['--log-level', 'debug'])
if self.ssl_key:
cmd.extend(['--ssl-key', self.ssl_key])
if self.ssl_cert:
cmd.extend(['--ssl-cert', self.ssl_cert])
self.log.info("Starting proxy @ %s", self.proxy.public_server.url)
self.log.debug("Proxy cmd: %s", cmd)
self.proxy_process = Popen(cmd, env=env)
# Helper: raise RuntimeError as soon as the proxy subprocess has exited.
def _check():
status = self.proxy_process.poll()
if status is not None:
e = RuntimeError("Proxy failed to start with exit code %i" % status)
# py2-compatible `raise e from None`
e.__cause__ = None
raise e
# For each of the two proxy servers: up to 10 attempts, each checking
# that the process is still alive and then waiting via wait_up(1).
for server in (self.proxy.public_server, self.proxy.api_server):
for i in range(10):
_check()
try:
yield server.wait_up(1)
except TimeoutError:
continue
else:
break
# One more wait outside the try/except, so a TimeoutError here propagates.
yield server.wait_up(1)
self.log.debug("Proxy started and appears to be up")
@gen.coroutine
def check_proxy(self):
    """Restart the proxy subprocess if it has exited.

    Meant to be run periodically. Does nothing while the proxy process
    started by this Hub is still alive. If the process has exited, the
    proxy is started again and all user routes are re-added.

    `proxy_process` is None when `start_proxy` found an already running
    proxy instead of launching one. That can happen after a restart
    triggered from here, so it is treated as "nothing to monitor" rather
    than dereferenced.
    """
    proxy_process = self.proxy_process
    if proxy_process is None:
        # the proxy was not started by us; we cannot poll or restart it
        return

    exit_code = proxy_process.poll()
    if exit_code is None:
        # still running
        return

    self.log.error("Proxy stopped with exit code %r", exit_code)
    yield self.start_proxy()
    self.log.info("Setting up routes on new proxy")
    yield self.proxy.add_all_users()
    self.log.info("New proxy back up, and good to go")
def init_tornado_settings(self):
    """Set up the tornado settings dict.

    Builds the jinja2 environment and the login/logout URLs, gathers
    everything the handlers need into one dict, and then lets any
    user-configured `tornado_settings` override the computed values.
    The merged dict replaces `self.tornado_settings`.
    """
    base_url = self.hub.server.base_url

    # A plain string path. (A trailing comma here previously turned the
    # value into a 1-tuple, which then ended up in the `template_path`
    # setting as well.)
    template_path = os.path.join(self.data_files_path, 'templates')
    jinja_env = Environment(
        loader=FileSystemLoader(template_path),
        **self.jinja_environment_options
    )

    login_url = self.authenticator.login_url(base_url)
    logout_url = self.authenticator.logout_url(base_url)

    settings = dict(
        config=self.config,
        log=self.log,
        db=self.db,
        proxy=self.proxy,
        hub=self.hub,
        admin_users=self.admin_users,
        authenticator=self.authenticator,
        spawner_class=self.spawner_class,
        base_url=self.base_url,
        cookie_secret=self.cookie_secret,
        login_url=login_url,
        logout_url=logout_url,
        static_path=os.path.join(self.data_files_path, 'static'),
        static_url_prefix=url_path_join(base_url, 'static/'),
        template_path=template_path,
        jinja2_env=jinja_env,
    )
    # allow configured settings to have priority
    settings.update(self.tornado_settings)
    self.tornado_settings = settings
def init_tornado_application(self):
    """Build the tornado web application from the handlers and settings."""
    settings = self.tornado_settings
    self.tornado_application = web.Application(self.handlers, **settings)
def write_pid_file(self):
    """Write this process's PID to `pid_file`, if one is configured."""
    pid_path = self.pid_file
    if not pid_path:
        # no PID file requested
        return
    pid = os.getpid()
    self.log.debug("Writing PID %i to %s", pid, pid_path)
    with open(pid_path, 'w') as f:
        f.write('%i' % pid)
@catch_config_error
def initialize(self, *args, **kwargs):
    """Parse the command line, load the config file and run all init steps.

    When only asked to generate a config file, stop right after the
    base-class initialization: none of the runtime pieces are needed.
    """
    super(JupyterHubApp, self).initialize(*args, **kwargs)
    if self.generate_config:
        return
    self.load_config_file(self.config_file)

    # The order matters: later steps use what earlier ones set up
    # (e.g. init_hub and init_proxy query the db opened by init_db).
    startup_steps = (
        self.init_logging,
        self.write_pid_file,
        self.init_ports,
        self.init_secrets,
        self.init_db,
        self.init_hub,
        self.init_proxy,
        self.init_users,
        self.init_handlers,
        self.init_tornado_settings,
        self.init_tornado_application,
    )
    for step in startup_steps:
        step()
@gen.coroutine
def cleanup(self):
    """Shutdown our various subprocesses and cleanup runtime files."""
    self.log.info("Cleaning up single-user servers...")
    # request (async) process termination
    pending_stops = [
        user.stop()
        for user in self.db.query(orm.User)
        if user.spawner is not None
    ]

    # clean up proxy while the single-user servers are shutting down
    proxy_process = self.proxy_process
    if proxy_process and proxy_process.poll() is None:
        self.log.info("Cleaning up proxy[%i]...", proxy_process.pid)
        try:
            proxy_process.terminate()
        except Exception as e:
            self.log.error("Failed to terminate proxy process: %s", e)

    # wait for the stop requests to finish
    for stopping in pending_stops:
        try:
            yield stopping
        except Exception as e:
            self.log.error("Failed to stop user: %s", e)

    self.db.commit()

    pid_path = self.pid_file
    if pid_path and os.path.exists(pid_path):
        self.log.info("Cleaning up PID file %s", pid_path)
        os.remove(pid_path)

    # everything is cleaned up at this point
    self.log.info("...done")
def write_config_file(self):
    """Write our default config to a .py config file.

    If `config_file` already exists and `answer_yes` is not set, the user
    is asked to confirm the overwrite. Anything other than an answer
    starting with 'y' leaves the existing file untouched. An interrupt
    (Ctrl-C) or a closed / non-interactive stdin (EOF) counts as 'no'.
    """
    if os.path.exists(self.config_file) and not self.answer_yes:
        def ask():
            prompt = "Overwrite %s with default config? [y/N]" % self.config_file
            try:
                return input(prompt).lower() or 'n'
            except (KeyboardInterrupt, EOFError):
                # EOFError: stdin is closed or exhausted, so nobody can
                # confirm; decline rather than crash with a traceback
                print('') # empty line
                return 'n'

        answer = ask()
        while not answer.startswith(('y', 'n')):
            print("Please answer 'yes' or 'no'")
            answer = ask()
        if answer.startswith('n'):
            return

    config_text = self.generate_config_file()
    if isinstance(config_text, bytes):
        config_text = config_text.decode('utf8')
    print("Writing default config to: %s" % self.config_file)
    with open(self.config_file, mode='w') as f:
        f.write(config_text)
@gen.coroutine
def update_last_activity(self):
    """Update User.last_activity timestamps from the proxy.

    Fetches the proxy's routing table and, for every route that carries
    a 'user' entry, moves that user's `last_activity` forward to the
    route's 'last_activity' timestamp if it is more recent. Routes whose
    user is not in the database are logged and skipped.
    """
    routes = yield self.proxy.get_routes()
    for route in routes.values():
        if 'user' not in route:
            # not a user route, ignore it
            continue
        user = orm.User.find(self.db, route['user'])
        if user is None:
            self.log.warn("Found no user for route: %s", route)
            continue

        stamp = route['last_activity']
        try:
            dt = datetime.strptime(stamp, ISO8601_ms)
        except ValueError:
            # strptime raises ValueError on a format mismatch: fall back
            # to the second ISO8601 format constant
            dt = datetime.strptime(stamp, ISO8601_s)

        # Defensive: max() cannot compare None with a datetime, so a user
        # without any recorded activity simply takes the proxy's value.
        if user.last_activity is None:
            user.last_activity = dt
        else:
            user.last_activity = max(user.last_activity, dt)

    self.db.commit()
def start(self):
    """Start the whole thing.

    With `generate_config` set, only the default config file is written.
    Otherwise the proxy is started, the periodic callbacks are scheduled,
    and the Hub's HTTP server runs until interrupted, after which
    `cleanup` is run. Exits with status 1 if the proxy cannot be started.
    """
    if self.generate_config:
        self.write_config_file()
        return

    # start the proxy
    try:
        IOLoop().run_sync(self.start_proxy)
    except Exception:
        self.log.critical("Failed to start proxy", exc_info=True)
        # Report the failure through the exit status, the same way
        # start_proxy does for an inaccessible proxy.
        self.exit(1)
        return

    loop = IOLoop.current()
    loop.add_callback(self.proxy.add_all_users)

    if self.proxy_process:
        # only check / restart the proxy if we started it in the first place.
        # this means a restarted Hub cannot restart a Proxy that its
        # predecessor started.
        proxy_checker = PeriodicCallback(self.check_proxy, 1e3 * self.proxy_check_interval)
        proxy_checker.start()

    if self.last_activity_interval:
        activity_updater = PeriodicCallback(self.update_last_activity, 1e3 * self.last_activity_interval)
        activity_updater.start()

    # start the webserver, bound to the configured hub address rather
    # than to every interface
    http_server = tornado.httpserver.HTTPServer(self.tornado_application)
    http_server.listen(self.hub_port, address=self.hub_ip)

    try:
        loop.start()
    except KeyboardInterrupt:
        print("\nInterrupted")
    finally:
        # run the cleanup step (in a new loop, because the interrupted one is unclean)
        IOLoop().run_sync(self.cleanup)
# Module entry point. `launch_instance` is not defined in this file;
# presumably it is inherited from the IPython Application base class.
main = JupyterHubApp.launch_instance

if __name__ == "__main__":
    main()