From a54ce4051372937756a65b4080a9f7d03255e5be Mon Sep 17 00:00:00 2001 From: Peter Parente Date: Fri, 29 Dec 2017 20:29:05 -0500 Subject: [PATCH 01/17] Unify docs, test dev env --- Makefile | 6 +++--- environment.yml | 13 +++++++++++++ readthedocs.yml | 4 ++++ requirements-test.txt | 3 --- 4 files changed, 20 insertions(+), 6 deletions(-) create mode 100644 environment.yml create mode 100644 readthedocs.yml delete mode 100644 requirements-test.txt diff --git a/Makefile b/Makefile index 7ac1a768..36eb07d0 100644 --- a/Makefile +++ b/Makefile @@ -54,10 +54,10 @@ dev/%: PORT?=8888 dev/%: ## run a foreground container for a stack docker run -it --rm -p $(PORT):8888 $(DARGS) $(OWNER)/$(notdir $@) $(ARGS) -test-reqs: # install libraries required to run the integration tests - pip install -r requirements-test.txt +dev-env: # install libraries required to build docs and run tests + conda env create -f environment.yml -test/%: +test/%: ## run tests against a stack @TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest test test/base-notebook: ## test supported options in the base notebook diff --git a/environment.yml b/environment.yml new file mode 100644 index 00000000..ba969045 --- /dev/null +++ b/environment.yml @@ -0,0 +1,13 @@ +name: docker-stacks +channels: + - conda-forge +dependencies: +- python=3.6 +- pytest +- requests +- sphinx>=1.6 +- sphinx_rtd_theme +- pip: + - docker + - jupyter_alabaster_theme + - recommonmark==0.4.0 \ No newline at end of file diff --git a/readthedocs.yml b/readthedocs.yml new file mode 100644 index 00000000..0e4eabfc --- /dev/null +++ b/readthedocs.yml @@ -0,0 +1,4 @@ +conda: + file: environment.yml +python: + version: 3 \ No newline at end of file diff --git a/requirements-test.txt b/requirements-test.txt deleted file mode 100644 index f6cdf908..00000000 --- a/requirements-test.txt +++ /dev/null @@ -1,3 +0,0 @@ -docker -pytest -requests \ No newline at end of file From 118198ac6528bf094361d33d83f8f09c57e13192 Mon Sep 17 00:00:00 2001 From: Peter 
Parente Date: Fri, 29 Dec 2017 20:32:22 -0500 Subject: [PATCH 02/17] Sphinx quickstart --- docs/Makefile | 20 ++++++ docs/conf.py | 169 +++++++++++++++++++++++++++++++++++++++++++++++++ docs/index.rst | 20 ++++++ docs/make.bat | 36 +++++++++++ 4 files changed, 245 insertions(+) create mode 100644 docs/Makefile create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/make.bat diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..7723800a --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SPHINXPROJ = docker-stacks +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 00000000..fda5f2d3 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# docker-stacks documentation build configuration file, created by +# sphinx-quickstart on Fri Dec 29 20:32:10 2017. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. 
If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = 'docker-stacks' +copyright = '2017, Project Jupyter' +author = 'Project Jupyter' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '1.0' +# The full version, including alpha/beta/rc tags. +release = '1.0' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This patterns also effect to html_static_path and html_extra_path +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# The name of the Pygments (syntax highlighting) style to use. 
+pygments_style = 'sphinx' + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# This is required for the alabaster theme +# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars +html_sidebars = { + '**': [ + 'relations.html', # needs 'show_related': True theme option to display + 'searchbox.html', + ] +} + + +# -- Options for HTMLHelp output ------------------------------------------ + +# Output file base name for HTML help builder. +htmlhelp_basename = 'docker-stacksdoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). 
+latex_documents = [ + (master_doc, 'docker-stacks.tex', 'docker-stacks Documentation', + 'Project Jupyter', 'manual'), +] + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'docker-stacks', 'docker-stacks Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'docker-stacks', 'docker-stacks Documentation', + author, 'docker-stacks', 'One line description of project.', + 'Miscellaneous'), +] + + + diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 00000000..dbde53d6 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,20 @@ +.. docker-stacks documentation master file, created by + sphinx-quickstart on Fri Dec 29 20:32:10 2017. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to docker-stacks's documentation! +========================================= + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..4ee38f63 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,36 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build +set SPHINXPROJ=docker-stacks + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd From f70d52da586baec474860d469d467be1e68513ac Mon Sep 17 00:00:00 2001 From: Peter Parente Date: Sat, 30 Dec 2017 22:13:32 -0500 Subject: [PATCH 03/17] Outline, include some info from READMEs --- Makefile | 9 ++- docs/conf.py | 32 ++++---- docs/configuration.md | 177 ++++++++++++++++++++++++++++++++++++++++++ docs/contributing.md | 9 +++ docs/index.rst | 47 +++++++---- docs/using.md | 69 ++++++++++++++++ 6 files changed, 312 insertions(+), 31 deletions(-) create mode 100644 docs/configuration.md create mode 100644 docs/contributing.md create mode 100644 docs/using.md diff --git a/Makefile b/Makefile index 36eb07d0..214f79ce 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
-.PHONY: help test +.PHONY: docs help test -# Use bash for inline if-statements in test target +# Use bash for inline if-statements in arch_patch target SHELL:=bash OWNER:=jupyter ARCH:=$(shell uname -m) @@ -54,9 +54,12 @@ dev/%: PORT?=8888 dev/%: ## run a foreground container for a stack docker run -it --rm -p $(PORT):8888 $(DARGS) $(OWNER)/$(notdir $@) $(ARGS) -dev-env: # install libraries required to build docs and run tests +dev-env: ## install libraries required to build docs and run tests conda env create -f environment.yml +docs: ## build HTML documentation + make -C docs html + test/%: ## run tests against a stack @TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest test diff --git a/docs/conf.py b/docs/conf.py index fda5f2d3..83f81094 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,17 +21,23 @@ # import sys # sys.path.insert(0, os.path.abspath('.')) +# For conversion from markdown to html +import recommonmark.parser +from recommonmark.transform import AutoStructify + # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. # -# needs_sphinx = '1.0' +needs_sphinx = '1.4' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = [] +extensions = [ + 'jupyter_alabaster_theme' +] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -78,12 +84,20 @@ pygments_style = 'sphinx' todo_include_todos = False +# -- Source ------------------------------------------------------------- + +source_parsers = { + '.md': 'recommonmark.parser.CommonMarkParser', +} + +source_suffix = ['.rst', '.md'] + # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. 
# -html_theme = 'alabaster' +html_theme = 'jupyter_alabaster_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -96,18 +110,6 @@ html_theme = 'alabaster' # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] -# Custom sidebar templates, must be a dictionary that maps document names -# to template names. -# -# This is required for the alabaster theme -# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars -html_sidebars = { - '**': [ - 'relations.html', # needs 'show_related': True theme option to display - 'searchbox.html', - ] -} - # -- Options for HTMLHelp output ------------------------------------------ diff --git a/docs/configuration.md b/docs/configuration.md new file mode 100644 index 00000000..07cb13e5 --- /dev/null +++ b/docs/configuration.md @@ -0,0 +1,177 @@ +# Options and Configuration + +## Notebook Options + +The Docker container executes a [`start-notebook.sh` script](./start-notebook.sh) script by default. The `start-notebook.sh` script handles the `NB_UID`, `NB_GID` and `GRANT_SUDO` features documented in the next section, and then executes the `jupyter notebook`. + +You can pass [Jupyter command line options](https://jupyter.readthedocs.io/en/latest/projects/jupyter-command.html) through the `start-notebook.sh` script when launching the container. 
For example, to secure the Notebook server with a custom password hashed using `IPython.lib.passwd()` instead of the default token, run the following: + +``` +docker run -d -p 8888:8888 jupyter/base-notebook start-notebook.sh --NotebookApp.password='sha1:74ba40f8a388:c913541b7ee99d15d5ed31d4226bf7838f83a50e' +``` + +For example, to set the base URL of the notebook server, run the following: + +``` +docker run -d -p 8888:8888 jupyter/base-notebook start-notebook.sh --NotebookApp.base_url=/some/path +``` + +For example, to disable all authentication mechanisms (which is not a recommended practice): + +``` +docker run -d -p 8888:8888 jupyter/base-notebook start-notebook.sh --NotebookApp.token='' +``` + +You can sidestep the `start-notebook.sh` script and run your own commands in the container. See the *Alternative Commands* section later in this document for more information. + +## Docker Options + +You may customize the execution of the Docker container and the command it is running with the following optional arguments. + +* `-e GEN_CERT=yes` - Generates a self-signed SSL certificate and configures Jupyter Notebook to use it to accept encrypted HTTPS connections. +* `-e NB_UID=1000` - Specify the uid of the `jovyan` user. Useful to mount host volumes with specific file ownership. For this option to take effect, you must run the container with `--user root`. (The `start-notebook.sh` script will `su jovyan` after adjusting the user id.) +* `-e NB_GID=100` - Specify the gid of the `jovyan` user. Useful to mount host volumes with specific file ownership. For this option to take effect, you must run the container with `--user root`. (The `start-notebook.sh` script will `su jovyan` after adjusting the group id.) +* `-e GRANT_SUDO=yes` - Gives the `jovyan` user passwordless `sudo` capability. Useful for installing OS packages. For this option to take effect, you must run the container with `--user root`. 
(The `start-notebook.sh` script will `su jovyan` after adding `jovyan` to sudoers.) **You should only enable `sudo` if you trust the user or if the container is running on an isolated host.** +* `-v /some/host/folder/for/work:/home/jovyan/work` - Mounts a host machine directory as folder in the container. Useful when you want to preserve notebooks and other work even after the container is destroyed. **You must grant the within-container notebook user or group (`NB_UID` or `NB_GID`) write access to the host directory (e.g., `sudo chown 1000 /some/host/folder/for/work`).** +* `--group-add users` - use this argument if you are also specifying + a specific user id to launch the container (`-u 5000`), rather than launching the container as root and relying on *NB_UID* and *NB_GID* to set the user and group. + +## SSL Certificates + +You may mount SSL key and certificate files into a container and configure Jupyter Notebook to use them to accept HTTPS connections. For example, to mount a host folder containing a `notebook.key` and `notebook.crt`: + +``` +docker run -d -p 8888:8888 \ + -v /some/host/folder:/etc/ssl/notebook \ + jupyter/base-notebook start-notebook.sh \ + --NotebookApp.keyfile=/etc/ssl/notebook/notebook.key + --NotebookApp.certfile=/etc/ssl/notebook/notebook.crt +``` + +Alternatively, you may mount a single PEM file containing both the key and certificate. For example: + +``` +docker run -d -p 8888:8888 \ + -v /some/host/folder/notebook.pem:/etc/ssl/notebook.pem \ + jupyter/base-notebook start-notebook.sh \ + --NotebookApp.certfile=/etc/ssl/notebook.pem +``` + +In either case, Jupyter Notebook expects the key and certificate to be a base64 encoded text file. The certificate file or PEM may contain one or more certificates (e.g., server, intermediate, and root). 
+ +For additional information about using SSL, see the following: + +* The [docker-stacks/examples](https://github.com/jupyter/docker-stacks/tree/master/examples) for information about how to use [Let's Encrypt](https://letsencrypt.org/) certificates when you run these stacks on a publicly visible domain. +* The [jupyter_notebook_config.py](jupyter_notebook_config.py) file for how this Docker image generates a self-signed certificate. +* The [Jupyter Notebook documentation](https://jupyter-notebook.readthedocs.io/en/latest/public_server.html#securing-a-notebook-server) for best practices about securing a public notebook server in general. + +## Conda Environments + +The default Python 3.x [Conda environment](http://conda.pydata.org/docs/using/envs.html) resides in `/opt/conda`. The `/opt/conda/bin` directory is part of the default `jovyan` user's `$PATH`. That directory is also whitelisted for use in `sudo` commands by the `start.sh` script. + +The `jovyan` user has full read/write access to the `/opt/conda` directory. You can use either `conda` or `pip` to install new packages without any additional permissions. + +``` +# install a package into the default (python 3.x) environment +pip install some-package +conda install some-package +``` + +## Alternative Commands + +### start.sh + +The `start.sh` script supports the same features as the default `start-notebook.sh` script (e.g., `GRANT_SUDO`), but allows you to specify an arbitrary command to execute. 
For example, to run the text-based `ipython` console in a container, do the following: + +``` +docker run -it --rm jupyter/base-notebook start.sh ipython +``` + +Or, to run JupyterLab instead of the classic notebook, run the following: + +``` +docker run -it --rm -p 8888:8888 jupyter/base-notebook start.sh jupyter lab +``` + +This script is particularly useful when you derive a new Dockerfile from this image and install additional Jupyter applications with subcommands like `jupyter console`, `jupyter kernelgateway`, etc. + +### Others + +You can bypass the provided scripts and specify your an arbitrary start command. If you do, keep in mind that certain features documented above will not function (e.g., `GRANT_SUDO`). + +## Image Specifics + +## Spark and PySpark + +### Using Spark Local Mode + +This configuration is nice for using Spark on small, local data. + +0. Run the container as shown above. +2. Open a Python 2 or 3 notebook. +3. Create a `SparkContext` configured for local mode. + +For example, the first few cells in the notebook might read: + +```python +import pyspark +sc = pyspark.SparkContext('local[*]') + +# do something to prove it works +rdd = sc.parallelize(range(1000)) +rdd.takeSample(False, 5) +``` + +### Connecting to a Spark Cluster on Mesos + +This configuration allows your compute cluster to scale with your data. + +0. [Deploy Spark on Mesos](http://spark.apache.org/docs/latest/running-on-mesos.html). +1. Configure each slave with [the `--no-switch_user` flag](https://open.mesosphere.com/reference/mesos-slave/) or create the `jovyan` user on every slave node. +2. Ensure Python 2.x and/or 3.x and any Python libraries you wish to use in your Spark lambda functions are installed on your Spark workers. +3. Run the Docker container with `--net=host` in a location that is network addressable by all of your Spark workers. (This is a [Spark networking requirement](http://spark.apache.org/docs/latest/cluster-overview.html#components).) 
+ * NOTE: When using `--net=host`, you must also use the flags `--pid=host -e TINI_SUBREAPER=true`. See https://github.com/jupyter/docker-stacks/issues/64 for details. +4. Open a Python 2 or 3 notebook. +5. Create a `SparkConf` instance in a new notebook pointing to your Mesos master node (or Zookeeper instance) and Spark binary package location. +6. Create a `SparkContext` using this configuration. + +For example, the first few cells in a Python 3 notebook might read: + +```python +import os +# make sure pyspark tells workers to use python3 not 2 if both are installed +os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3' + +import pyspark +conf = pyspark.SparkConf() + +# point to mesos master or zookeeper entry (e.g., zk://10.10.10.10:2181/mesos) +conf.setMaster("mesos://10.10.10.10:5050") +# point to spark binary package in HDFS or on local filesystem on all slave +# nodes (e.g., file:///opt/spark/spark-2.2.0-bin-hadoop2.7.tgz) +conf.set("spark.executor.uri", "hdfs://10.122.193.209/spark/spark-2.2.0-bin-hadoop2.7.tgz") +# set other options as desired +conf.set("spark.executor.memory", "8g") +conf.set("spark.core.connection.ack.wait.timeout", "1200") + +# create the context +sc = pyspark.SparkContext(conf=conf) + +# do something to prove it works +rdd = sc.parallelize(range(100000000)) +rdd.sumApprox(3) +``` + +To use Python 2 in the notebook and on the workers, change the `PYSPARK_PYTHON` environment variable to point to the location of the Python 2.x interpreter binary. If you leave this environment variable unset, it defaults to `python`. + +Of course, all of this can be hidden in an [IPython kernel startup script](http://ipython.org/ipython-doc/stable/development/config.html?highlight=startup#startup-files), but "explicit is better than implicit." :) + +## Connecting to a Spark Cluster on Standalone Mode + +Connection to Spark Cluster on Standalone Mode requires the following set of steps: + +0. 
Verify that the docker image (check the Dockerfile) and the Spark Cluster which is being deployed, run the same version of Spark. +1. [Deploy Spark on Standalone Mode](http://spark.apache.org/docs/latest/spark-standalone.html). +2. Run the Docker container with `--net=host` in a location that is network addressable by all of your Spark workers. (This is a [Spark networking requirement](http://spark.apache.org/docs/latest/cluster-overview.html#components).) + * NOTE: When using `--net=host`, you must also use the flags `--pid=host -e TINI_SUBREAPER=true`. See https://github.com/jupyter/docker-stacks/issues/64 for details. +3. The language specific instructions are almost same as mentioned above for Mesos, only the master url would now be something like spark://10.10.10.10:7077 diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 00000000..f087993d --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,9 @@ +# Contributing + +## Package Updates + +## New Packages + +## Tests + +## Community Stacks \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index dbde53d6..02ebce2a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,20 +1,41 @@ -.. docker-stacks documentation master file, created by - sphinx-quickstart on Fri Dec 29 20:32:10 2017. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. +Jupyter Docker Stacks +===================== -Welcome to docker-stacks's documentation! -========================================= +Jupyter Docker Stacks are a set of ready-to-run Docker images containing Jupyter applications and interactive computing tools. You can use a stack image to start a personal Jupyter Notebook server in a local Docker container, to run JupyterLab servers for a team using JupyterHub, to write your own project Dockerfile, and so on. + +**Table of Contents** .. 
toctree:: - :maxdepth: 2 - :caption: Contents: + :maxdepth: 1 + using + features + contributing +Quick Start +----------- -Indices and tables -================== +The examples below may help you get started if you have Docker installed, know which Docker image you want to use, and want to launch a single Jupyter Notebook server in a container. The other pages in this documentation describe additional uses and features in detail.:: -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` + # Run a Jupyter Notebook server in a Docker container started + # from the jupyter/scipy-notebook image built from Git commit 27ba573. + # All files saved in the container are lost when the notebook server exits. + # -ti: pseudo-TTY+STDIN open, so the logs appear in the terminal + # -rm: remove the container on exit + # -p: publish the notebook port 8888 as port 8888 on the host + docker run -ti --rm -p 8888:8888 jupyter/scipy-notebook:27ba573 + + # Run a Jupyter Notebook server in a Docker container started from the + # jupyter/r-notebook image built from Git commit cf1a3aa. + # All files written to ~/work in the container are saved to the + # current working on the host and persist even when the notebook server + # exits. + docker run -ti --rm -p 8888:8888 -v "$PWD":/home/jovyan/work jupyter/r-notebook:cf1a3aa + + # Run a Jupyter Notebook server in a background Docker container started + # from the latest jupyter/all-spark-notebook image available on the local + # machine or Docker Cloud. All files saved in the container are lost + # when the container is destroyed. + # -d: detach, run container in background. + # -P: Publish all exposed ports to random ports + docker run -d -P jupyter/all-spark-notebook:latest diff --git a/docs/using.md b/docs/using.md new file mode 100644 index 00000000..893bed02 --- /dev/null +++ b/docs/using.md @@ -0,0 +1,69 @@ +# Users Guide + +Using one of the Jupyter Docker Stacks requires two choices: + +1. Which Docker image you wish to use +2. 
How you wish to start Docker containers from that image + +This section provides details about the available images and runtimes to inform your choices. + +## Selecting an Image + +### Core Stacks + +The Jupyter team maintains a set of Docker image definitions in the https://github.com/jupyter/docker-stacks GitHub repository. The following table describes these images, and links to their source on GitHub and their builds on Docker Cloud. + +|Name |Description|GitHub |Image Tags| +|----------------------------|-----------|-----------|----------| +|jupyter/base-notebook |||| +|jupyter/minimal-notebook |||| +|jupyter/r-notebook |||| +|jupyter/scipy-notebook |||| +|jupyter/datascience-notebook|||| +|jupyter/tensorflow-notebook |||| +|jupyter/pyspark-notebook |||| +|jupyter/all-spark-notebook |||| +|----------------------------|-|-|-| + +#### Image Relationships + +The following diagram depicts the build dependencies between the core images (aka the `FROM` statement in their Dockerfiles). Any image lower in the tree inherits + +[![Image inheritance diagram](internal/inherit-diagram.svg)](http://interactive.blockdiag.com/?compression=deflate&src=eJyFzTEPgjAQhuHdX9Gws5sQjGzujsaYKxzmQrlr2msMGv-71K0srO_3XGud9NNA8DSfgzESCFlBSdi0xkvQAKTNugw4QnL6GIU10hvX-Zh7Z24OLLq2SjaxpvP10lX35vCf6pOxELFmUbQiUz4oQhYzMc3gCrRt2cWe_FKosmSjyFHC6OS1AwdQWCtyj7sfh523_BI9hKlQ25YdOFdv5fcH0kiEMA) + +#### Versioning + +[Click here for a commented build history of each image, with references to tag/SHA values.](https://github.com/jupyter/docker-stacks/wiki/Docker-build-history) + +The following are quick-links to READMEs about each image and their Docker image tags on Docker Cloud: + +* base-notebook: [README](https://github.com/jupyter/docker-stacks/tree/master/base-notebook), [SHA list](https://hub.docker.com/r/jupyter/base-notebook/tags/) +* minimal-notebook: [README](https://github.com/jupyter/docker-stacks/tree/master/minimal-notebook), [SHA 
list](https://hub.docker.com/r/jupyter/minimal-notebook/tags/) +* scipy-notebook: [README](https://github.com/jupyter/docker-stacks/tree/master/scipy-notebook), [SHA list](https://hub.docker.com/r/jupyter/scipy-notebook/tags/) +* r-notebook: [README](https://github.com/jupyter/docker-stacks/tree/master/r-notebook), [SHA list](https://hub.docker.com/r/jupyter/r-notebook/tags/) +* tensorflow-notebook: [README](https://github.com/jupyter/docker-stacks/tree/master/tensorflow-notebook), [SHA list](https://hub.docker.com/r/jupyter/tensorflow-notebook/tags/) +* datascience-notebook: [README](https://github.com/jupyter/docker-stacks/tree/master/datascience-notebook), [SHA list](https://hub.docker.com/r/jupyter/datascience-notebook/tags/) +* pyspark-notebook: [README](https://github.com/jupyter/docker-stacks/tree/master/pyspark-notebook), [SHA list](https://hub.docker.com/r/jupyter/pyspark-notebook/tags/) +* all-spark-notebook: [README](https://github.com/jupyter/docker-stacks/tree/master/all-spark-notebook), [SHA list](https://hub.docker.com/r/jupyter/all-spark-notebook/tags/) + + +The latest tag in each Docker Hub repository tracks the master branch HEAD reference on GitHub. This is a moving target and will make backward-incompatible changes regularly. +Any 12-character image tag on Docker Hub refers to a git commit SHA here on GitHub. See the Docker build history wiki page for a table of build details. +Stack contents (e.g., new library versions) will be updated upon request via PRs against this project. +Users looking for reproducibility or stability should always refer to specific git SHA tagged images in their work, not latest. +For legacy reasons, there are two additional tags named 3.2 and 4.0 on Docker Hub which point to images prior to our versioning scheme switch. +If you haven't already, pin your image to a tag, e.g. FROM jupyter/scipy-notebook:7c45ec67c8e7. 
latest is a moving target which can change in backward-incompatible ways as packages and operating systems are updated. + +## Community Stacks + +The Jupyter community maintains additional + +## Running a Container + +### Using the Docker Command Line + +### Using JupyterHub + +Every notebook stack is compatible with JupyterHub 0.5 or higher. When running with JupyterHub, you must override the Docker run command to point to the `start-singleuser.sh script, which starts a single-user instance of the Notebook server. See each stack's README for instructions on running with JupyterHub. + +### Using Binder \ No newline at end of file From 8b79f9893af3b0cc1aea65aab8e7143110eadb40 Mon Sep 17 00:00:00 2001 From: Peter Parente Date: Sun, 21 Jan 2018 22:23:02 -0500 Subject: [PATCH 04/17] Add image descriptions and content summaries --- docs/configuration.md | 10 +-- docs/images/inherit.svg | 60 ++++++++++++++++ docs/index.rst | 10 +-- docs/using.md | 152 ++++++++++++++++++++++++++++++---------- 4 files changed, 185 insertions(+), 47 deletions(-) create mode 100644 docs/images/inherit.svg diff --git a/docs/configuration.md b/docs/configuration.md index 07cb13e5..71105873 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -2,7 +2,7 @@ ## Notebook Options -The Docker container executes a [`start-notebook.sh` script](./start-notebook.sh) script by default. The `start-notebook.sh` script handles the `NB_UID`, `NB_GID` and `GRANT_SUDO` features documented in the next section, and then executes the `jupyter notebook`. +The Docker container executes a `start-notebook.sh` script script by default. The `start-notebook.sh` script handles the `NB_UID`, `NB_GID` and `GRANT_SUDO` features documented in the next section, and then executes the `jupyter notebook`. You can pass [Jupyter command line options](https://jupyter.readthedocs.io/en/latest/projects/jupyter-command.html) through the `start-notebook.sh` script when launching the container. 
For example, to secure the Notebook server with a custom password hashed using `IPython.lib.passwd()` instead of the default token, run the following: @@ -101,9 +101,9 @@ You can bypass the provided scripts and specify an arbitrary start command. ## Image Specifics -## Spark and PySpark +### Spark and PySpark -### Using Spark Local Mode +#### Using Spark Local Mode This configuration is nice for using Spark on small, local data. @@ -166,9 +166,9 @@ To use Python 2 in the notebook and on the workers, change the `PYSPARK_PYTHON` Of course, all of this can be hidden in an [IPython kernel startup script](http://ipython.org/ipython-doc/stable/development/config.html?highlight=startup#startup-files), but "explicit is better than implicit." :) -## Connecting to a Spark Cluster on Standalone Mode +#### Connecting to a Spark Cluster in Standalone Mode -Connection to Spark Cluster on Standalone Mode requires the following set of steps: +Connection to Spark Cluster in Standalone Mode requires the following set of steps: 0. Verify that the docker image (check the Dockerfile) and the Spark Cluster which is being deployed, run the same version of Spark. 1. [Deploy Spark on Standalone Mode](http://spark.apache.org/docs/latest/spark-standalone.html). 
diff --git a/docs/images/inherit.svg b/docs/images/inherit.svg new file mode 100644 index 00000000..c28779d6 --- /dev/null +++ b/docs/images/inherit.svg @@ -0,0 +1,60 @@ + + + + + + + + + blockdiag + + + + + + + + + + + + ubuntu@SHA + + base-notebook + + minimal-notebook + + scipy-notebook + + r-notebook + + tensorflow-notebook + + datascience-notebook + + pyspark-notebook + + all-spark-notebook + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/index.rst b/docs/index.rst index 02ebce2a..4907e360 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,7 +9,7 @@ Jupyter Docker Stacks are a set of ready-to-run Docker images containing Jupyter :maxdepth: 1 using - features + configuration contributing Quick Start @@ -18,19 +18,19 @@ Quick Start The examples below may help you get started if you have Docker installed, know which Docker image you want to use, and want to launch a single Jupyter Notebook server in a container. The other pages in this documentation describe additional uses and features in detail.:: # Run a Jupyter Notebook server in a Docker container started - # from the jupyter/scipy-notebook image built from Git commit 27ba573. + # from the jupyter/scipy-notebook image built from Git commit 2c80cf3537ca. # All files saved in the container are lost when the notebook server exits. # -ti: pseudo-TTY+STDIN open, so the logs appear in the terminal # -rm: remove the container on exit # -p: publish the notebook port 8888 as port 8888 on the host - docker run -ti --rm -p 8888:8888 jupyter/scipy-notebook:27ba573 + docker run -ti --rm -p 8888:8888 jupyter/scipy-notebook:2c80cf3537ca # Run a Jupyter Notebook server in a Docker container started from the - # jupyter/r-notebook image built from Git commit cf1a3aa. + # jupyter/r-notebook image built from Git commit e5c5a7d3e52d. # All files written to ~/work in the container are saved to the # current working on the host and persist even when the notebook server # exits. 
- docker run -ti --rm -p 8888:8888 -v "$PWD":/home/jovyan/work jupyter/r-notebook:cf1a3aa + docker run -ti --rm -p 8888:8888 -v "$PWD":/home/jovyan/work jupyter/r-notebook:e5c5a7d3e52d # Run a Jupyter Notebook server in a background Docker container started # from the latest jupyter/all-spark-notebook image available on the local diff --git a/docs/using.md b/docs/using.md index 893bed02..2e8940ee 100644 --- a/docs/using.md +++ b/docs/using.md @@ -11,52 +11,132 @@ This section provides details about the available images and runtimes to inform ### Core Stacks -The Jupyter team maintains a set of Docker image definitions in the https://github.com/jupyter/docker-stacks GitHub repository. The following table describes these images, and links to their source on GitHub and their builds on Docker Cloud. +The Jupyter team maintains a set of Docker image definitions in the [https://github.com/jupyter/docker-stacks](https://github.com/jupyter/docker-stacks) GitHub repository. The following sections describe these images including their contents, relationships, and versioning strategy. -|Name |Description|GitHub |Image Tags| -|----------------------------|-----------|-----------|----------| -|jupyter/base-notebook |||| -|jupyter/minimal-notebook |||| -|jupyter/r-notebook |||| -|jupyter/scipy-notebook |||| -|jupyter/datascience-notebook|||| -|jupyter/tensorflow-notebook |||| -|jupyter/pyspark-notebook |||| -|jupyter/all-spark-notebook |||| -|----------------------------|-|-|-| +#### jupyter/base-notebook + +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/base-notebook) +| [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/base-notebook/Dockerfile) +| [Docker Hub image tags](https://hub.docker.com/r/jupyter/base-notebook/tags/) + +`jupyter/base-notebook` is a small image supporting the [options common across all core stacks](configuration.html). It is the basis for all other stacks. 
+ +* Minimally-functional Jupyter Notebook server (e.g., no [pandoc](https://pandoc.org/) for saving notebooks as PDFs) +* [Miniconda](https://conda.io/miniconda.html) Python 3.x +* No preinstalled scientific computing packages +* Unprivileged user `jovyan` (`uid=1000`, configurable, see options) in group `users` (`gid=100`) with ownership over the `/home/jovyan` and `/opt/conda` paths +* `tini` as the container entrypoint and a `start-notebook.sh` script as the default command +* A `start-singleuser.sh` script useful for launching containers in JupyterHub +* A `start.sh` script useful for running alternative commands in the container (e.g. `ipython`, `jupyter kernelgateway`, `jupyter lab`) +* Options for a self-signed HTTPS certificate and passwordless sudo + +#### jupyter/minimal-notebook + +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/minimal-notebook) +| [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/minimal-notebook/Dockerfile) +| [Docker Hub image tags](https://hub.docker.com/r/jupyter/minimal-notebook/tags/) + +`jupyter/minimal-notebook` adds command line tools useful when working in Jupyter applications. + +* Everything in `jupyter/base-notebook` +* [Pandoc](http://pandoc.org) and [TeX Live](https://www.tug.org/texlive/) for notebook document conversion +* [git](https://git-scm.com/), [emacs](https://www.gnu.org/software/emacs/), [jed](https://www.jedsoft.org/jed/), [vim](http://www.vim.org/), and unzip + +#### jupyter/r-notebook + +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/r-notebook) +| [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/r-notebook/Dockerfile) +| [Docker Hub image tags](https://hub.docker.com/r/jupyter/r-notebook/tags/) + +`jupyter/r-notebook` includes popular packages from the R ecosystem. 
+ +* Everything in `jupyter/minimal-notebook` and its ancestor images +* The [R](https://www.r-project.org/) interpreter and base environment +* [IRKernel](https://irkernel.github.io/) to support R code in Jupyter notebooks +* [tidyverse](https://www.tidyverse.org/) packages, including [ggplot2](http://ggplot2.org/), [dplyr](http://dplyr.tidyverse.org/), [tidyr](http://tidyr.tidyverse.org/), [readr](http://readr.tidyverse.org/), [purrr](http://purrr.tidyverse.org/), [tibble](http://tibble.tidyverse.org/), [stringr](http://stringr.tidyverse.org/), [lubridate](http://lubridate.tidyverse.org/), and [broom](https://cran.r-project.org/web/packages/broom/vignettes/broom.html) from [conda-forge](https://conda-forge.github.io/feedstocks) +* [plyr](https://cran.r-project.org/web/packages/plyr/index.html), [devtools](https://cran.r-project.org/web/packages/devtools/index.html), [shiny](https://shiny.rstudio.com/), [rmarkdown](http://rmarkdown.rstudio.com/), [forecast](https://cran.r-project.org/web/packages/forecast/forecast.pdf), [rsqlite](https://cran.r-project.org/web/packages/RSQLite/index.html), [reshape2](https://cran.r-project.org/web/packages/reshape2/reshape2.pdf), [nycflights13](https://cran.r-project.org/web/packages/nycflights13/index.html), [caret](http://topepo.github.io/caret/index.html), [rcurl](https://cran.r-project.org/web/packages/RCurl/index.html), and [randomforest](https://cran.r-project.org/web/packages/randomForest/randomForest.pdf) packages from [conda-forge](https://conda-forge.github.io/feedstocks) + +#### jupyter/scipy-notebook + +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/scipy-notebook) +| [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/scipy-notebook/Dockerfile) +| [Docker Hub image tags](https://hub.docker.com/r/jupyter/scipy-notebook/tags/) + +`jupyter/scipy-notebook` includes popular packages from the scientific Python ecosystem. 
+ +* Everything in `jupyter/minimal-notebook` and its ancestor images +* [pandas](https://pandas.pydata.org/), [numexpr](https://github.com/pydata/numexpr), [matplotlib](https://matplotlib.org/), [scipy](https://www.scipy.org/), [seaborn](https://seaborn.pydata.org/), [scikit-learn](http://scikit-learn.org/stable/), [scikit-image](http://scikit-image.org/), [sympy](http://www.sympy.org/en/index.html), [cython](http://cython.org/), [patsy](https://patsy.readthedocs.io/en/latest/), [statsmodel](http://www.statsmodels.org/stable/index.html), [cloudpickle](https://github.com/cloudpipe/cloudpickle), [dill](https://pypi.python.org/pypi/dill), [numba](https://numba.pydata.org/), [bokeh](https://bokeh.pydata.org/en/latest/), [sqlalchemy](https://www.sqlalchemy.org/), [hdf5](http://www.h5py.org/), [vincent](http://vincent.readthedocs.io/en/latest/), [beautifulsoup](https://www.crummy.com/software/BeautifulSoup/), [protobuf](https://developers.google.com/protocol-buffers/docs/pythontutorial), and [xlrd](http://www.python-excel.org/) packages +* [ipywidgets](https://ipywidgets.readthedocs.io/en/stable/) for interactive visualizations in Python notebooks +* [Facets](https://github.com/PAIR-code/facets) for visualizing machine learning datasets + +#### jupyter/tensorflow-notebook + +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/tensorflow-notebook) +| [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/tensorflow-notebook/Dockerfile) +| [Docker Hub image tags](https://hub.docker.com/r/jupyter/tensorflow-notebook/tags/) + +`jupyter/tensorflow-notebook` includes popular Python deep learning libraries. 
+ +* Everything in `jupyter/scipy-notebook` and its ancestor images +* [tensorflow](https://www.tensorflow.org/) and [keras](https://keras.io/) machine learning libraries + +#### jupyter/datascience-notebook + +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/datascience-notebook) +| [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/datascience-notebook/Dockerfile) +| [Docker Hub image tags](https://hub.docker.com/r/jupyter/datascience-notebook/tags/) + +`jupyter/datascience-notebook` includes libraries for data analysis from the Julia, Python, and R communities. + +* Everything in the `jupyter/scipy-notebook` and `jupyter/r-notebook` images, and their ancestor images +* The [Julia](https://julialang.org/) compiler and base environment +* [IJulia](https://github.com/JuliaLang/IJulia.jl) to support Julia code in Jupyter notebooks +* [HDF5](https://github.com/JuliaIO/HDF5.jl), [Gadfly](http://gadflyjl.org/stable/), and [RDatasets](https://github.com/johnmyleswhite/RDatasets.jl) packages + +#### jupyter/pyspark-notebook + +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/pyspark-notebook) +| [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/pyspark-notebook/Dockerfile) +| [Docker Hub image tags](https://hub.docker.com/r/jupyter/pyspark-notebook/tags/) + +`jupyter/pyspark-notebook` includes Python support for Apache Spark, optionally on Mesos. 
+ +* Everything in `jupyter/scipy-notebook` and its ancestor images +* [Apache Spark](https://spark.apache.org/) with Hadoop binaries +* [Mesos](http://mesos.apache.org/) client libraries + +#### jupyter/all-spark-notebook + +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/all-spark-notebook) +| [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/all-spark-notebook/Dockerfile) +| [Docker Hub image tags](https://hub.docker.com/r/jupyter/all-spark-notebook/tags/) + +`jupyter/all-spark-notebook` includes Python, R, and Scala support for Apache Spark, optionally on Mesos. + +* Everything in `jupyter/pyspark-notebook` and its ancestor images +* [IRKernel](https://irkernel.github.io/) to support R code in Jupyter notebooks +* [Apache Toree](https://toree.apache.org/) and [spylon-kernel](https://github.com/maxpoint/spylon-kernel) to support Scala code in Jupyter notebooks +* [ggplot2](http://ggplot2.org/), [sparklyr](http://spark.rstudio.com/), and [rcurl](https://cran.r-project.org/web/packages/RCurl/index.html) packages #### Image Relationships -The following diagram depicts the build dependencies between the core images (aka the `FROM` statement in their Dockerfiles). Any image lower in the tree inherits +The following diagram depicts the build dependency tree of the core images. (i.e., the `FROM` statements in their Dockerfiles). Any given image inherits the complete content of all ancestor images pointing to it. 
-[![Image inheritance diagram](internal/inherit-diagram.svg)](http://interactive.blockdiag.com/?compression=deflate&src=eJyFzTEPgjAQhuHdX9Gws5sQjGzujsaYKxzmQrlr2msMGv-71K0srO_3XGud9NNA8DSfgzESCFlBSdi0xkvQAKTNugw4QnL6GIU10hvX-Zh7Z24OLLq2SjaxpvP10lX35vCf6pOxELFmUbQiUz4oQhYzMc3gCrRt2cWe_FKosmSjyFHC6OS1AwdQWCtyj7sfh523_BI9hKlQ25YdOFdv5fcH0kiEMA) +[![Image inheritance diagram](images/inherit.svg)](http://interactive.blockdiag.com/?compression=deflate&src=eJyFzTEPgjAQhuHdX9Gws5sQjGzujsaYKxzmQrlr2msMGv-71K0srO_3XGud9NNA8DSfgzESCFlBSdi0xkvQAKTNugw4QnL6GIU10hvX-Zh7Z24OLLq2SjaxpvP10lX35vCf6pOxELFmUbQiUz4oQhYzMc3gCrRt2cWe_FKosmSjyFHC6OS1AwdQWCtyj7sfh523_BI9hKlQ25YdOFdv5fcH0kiEMA) + +#### Builds + +Pull requests to the `jupyter/docker-stacks` repository trigger builds of all images on Travis CI. These images are for testing purposes only and are not saved for use. When pull requests merge to master, all images rebuild on Docker Cloud and become available to `docker pull` from Docker Hub. #### Versioning -[Click here for a commented build history of each image, with references to tag/SHA values.](https://github.com/jupyter/docker-stacks/wiki/Docker-build-history) +The `latest` tag in each Docker Hub repository tracks the master branch `HEAD` reference on GitHub. `latest` is a moving target, by definition, and will have backward-incompatible changes regularly. -The following are quick-links to READMEs about each image and their Docker image tags on Docker Cloud: +Every image on Docker Hub also receives a 12-character tag which corresponds with the git commit SHA that triggered the image build. You can inspect the state of the `jupyter/docker-stacks` repository for that commit to review the definition of the image (e.g., images with tag 7c45ec67c8e7 were built from [https://github.com/jupyter/docker-stacks/tree/7c45ec67c8e7](https://github.com/jupyter/docker-stacks/tree/7c45ec67c8e7)). 
-* base-notebook: [README](https://github.com/jupyter/docker-stacks/tree/master/base-notebook), [SHA list](https://hub.docker.com/r/jupyter/base-notebook/tags/) -* minimal-notebook: [README](https://github.com/jupyter/docker-stacks/tree/master/minimal-notebook), [SHA list](https://hub.docker.com/r/jupyter/minimal-notebook/tags/) -* scipy-notebook: [README](https://github.com/jupyter/docker-stacks/tree/master/scipy-notebook), [SHA list](https://hub.docker.com/r/jupyter/scipy-notebook/tags/) -* r-notebook: [README](https://github.com/jupyter/docker-stacks/tree/master/r-notebook), [SHA list](https://hub.docker.com/r/jupyter/r-notebook/tags/) -* tensorflow-notebook: [README](https://github.com/jupyter/docker-stacks/tree/master/tensorflow-notebook), [SHA list](https://hub.docker.com/r/jupyter/tensorflow-notebook/tags/) -* datascience-notebook: [README](https://github.com/jupyter/docker-stacks/tree/master/datascience-notebook), [SHA list](https://hub.docker.com/r/jupyter/datascience-notebook/tags/) -* pyspark-notebook: [README](https://github.com/jupyter/docker-stacks/tree/master/pyspark-notebook), [SHA list](https://hub.docker.com/r/jupyter/pyspark-notebook/tags/) -* all-spark-notebook: [README](https://github.com/jupyter/docker-stacks/tree/master/all-spark-notebook), [SHA list](https://hub.docker.com/r/jupyter/all-spark-notebook/tags/) +You must refer to git-SHA image tags when stability and reproducibility are important in your work. (e.g. `FROM jupyter/scipy-notebook:7c45ec67c8e7`, `docker run -it --rm jupyter/scipy-notebook:7c45ec67c8e7`). You should only use `latest` when a one-off container instance is acceptable (e.g., you want to briefly try a new library in a notebook). - -The latest tag in each Docker Hub repository tracks the master branch HEAD reference on GitHub. This is a moving target and will make backward-incompatible changes regularly. -Any 12-character image tag on Docker Hub refers to a git commit SHA here on GitHub. 
See the Docker build history wiki page for a table of build details. -Stack contents (e.g., new library versions) will be updated upon request via PRs against this project. -Users looking for reproducibility or stability should always refer to specific git SHA tagged images in their work, not latest. -For legacy reasons, there are two additional tags named 3.2 and 4.0 on Docker Hub which point to images prior to our versioning scheme switch. -If you haven't already, pin your image to a tag, e.g. FROM jupyter/scipy-notebook:7c45ec67c8e7. latest is a moving target which can change in backward-incompatible ways as packages and operating systems are updated. - -## Community Stacks - -The Jupyter community maintains additional +### Community Stacks ## Running a Container @@ -64,6 +144,4 @@ The Jupyter community maintains additional ### Using JupyterHub -Every notebook stack is compatible with JupyterHub 0.5 or higher. When running with JupyterHub, you must override the Docker run command to point to the `start-singleuser.sh script, which starts a single-user instance of the Notebook server. See each stack's README for instructions on running with JupyterHub. 
- ### Using Binder \ No newline at end of file From 86b9d4dae6e6b52ea5538bf35ada46ee17b83537 Mon Sep 17 00:00:00 2001 From: Peter Parente Date: Sun, 28 Jan 2018 22:43:24 -0500 Subject: [PATCH 05/17] Notes about community stacks --- .gitignore | 1 + docs/using.md | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/.gitignore b/.gitignore index 17dcb67c..04308b12 100644 --- a/.gitignore +++ b/.gitignore @@ -65,3 +65,4 @@ target/ dockerspawner dockerspawner.tar.gz *.orig +.ipynb_checkpoints/ diff --git a/docs/using.md b/docs/using.md index 2e8940ee..aacb5b9b 100644 --- a/docs/using.md +++ b/docs/using.md @@ -138,8 +138,16 @@ You must refer to git-SHA image tags when stability and reproducibility are impo ### Community Stacks +The core stacks are just a tiny sample of what's possible when combining Jupyter with other technologies. We encourage members of the Jupyter community to create their own stacks based on the core images and link them below. + +*Nothing here yet!* + +See the [contributing guide](contributing#Community-Stacks) for information about how to create your own Jupyter Docker Stack. + ## Running a Container + + ### Using the Docker Command Line ### Using JupyterHub From 2cb59bd3cf555a588d250db9743c3ba049f48b84 Mon Sep 17 00:00:00 2001 From: Peter Parente Date: Sun, 4 Feb 2018 12:29:47 -0500 Subject: [PATCH 06/17] Fill-in and refine instructions to run --- docs/index.rst | 27 +++++------------------ docs/using.md | 59 ++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 61 insertions(+), 25 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 4907e360..638e671b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -15,27 +15,12 @@ Jupyter Docker Stacks are a set of ready-to-run Docker images containing Jupyter Quick Start ----------- -The examples below may help you get started if you have Docker installed, know which Docker image you want to use, and want to launch a single Jupyter Notebook server in a container. 
The other pages in this documentation describe additional uses and features in detail.:: +The two examples below may help you get started if you `have Docker installed `_, know :doc:`which Docker image ` you want to use, and want to launch a single Jupyter Notebook server in a container. The other pages in this documentation describe additional uses and features in detail. - # Run a Jupyter Notebook server in a Docker container started - # from the jupyter/scipy-notebook image built from Git commit 2c80cf3537ca. - # All files saved in the container are lost when the notebook server exits. - # -ti: pseudo-TTY+STDIN open, so the logs appear in the terminal - # -rm: remove the container on exit - # -p: publish the notebook port 8888 as port 8888 on the host - docker run -ti --rm -p 8888:8888 jupyter/scipy-notebook:2c80cf3537ca +**Example 1:** This command pulls the `jupyter/scipy-notebook` image tagged `2c80cf3537ca` from Docker Hub if it is not already present on the local host. It then starts a container running a Jupyter Notebook server and exposes the server on host port 8888. The server logs appear in the terminal and include a URL to the notebook server. The container remains intact for restart after notebook server exit.:: - # Run a Jupyter Notebook server in a Docker container started from the - # jupyter/r-notebook image built from Git commit e5c5a7d3e52d. - # All files written to ~/work in the container are saved to the - # current working on the host and persist even when the notebook server - # exits. - docker run -ti --rm -p 8888:8888 -v "$PWD":/home/jovyan/work jupyter/r-notebook:e5c5a7d3e52d + docker run -p 8888:8888 jupyter/scipy-notebook:2c80cf3537ca - # Run a Jupyter Notebook server in a background Docker container started - # from the latest jupyter/all-spark-notebook image available on the local - # machine or Docker Cloud. All files saved in the container are lost - # when the container is destroyed. - # -d: detach, run container in background. 
- # -P: Publish all exposed ports to random ports - docker run -d -P jupyter/all-spark-notebook:latest +**Example 2:** This command pulls the `jupyter/r-notebook` image tagged `e5c5a7d3e52d` from Docker Hub if it is not already present on the local host. It then starts an *ephemeral* container running a Jupyter Notebook server and exposes the server on host port 10000. The command mounts the current working directory on the host as `/home/jovyan/work` in the container. The container is destroyed after notebook server exit, but any files written to `~/work` in the container remain intact on the host.:: + + docker run --rm -p 10000:8888 -v "$PWD":/home/jovyan/work jupyter/r-notebook:e5c5a7d3e52d diff --git a/docs/using.md b/docs/using.md index aacb5b9b..f24f0681 100644 --- a/docs/using.md +++ b/docs/using.md @@ -5,7 +5,7 @@ Using one of the Jupyter Docker Stacks requires two choices: 1. Which Docker image you wish to use 2. How you wish to start Docker containers from that image -This section provides details about the available images and runtimes to inform your choices. +This section provides details about the available images and runtimes. ## Selecting an Image @@ -140,16 +140,67 @@ You must refer to git-SHA image tags when stability and reproducibility are impo The core stacks are just a tiny sample of what's possible when combining Jupyter with other technologies. We encourage members of the Jupyter community to create their own stacks based on the core images and link them below. -*Nothing here yet!* +*Nothing here yet! You can be the first!* See the [contributing guide](contributing#Community-Stacks) for information about how to create your own Jupyter Docker Stack. ## Running a Container +### Using the Docker CLI + +You can launch a local Docker container from the Jupyter Docker Stacks using the [Docker command line interface](https://docs.docker.com/engine/reference/commandline/cli/). There are numerous ways to configure containers using the CLI. 
The following are a few common patterns. + +``` +docker run -p 8888:8888 jupyter/scipy-notebook:2c80cf3537ca +``` + +This command pulls the `jupyter/scipy-notebook` image tagged `2c80cf3537ca` from Docker Hub if it is not already present on the local host. It then starts a container running a Jupyter Notebook server and exposes the server on host port 8888. The server logs appear in the terminal and include a URL to the notebook server. + +``` +Executing the command: jupyter notebook +[I 15:33:00.567 NotebookApp] Writing notebook server cookie secret to /home/jovyan/.local/share/jupyter/runtime/notebook_cookie_secret +[W 15:33:01.084 NotebookApp] WARNING: The notebook server is listening on all IP addresses and not using encryption. This is not recommended. +[I 15:33:01.150 NotebookApp] JupyterLab alpha preview extension loaded from /opt/conda/lib/python3.6/site-packages/jupyterlab +[I 15:33:01.150 NotebookApp] JupyterLab application directory is /opt/conda/share/jupyter/lab +[I 15:33:01.155 NotebookApp] Serving notebooks from local directory: /home/jovyan +[I 15:33:01.156 NotebookApp] 0 active kernels +[I 15:33:01.156 NotebookApp] The Jupyter Notebook is running at: +[I 15:33:01.157 NotebookApp] http://[all ip addresses on your system]:8888/?token=112bb073331f1460b73768c76dffb2f87ac1d4ca7870d46a +[I 15:33:01.157 NotebookApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation). 
+[C 15:33:01.160 NotebookApp] + + Copy/paste this URL into your browser when you connect for the first time, + to login with a token: + http://localhost:8888/?token=112bb073331f1460b73768c76dffb2f87ac1d4ca7870d46a +``` + +Pressing `Ctrl-C` shuts down the notebook server but leaves the container intact on disk for later restart or permanent deletion using commands like the following: + +``` +docker ps -a +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +d67fe77f1a84 jupyter/base-notebook "tini -- start-noteb…" 44 seconds ago Exited (0) 39 seconds ago cocky_mirzakhani + +docker start -a d67fe77f1a84 +Executing the command: jupyter notebook +[W 16:45:02.020 NotebookApp] WARNING: The notebook server is listening on all IP addresses and not using encryption. This is not recommended. +... -### Using the Docker Command Line +docker rm -a d67fe77f1a84 +d67fe77f1a84 +``` + +TODO: ephemeral container with host mount + +### Using Binder + +[Binder](https://mybinder.org/) is a service that allows you to create and share custom computing environments for projects in version control. You can use any of the Jupyter Docker Stack images as a basis for a Binder-compatible Dockerfile. See the [docker-stacks example](https://mybinder.readthedocs.io/en/latest/sample_repos.html#using-a-docker-image-from-the-jupyter-docker-stacks-repository) and [Using a Dockerfile](https://mybinder.readthedocs.io/en/latest/dockerfile.html) sections in the [Binder documentation](https://mybinder.readthedocs.io/en/latest/index.html) for instructions. ### Using JupyterHub -### Using Binder \ No newline at end of file +You can configure JupyterHub to launcher Docker containers from the Jupyter Docker Stacks images. If you've been following the [Zero to JupyterHub with Kubernetes](http://zero-to-jupyterhub.readthedocs.io/en/latest/) guide, see the [Use an existing Docker image](http://zero-to-jupyterhub.readthedocs.io/en/latest/user-environment.html#use-an-existing-docker-image) section for details. 
If you have a custom JupyterHub deployment, see the [Picking or building a Docker image](https://github.com/jupyterhub/dockerspawner#picking-or-building-a-docker-image) instructions for the [dockerspawner](https://github.com/jupyterhub/dockerspawner) instead. + +### Using Other Tools and Services + +You can use the Jupyter Docker Stacks with any Docker-compatible technology (e.g., [Docker Compose](https://docs.docker.com/compose/), [docker-py](https://github.com/docker/docker-py), your favorite cloud container service). See the documentation of the tool, library, or service for details about how to reference, configure, and launch containers from these images. \ No newline at end of file From a319f52e2e4adcd860abedabd95660f0747029b0 Mon Sep 17 00:00:00 2001 From: Peter Parente Date: Sun, 4 Feb 2018 12:48:33 -0500 Subject: [PATCH 07/17] Fix travis env creation command --- .travis.yml | 2 +- .vscode/settings.json | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 .vscode/settings.json diff --git a/.travis.yml b/.travis.yml index 723ffb62..e1707b34 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,6 @@ sudo: required services: - docker install: - - make test-reqs + - make dev-env script: - make build-test-all DARGS="--build-arg TEST_ONLY_BUILD=1" diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..6a13b606 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,22 @@ +{ + "cSpell.enabledLanguageIds": [ + "c", + "cpp", + "csharp", + "go", + "handlebars", + "javascript", + "javascriptreact", + "json", + "latex", + "markdown", + "php", + "plaintext", + "python", + "restructuredtext", + "text", + "typescript", + "typescriptreact", + "yml" + ] +} \ No newline at end of file From f1400f661247de8f1d5d78f1f72475883c3b326c Mon Sep 17 00:00:00 2001 From: Peter Parente Date: Sun, 4 Feb 2018 14:00:26 -0500 Subject: [PATCH 08/17] Stick with pip for travis testing --- Makefile | 2 +- 
environment.yml | 13 ------------- requirements-dev.txt | 7 +++++++ 3 files changed, 8 insertions(+), 14 deletions(-) delete mode 100644 environment.yml create mode 100644 requirements-dev.txt diff --git a/Makefile b/Makefile index 214f79ce..ff75d058 100644 --- a/Makefile +++ b/Makefile @@ -55,7 +55,7 @@ dev/%: ## run a foreground container for a stack docker run -it --rm -p $(PORT):8888 $(DARGS) $(OWNER)/$(notdir $@) $(ARGS) dev-env: ## install libraries required to build docs and run tests - conda env create -f environment.yml + pip install -r requirements-dev.txt docs: ## build HTML documentation make -C docs html diff --git a/environment.yml b/environment.yml deleted file mode 100644 index ba969045..00000000 --- a/environment.yml +++ /dev/null @@ -1,13 +0,0 @@ -name: docker-stacks -channels: - - conda-forge -dependencies: -- python=3.6 -- pytest -- requests -- sphinx>=1.6 -- sphinx_rtd_theme -- pip: - - docker - - jupyter_alabaster_theme - - recommonmark==0.4.0 \ No newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..2352874b --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,7 @@ +docker +jupyter_alabaster_theme +pytest +recommonmark==0.4.0 +requests +sphinx>=1.6 +sphinx_rtd_theme \ No newline at end of file From 51ab78f4a99b46dd91a72f727235f98328b8a5aa Mon Sep 17 00:00:00 2001 From: Peter Parente Date: Sun, 4 Feb 2018 17:38:24 -0500 Subject: [PATCH 09/17] Finish single container examples --- docs/index.rst | 2 +- docs/using.md | 72 ++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 65 insertions(+), 9 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 638e671b..5bbd62d3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -21,6 +21,6 @@ The two examples below may help you get started if you `have Docker installed Date: Sat, 24 Mar 2018 23:24:46 -0400 Subject: [PATCH 10/17] First draft configuration page --- docs/configuration.md | 187 
+++++++++++++++++++++++++++++++----------- docs/using.md | 4 +- 2 files changed, 139 insertions(+), 52 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 71105873..712afd4b 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1,44 +1,46 @@ # Options and Configuration +When launched as Docker containers, all of the Jupyter Docker Stacks start a Jupyter Notebook server by default. They do so by executing a `start-notebook.sh` script which configures the internal container environment and then runs `jupyter notebook $*`, passing it any command line arguments received. + +This page describes the options supported by the startup script as well as how to bypass it to run alternative commands. + ## Notebook Options -The Docker container executes a `start-notebook.sh` script script by default. The `start-notebook.sh` script handles the `NB_UID`, `NB_GID` and `GRANT_SUDO` features documented in the next section, and then executes the `jupyter notebook`. - -You can pass [Jupyter command line options](https://jupyter.readthedocs.io/en/latest/projects/jupyter-command.html) through the `start-notebook.sh` script when launching the container. For example, to secure the Notebook server with a custom password hashed using `IPython.lib.passwd()` instead of the default token, run the following: +You can pass [Jupyter command line options](https://jupyter.readthedocs.io/en/latest/projects/jupyter-command.html) to the `start-notebook.sh` script when launching the container. 
For example, to secure the Notebook server with a custom password hashed using `IPython.lib.passwd()` instead of the default token, you can run the following: ``` docker run -d -p 8888:8888 jupyter/base-notebook start-notebook.sh --NotebookApp.password='sha1:74ba40f8a388:c913541b7ee99d15d5ed31d4226bf7838f83a50e' ``` -For example, to set the base URL of the notebook server, run the following: +For example, to set the base URL of the notebook server, you can run the following: ``` docker run -d -p 8888:8888 jupyter/base-notebook start-notebook.sh --NotebookApp.base_url=/some/path ``` -For example, to disable all authentication mechanisms (which is not a recommended practice): +For example, to ignore best practice and disable all authentication, you can run the following: ``` docker run -d -p 8888:8888 jupyter/base-notebook start-notebook.sh --NotebookApp.token='' ``` -You can sidestep the `start-notebook.sh` script and run your own commands in the container. See the *Alternative Commands* section later in this document for more information. - ## Docker Options -You may customize the execution of the Docker container and the command it is running with the following optional arguments. +You may instruct the `start-notebook.sh` script to customize the container environment before launching +the notebook server. You do so by passing arguments to the `docker run` command. -* `-e GEN_CERT=yes` - Generates a self-signed SSL certificate and configures Jupyter Notebook to use it to accept encrypted HTTPS connections. -* `-e NB_UID=1000` - Specify the uid of the `jovyan` user. Useful to mount host volumes with specific file ownership. For this option to take effect, you must run the container with `--user root`. (The `start-notebook.sh` script will `su jovyan` after adjusting the user id.) -* `-e NB_GID=100` - Specify the gid of the `jovyan` user. Useful to mount host volumes with specific file ownership. 
For this option to take effect, you must run the container with `--user root`. (The `start-notebook.sh` script will `su jovyan` after adjusting the group id.) -* `-e GRANT_SUDO=yes` - Gives the `jovyan` user passwordless `sudo` capability. Useful for installing OS packages. For this option to take effect, you must run the container with `--user root`. (The `start-notebook.sh` script will `su jovyan` after adding `jovyan` to sudoers.) **You should only enable `sudo` if you trust the user or if the container is running on an isolated host.** +* `-e NB_USER=jovyan` - Instructs the startup script to change the default container username from `jovyan` to the provided value. Causes the script to rename the `jovyan` user home folder. +* `-e NB_UID=1000` - Instructs the startup script to switch the numeric user ID of `$NB_USER` to the given value. This feature is useful when mounting host volumes with specific owner permissions. For this option to take effect, you must run the container with `--user root`. (The startup script will `su $NB_USER` after adjusting the user ID.) +* `-e NB_GID=100` - Instructs the startup script to change the numeric group ID of the `$NB_USER` to the given value. This feature is useful when mounting host volumes with specific group permissions. For this option to take effect, you must run the container with `--user root`. (The startup script will `su $NB_USER` after adjusting the group ID.) +* `-e CHOWN_HOME=yes` - Instructs the startup script to recursively change the `$NB_USER` home directory owner and group to the current value of `$NB_UID` and `$NB_GID`. This change will take effect even if the user home directory is mounted from the host using `-v` as described below. +* `-e GRANT_SUDO=yes` - Instructs the startup script to grant the `NB_USER` user passwordless `sudo` capability. You do **not** need this option to allow the user to `conda` or `pip` install additional packages.
This option is useful, however, when you wish to give `$NB_USER` the ability to install OS packages with `apt` or modify other root-owned files in the container. For this option to take effect, you must run the container with `--user root`. (The `start-notebook.sh` script will `su $NB_USER` after adding `$NB_USER` to sudoers.) **You should only enable `sudo` if you trust the user or if the container is running on an isolated host.** +* `-e GEN_CERT=yes` - Instructs the startup script to generate a self-signed SSL certificate and configure Jupyter Notebook to use it to accept encrypted HTTPS connections. * `-v /some/host/folder/for/work:/home/jovyan/work` - Mounts a host machine directory as folder in the container. Useful when you want to preserve notebooks and other work even after the container is destroyed. **You must grant the within-container notebook user or group (`NB_UID` or `NB_GID`) write access to the host directory (e.g., `sudo chown 1000 /some/host/folder/for/work`).** -* `--group-add users` - use this argument if you are also specifying - a specific user id to launch the container (`-u 5000`), rather than launching the container as root and relying on *NB_UID* and *NB_GID* to set the user and group. +* `--user 5000 --group-add users` - Launches the container with a specific user ID and adds that user to the `users` group so that it can modify files in the default home directory and `/opt/conda`. You can use these arguments as alternatives to setting `$NB_UID` and `$NB_GID`. ## SSL Certificates -You may mount SSL key and certificate files into a container and configure Jupyter Notebook to use them to accept HTTPS connections. For example, to mount a host folder containing a `notebook.key` and `notebook.crt`: +You may mount SSL key and certificate files into a container and configure Jupyter Notebook to use them to accept HTTPS connections.
For example, to mount a host folder containing a `notebook.key` and `notebook.crt` and use them, you might run the following: ``` docker run -d -p 8888:8888 \ @@ -65,23 +67,11 @@ For additional information about using SSL, see the following: * The [jupyter_notebook_config.py](jupyter_notebook_config.py) file for how this Docker image generates a self-signed certificate. * The [Jupyter Notebook documentation](https://jupyter-notebook.readthedocs.io/en/latest/public_server.html#securing-a-notebook-server) for best practices about securing a public notebook server in general. -## Conda Environments - -The default Python 3.x [Conda environment](http://conda.pydata.org/docs/using/envs.html) resides in `/opt/conda`. The `/opt/conda/bin` directory is part of the default `jovyan` user's `$PATH`. That directory is also whitelisted for use in `sudo` commands by the `start.sh` script. - -The `jovyan` user has full read/write access to the `/opt/conda` directory. You can use either `conda` or `pip` to install new packages without any additional permissions. - -``` -# install a package into the default (python 3.x) environment -pip install some-package -conda install some-package -``` - ## Alternative Commands ### start.sh -The `start.sh` script supports the same features as the default `start-notebook.sh` script (e.g., `GRANT_SUDO`), but allows you to specify an arbitrary command to execute. For example, to run the text-based `ipython` console in a container, do the following: +The `start-notebook.sh` script actually inherits most of its option handling capability from a more generic `start.sh` script. The `start.sh` script supports all of the features described above, but allows you to specify an arbitrary command to execute. 
For example, to run the text-based `ipython` console in a container, do the following: ``` docker run -it --rm jupyter/base-notebook start.sh ipython @@ -97,21 +87,29 @@ This script is particularly useful when you derive a new Dockerfile from this im ### Others -You can bypass the provided scripts and specify your an arbitrary start command. If you do, keep in mind that certain features documented above will not function (e.g., `GRANT_SUDO`). +You can bypass the provided scripts and specify your an arbitrary start command. If you do, keep in mind that features supported by the `start.sh` script and its kin will not function (e.g., `GRANT_SUDO`). -## Image Specifics +## Conda Environments -### Spark and PySpark +The default Python 3.x [Conda environment](http://conda.pydata.org/docs/using/envs.html) resides in `/opt/conda`. The `/opt/conda/bin` directory is part of the default `jovyan` user's `$PATH`. That directory is also whitelisted for use in `sudo` commands by the `start.sh` script. -#### Using Spark Local Mode +The `jovyan` user has full read/write access to the `/opt/conda` directory. You can use either `conda` or `pip` to install new packages without any additional permissions. -This configuration is nice for using Spark on small, local data. +``` +# install a package into the default (python 3.x) environment +pip install some-package +conda install some-package +``` -0. Run the container as shown above. -2. Open a Python 2 or 3 notebook. -3. Create a `SparkContext` configured for local mode. +## Apache Spark -For example, the first few cells in the notebook might read: +The `jupyter/pyspark-notebook` and `jupyter/all-spark-notebook` images support the use of Apache Spark in Python, R, and Scala notebooks. The following sections provide some examples of how to get started using them. + +### Using Spark Local Mode + +Spark local mode is useful for experimentation on small data when you do not have a Spark cluster available. 
+ +#### In a Python Notebook ```python import pyspark @@ -122,20 +120,54 @@ rdd = sc.parallelize(range(1000)) rdd.takeSample(False, 5) ``` +#### In a R Notebook + +```r +library(SparkR) + +as <- sparkR.session("local[*]") + +# do something to prove it works +df <- as.DataFrame(iris) +head(filter(df, df$Petal_Width > 0.2)) +``` + +#### In a Spylon Kernel Scala Notebook + +Spylon kernel instantiates a `SparkContext` for you in variable `sc` after you configure Spark options in a `%%init_spark` magic cell. + +```python +%%init_spark +# Configure Spark to use a local master +launcher.master = "local[*]" +``` + +```scala +// Now run Scala code that uses the initialized SparkContext in sc +val rdd = sc.parallelize(0 to 999) +rdd.takeSample(false, 5) +``` + +#### In an Apache Toree Scala Notebook + +Apache Toree instantiates a local `SparkContext` for you in variable `sc` when the kernel starts. + +```scala +val rdd = sc.parallelize(0 to 999) +rdd.takeSample(false, 5) +``` + ### Connecting to a Spark Cluster on Mesos This configuration allows your compute cluster to scale with your data. 0. [Deploy Spark on Mesos](http://spark.apache.org/docs/latest/running-on-mesos.html). -1. Configure each slave with [the `--no-switch_user` flag](https://open.mesosphere.com/reference/mesos-slave/) or create the `jovyan` user on every slave node. -2. Ensure Python 2.x and/or 3.x and any Python libraries you wish to use in your Spark lambda functions are installed on your Spark workers. -3. Run the Docker container with `--net=host` in a location that is network addressable by all of your Spark workers. (This is a [Spark networking requirement](http://spark.apache.org/docs/latest/cluster-overview.html#components).) +1. Configure each slave with [the `--no-switch_user` flag](https://open.mesosphere.com/reference/mesos-slave/) or create the `$NB_USER` account on every slave node. +2. 
Run the Docker container with `--net=host` in a location that is network addressable by all of your Spark workers. (This is a [Spark networking requirement](http://spark.apache.org/docs/latest/cluster-overview.html#components).) * NOTE: When using `--net=host`, you must also use the flags `--pid=host -e TINI_SUBREAPER=true`. See https://github.com/jupyter/docker-stacks/issues/64 for details. -4. Open a Python 2 or 3 notebook. -5. Create a `SparkConf` instance in a new notebook pointing to your Mesos master node (or Zookeeper instance) and Spark binary package location. -6. Create a `SparkContext` using this configuration. +3. Follow the language specific instructions below. -For example, the first few cells in a Python 3 notebook might read: +#### In a Python Notebook ```python import os @@ -149,7 +181,7 @@ conf = pyspark.SparkConf() conf.setMaster("mesos://10.10.10.10:5050") # point to spark binary package in HDFS or on local filesystem on all slave # nodes (e.g., file:///opt/spark/spark-2.2.0-bin-hadoop2.7.tgz) -conf.set("spark.executor.uri", "hdfs://10.122.193.209/spark/spark-2.2.0-bin-hadoop2.7.tgz") +conf.set("spark.executor.uri", "hdfs://10.10.10.10/spark/spark-2.2.0-bin-hadoop2.7.tgz") # set other options as desired conf.set("spark.executor.memory", "8g") conf.set("spark.core.connection.ack.wait.timeout", "1200") @@ -162,16 +194,71 @@ rdd = sc.parallelize(range(100000000)) rdd.sumApprox(3) ``` -To use Python 2 in the notebook and on the workers, change the `PYSPARK_PYTHON` environment variable to point to the location of the Python 2.x interpreter binary. If you leave this environment variable unset, it defaults to `python`. +#### In a R Notebook -Of course, all of this can be hidden in an [IPython kernel startup script](http://ipython.org/ipython-doc/stable/development/config.html?highlight=startup#startup-files), but "explicit is better than implicit." 
:) +```r +library(SparkR) -#### Connecting to a Spark Cluster in Standalone Mode +# Point to mesos master or zookeeper entry (e.g., zk://10.10.10.10:2181/mesos) +# Point to spark binary package in HDFS or on local filesystem on all slave +# nodes (e.g., file:///opt/spark/spark-2.2.0-bin-hadoop2.7.tgz) in sparkEnvir +# Set other options in sparkEnvir +sc <- sparkR.session("mesos://10.10.10.10:5050", sparkEnvir=list( + spark.executor.uri="hdfs://10.10.10.10/spark/spark-2.2.0-bin-hadoop2.7.tgz", + spark.executor.memory="8g" + ) +) -Connection to Spark Cluster in Standalone Mode requires the following set of steps: +# do something to prove it works +data(iris) +df <- as.DataFrame(iris) +head(filter(df, df$Petal_Width > 0.2)) +``` + +#### In a Spylon Kernel Scala Notebook + +```python +%%init_spark +# Configure the location of the mesos master and spark distribution on HDFS +launcher.master = "mesos://10.10.10.10:5050" +launcher.conf.spark.executor.uri=hdfs://10.10.10.10/spark/spark-2.2.0-bin-hadoop2.7.tgz +``` + +```scala +// Now run Scala code that uses the initialized SparkContext in sc +val rdd = sc.parallelize(0 to 999) +rdd.takeSample(false, 5) +``` + +#### In an Apache Toree Scala Notebook + +The Apache Toree kernel automatically creates a `SparkContext` when it starts based on configuration information from its command line arguments and environment variables. You can pass information about your Mesos cluster via the `SPARK_OPTS` environment variable when you spawn a container. + +For instance, to pass information about a Mesos master, Spark binary location in HDFS, and an executor options, you could start the container like so: + +``` +docker run -d -p 8888:8888 -e SPARK_OPTS='--master=mesos://10.10.10.10:5050 \ + --spark.executor.uri=hdfs://10.10.10.10/spark/spark-2.2.0-bin-hadoop2.7.tgz \ + --spark.executor.memory=8g' jupyter/all-spark-notebook +``` + +Note that this is the same information expressed in a notebook in the Python case above. 
Once the kernel spec has your cluster information, you can test your cluster in an Apache Toree notebook like so: + +```scala +// should print the value of --master in the kernel spec +println(sc.master) + +// do something to prove it works +val rdd = sc.parallelize(0 to 99999999) +rdd.sum() +``` + +### Connecting to a Spark Cluster on Standalone Mode + +Connection to Spark Cluster on Standalone Mode requires the following set of steps: 0. Verify that the docker image (check the Dockerfile) and the Spark Cluster which is being deployed, run the same version of Spark. 1. [Deploy Spark on Standalone Mode](http://spark.apache.org/docs/latest/spark-standalone.html). 2. Run the Docker container with `--net=host` in a location that is network addressable by all of your Spark workers. (This is a [Spark networking requirement](http://spark.apache.org/docs/latest/cluster-overview.html#components).) * NOTE: When using `--net=host`, you must also use the flags `--pid=host -e TINI_SUBREAPER=true`. See https://github.com/jupyter/docker-stacks/issues/64 for details. -3. The language specific instructions are almost same as mentioned above for Mesos, only the master url would now be something like spark://10.10.10.10:7077 +3. The language specific instructions are almost same as mentioned above for Mesos, only the master url would now be something like spark://10.10.10.10:7077 \ No newline at end of file diff --git a/docs/using.md b/docs/using.md index e1dedf58..c4771c16 100644 --- a/docs/using.md +++ b/docs/using.md @@ -22,7 +22,7 @@ The Jupyter team maintains a set of Docker image definitions in the [https://git `jupyter/base-notebook` is a small image supporting the [options common across all core stacks](configuration.html). It is the basis for all other stacks. 
* Minimally-functional Jupyter Notebook server (e.g., no [pandoc](https://pandoc.org/) for saving notebooks as PDFs) -* [Miniconda](https://conda.io/miniconda.html) Python 3.x +* [Miniconda](https://conda.io/miniconda.html) Python 3.x in `/opt/conda` * No preinstalled scientific computing packages * Unprivileged user `jovyan` (`uid=1000`, configurable, see options) in group `users` (`gid=100`) with ownership over the `/home/jovyan` and `/opt/conda` paths * `tini` as the container entrypoint and a `start-notebook.sh` script as the default command @@ -251,7 +251,7 @@ notebook ### Using Binder -[Binder](https://mybinder.org/) is a service that allows you to create and share custom computing environments for projects in version control. You can use any of the Jupyter Docker Stack images as a basis for a Binder-compatible Dockerfile. See the [docker-stacks example](https://mybinder.readthedocs.io/en/latest/sample_repos.html#using-a-docker-image-from-the-jupyter-docker-stacks-repository) and [Using a Dockerfile](https://mybinder.readthedocs.io/en/latest/dockerfile.html) sections in the [Binder documentation](https://mybinder.readthedocs.io/en/latest/index.html) for instructions. +[Binder](https://mybinder.org/) is a service that allows you to create and share custom computing environments for projects in version control. You can use any of the Jupyter Docker Stacks images as a basis for a Binder-compatible Dockerfile. See the [docker-stacks example](https://mybinder.readthedocs.io/en/latest/sample_repos.html#using-a-docker-image-from-the-jupyter-docker-stacks-repository) and [Using a Dockerfile](https://mybinder.readthedocs.io/en/latest/dockerfile.html) sections in the [Binder documentation](https://mybinder.readthedocs.io/en/latest/index.html) for instructions. 
### Using JupyterHub From 6e55fcc42fc39d46a91034736d3f4e3bd9630c67 Mon Sep 17 00:00:00 2001 From: Peter Parente Date: Sun, 25 Mar 2018 21:00:06 -0400 Subject: [PATCH 11/17] Split pages, use RTD theme for now I can't seem to configure the Jupyter Albaster theme correctly and will reach out for help later. --- docs/conf.py | 9 +- docs/contributing.md | 9 - docs/contributing/packages.md | 5 + docs/contributing/recipes.md | 1 + docs/contributing/stacks.md | 1 + docs/contributing/tests.md | 1 + docs/index.rst | 39 ++-- docs/{configuration.md => using/common.md} | 165 +---------------- docs/using/running.md | 123 +++++++++++++ docs/{using.md => using/selecting.md} | 150 ++-------------- docs/using/specifics.md | 198 +++++++++++++++++++++ requirements-dev.txt | 3 +- 12 files changed, 381 insertions(+), 323 deletions(-) delete mode 100644 docs/contributing.md create mode 100644 docs/contributing/packages.md create mode 100644 docs/contributing/recipes.md create mode 100644 docs/contributing/stacks.md create mode 100644 docs/contributing/tests.md rename docs/{configuration.md => using/common.md} (52%) create mode 100644 docs/using/running.md rename docs/{using.md => using/selecting.md} (55%) create mode 100644 docs/using/specifics.md diff --git a/docs/conf.py b/docs/conf.py index 83f81094..a074a76e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -36,7 +36,6 @@ needs_sphinx = '1.4' # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'jupyter_alabaster_theme' ] # Add any paths that contain templates here, relative to this directory. @@ -53,7 +52,7 @@ master_doc = 'index' # General information about the project. project = 'docker-stacks' -copyright = '2017, Project Jupyter' +copyright = '2018, Project Jupyter' author = 'Project Jupyter' # The version info for the project you're documenting, acts as replacement for @@ -61,9 +60,9 @@ author = 'Project Jupyter' # built documents. # # The short X.Y version. 
-version = '1.0' +version = 'latest' # The full version, including alpha/beta/rc tags. -release = '1.0' +release = 'latest' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -97,7 +96,7 @@ source_suffix = ['.rst', '.md'] # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'jupyter_alabaster_theme' +html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the diff --git a/docs/contributing.md b/docs/contributing.md deleted file mode 100644 index f087993d..00000000 --- a/docs/contributing.md +++ /dev/null @@ -1,9 +0,0 @@ -# Contributing - -## Package Updates - -## New Packages - -## Tests - -## Community Stacks \ No newline at end of file diff --git a/docs/contributing/packages.md b/docs/contributing/packages.md new file mode 100644 index 00000000..fcbd721f --- /dev/null +++ b/docs/contributing/packages.md @@ -0,0 +1,5 @@ +# Packages + +## Package Updates + +## New Packages \ No newline at end of file diff --git a/docs/contributing/recipes.md b/docs/contributing/recipes.md new file mode 100644 index 00000000..6c227696 --- /dev/null +++ b/docs/contributing/recipes.md @@ -0,0 +1 @@ +# Recipes \ No newline at end of file diff --git a/docs/contributing/stacks.md b/docs/contributing/stacks.md new file mode 100644 index 00000000..5a1dce66 --- /dev/null +++ b/docs/contributing/stacks.md @@ -0,0 +1 @@ +# Community Stacks \ No newline at end of file diff --git a/docs/contributing/tests.md b/docs/contributing/tests.md new file mode 100644 index 00000000..ab381d8c --- /dev/null +++ b/docs/contributing/tests.md @@ -0,0 +1 @@ +# Image Tests \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index 5bbd62d3..abfb0bc0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,19 +3,10 @@ Jupyter Docker Stacks Jupyter 
Docker Stacks are a set of ready-to-run Docker images containing Jupyter applications and interactive computing tools. You can use a stack image to start a personal Jupyter Notebook server in a local Docker container, to run JupyterLab servers for a team using JupyterHub, to write your own project Dockerfile, and so on. -**Table of Contents** - -.. toctree:: - :maxdepth: 1 - - using - configuration - contributing - Quick Start ----------- -The two examples below may help you get started if you `have Docker installed `_, know :doc:`which Docker image ` you want to use, and want to launch a single Jupyter Notebook server in a container. The other pages in this documentation describe additional uses and features in detail. +The two examples below may help you get started if you `have Docker installed `_, know :doc:`which Docker image ` you want to use, and want to launch a single Jupyter Notebook server in a container. The other pages in this documentation describe additional uses and features in detail. **Example 1:** This command pulls the `jupyter/scipy-notebook` image tagged `2c80cf3537ca` from Docker Hub if it is not already present on the local host. It then starts a container running a Jupyter Notebook server and exposes the server on host port 8888. The server logs appear in the terminal and include a URL to the notebook server. The container remains intact for restart after notebook server exit.:: @@ -24,3 +15,31 @@ The two examples below may help you get started if you `have Docker installed + Jupyter mailing list + Jupyter website diff --git a/docs/configuration.md b/docs/using/common.md similarity index 52% rename from docs/configuration.md rename to docs/using/common.md index 712afd4b..16c233f9 100644 --- a/docs/configuration.md +++ b/docs/using/common.md @@ -1,6 +1,6 @@ -# Options and Configuration +# Common Features -When launched as Docker containers, all of the Jupyter Docker Stacks start a Jupyter Notebook server by default. 
They do so by executing a `start-notebook.sh` script which configures the internal container environment and then runs `jupyter notebook $*`, passing it any command line arguments received. +A container launched from any Jupyter Docker Stacks image runs a Jupyter Notebook server by default. The container does so by executing a `start-notebook.sh` script. This script configures the internal container environment and then runs `jupyter notebook`, passing it any command line arguments received. This page describes the options supported by the startup script as well as how to bypass it to run alternative commands. @@ -101,164 +101,3 @@ pip install some-package conda install some-package ``` -## Apache Spark - -The `jupyter/pyspark-notebook` and `jupyter/all-spark-notebook` images support the use of Apache Spark in Python, R, and Scala notebooks. The following sections provide some examples of how to get started using them. - -### Using Spark Local Mode - -Spark local mode is useful for experimentation on small data when you do not have a Spark cluster available. - -#### In a Python Notebook - -```python -import pyspark -sc = pyspark.SparkContext('local[*]') - -# do something to prove it works -rdd = sc.parallelize(range(1000)) -rdd.takeSample(False, 5) -``` - -#### In a R Notebook - -```r -library(SparkR) - -as <- sparkR.session("local[*]") - -# do something to prove it works -df <- as.DataFrame(iris) -head(filter(df, df$Petal_Width > 0.2)) -``` - -#### In a Spylon Kernel Scala Notebook - -Spylon kernel instantiates a `SparkContext` for you in variable `sc` after you configure Spark options in a `%%init_spark` magic cell. 
- -```python -%%init_spark -# Configure Spark to use a local master -launcher.master = "local[*]" -``` - -```scala -// Now run Scala code that uses the initialized SparkContext in sc -val rdd = sc.parallelize(0 to 999) -rdd.takeSample(false, 5) -``` - -#### In an Apache Toree Scala Notebook - -Apache Toree instantiates a local `SparkContext` for you in variable `sc` when the kernel starts. - -```scala -val rdd = sc.parallelize(0 to 999) -rdd.takeSample(false, 5) -``` - -### Connecting to a Spark Cluster on Mesos - -This configuration allows your compute cluster to scale with your data. - -0. [Deploy Spark on Mesos](http://spark.apache.org/docs/latest/running-on-mesos.html). -1. Configure each slave with [the `--no-switch_user` flag](https://open.mesosphere.com/reference/mesos-slave/) or create the `$NB_USER` account on every slave node. -2. Run the Docker container with `--net=host` in a location that is network addressable by all of your Spark workers. (This is a [Spark networking requirement](http://spark.apache.org/docs/latest/cluster-overview.html#components).) - * NOTE: When using `--net=host`, you must also use the flags `--pid=host -e TINI_SUBREAPER=true`. See https://github.com/jupyter/docker-stacks/issues/64 for details. -3. Follow the language specific instructions below. 
- -#### In a Python Notebook - -```python -import os -# make sure pyspark tells workers to use python3 not 2 if both are installed -os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3' - -import pyspark -conf = pyspark.SparkConf() - -# point to mesos master or zookeeper entry (e.g., zk://10.10.10.10:2181/mesos) -conf.setMaster("mesos://10.10.10.10:5050") -# point to spark binary package in HDFS or on local filesystem on all slave -# nodes (e.g., file:///opt/spark/spark-2.2.0-bin-hadoop2.7.tgz) -conf.set("spark.executor.uri", "hdfs://10.10.10.10/spark/spark-2.2.0-bin-hadoop2.7.tgz") -# set other options as desired -conf.set("spark.executor.memory", "8g") -conf.set("spark.core.connection.ack.wait.timeout", "1200") - -# create the context -sc = pyspark.SparkContext(conf=conf) - -# do something to prove it works -rdd = sc.parallelize(range(100000000)) -rdd.sumApprox(3) -``` - -#### In a R Notebook - -```r -library(SparkR) - -# Point to mesos master or zookeeper entry (e.g., zk://10.10.10.10:2181/mesos) -# Point to spark binary package in HDFS or on local filesystem on all slave -# nodes (e.g., file:///opt/spark/spark-2.2.0-bin-hadoop2.7.tgz) in sparkEnvir -# Set other options in sparkEnvir -sc <- sparkR.session("mesos://10.10.10.10:5050", sparkEnvir=list( - spark.executor.uri="hdfs://10.10.10.10/spark/spark-2.2.0-bin-hadoop2.7.tgz", - spark.executor.memory="8g" - ) -) - -# do something to prove it works -data(iris) -df <- as.DataFrame(iris) -head(filter(df, df$Petal_Width > 0.2)) -``` - -#### In a Spylon Kernel Scala Notebook - -```python -%%init_spark -# Configure the location of the mesos master and spark distribution on HDFS -launcher.master = "mesos://10.10.10.10:5050" -launcher.conf.spark.executor.uri=hdfs://10.10.10.10/spark/spark-2.2.0-bin-hadoop2.7.tgz -``` - -```scala -// Now run Scala code that uses the initialized SparkContext in sc -val rdd = sc.parallelize(0 to 999) -rdd.takeSample(false, 5) -``` - -#### In an Apache Toree Scala Notebook - -The Apache Toree 
kernel automatically creates a `SparkContext` when it starts based on configuration information from its command line arguments and environment variables. You can pass information about your Mesos cluster via the `SPARK_OPTS` environment variable when you spawn a container. - -For instance, to pass information about a Mesos master, Spark binary location in HDFS, and an executor options, you could start the container like so: - -``` -docker run -d -p 8888:8888 -e SPARK_OPTS='--master=mesos://10.10.10.10:5050 \ - --spark.executor.uri=hdfs://10.10.10.10/spark/spark-2.2.0-bin-hadoop2.7.tgz \ - --spark.executor.memory=8g' jupyter/all-spark-notebook -``` - -Note that this is the same information expressed in a notebook in the Python case above. Once the kernel spec has your cluster information, you can test your cluster in an Apache Toree notebook like so: - -```scala -// should print the value of --master in the kernel spec -println(sc.master) - -// do something to prove it works -val rdd = sc.parallelize(0 to 99999999) -rdd.sum() -``` - -### Connecting to a Spark Cluster on Standalone Mode - -Connection to Spark Cluster on Standalone Mode requires the following set of steps: - -0. Verify that the docker image (check the Dockerfile) and the Spark Cluster which is being deployed, run the same version of Spark. -1. [Deploy Spark on Standalone Mode](http://spark.apache.org/docs/latest/spark-standalone.html). -2. Run the Docker container with `--net=host` in a location that is network addressable by all of your Spark workers. (This is a [Spark networking requirement](http://spark.apache.org/docs/latest/cluster-overview.html#components).) - * NOTE: When using `--net=host`, you must also use the flags `--pid=host -e TINI_SUBREAPER=true`. See https://github.com/jupyter/docker-stacks/issues/64 for details. -3. 
The language specific instructions are almost same as mentioned above for Mesos, only the master url would now be something like spark://10.10.10.10:7077 \ No newline at end of file diff --git a/docs/using/running.md b/docs/using/running.md new file mode 100644 index 00000000..72af3bfd --- /dev/null +++ b/docs/using/running.md @@ -0,0 +1,123 @@ +# Running a Container + +Using one of the Jupyter Docker Stacks requires two choices: + +1. Which Docker image you wish to use +2. How you wish to start Docker containers from that image + +This section provides details about the second. + +## Using the Docker CLI + +You can launch a local Docker container from the Jupyter Docker Stacks using the [Docker command line interface](https://docs.docker.com/engine/reference/commandline/cli/). There are numerous ways to configure containers using the CLI. The following are a couple common patterns. + +**Example 1** This command pulls the `jupyter/scipy-notebook` image tagged `2c80cf3537ca` from Docker Hub if it is not already present on the local host. It then starts a container running a Jupyter Notebook server and exposes the server on host port 8888. The server logs appear in the terminal and include a URL to the notebook server. + +``` +docker run -p 8888:8888 jupyter/scipy-notebook:2c80cf3537ca + +Executing the command: jupyter notebook +[I 15:33:00.567 NotebookApp] Writing notebook server cookie secret to /home/jovyan/.local/share/jupyter/runtime/notebook_cookie_secret +[W 15:33:01.084 NotebookApp] WARNING: The notebook server is listening on all IP addresses and not using encryption. This is not recommended. 
+[I 15:33:01.150 NotebookApp] JupyterLab alpha preview extension loaded from /opt/conda/lib/python3.6/site-packages/jupyterlab +[I 15:33:01.150 NotebookApp] JupyterLab application directory is /opt/conda/share/jupyter/lab +[I 15:33:01.155 NotebookApp] Serving notebooks from local directory: /home/jovyan +[I 15:33:01.156 NotebookApp] 0 active kernels +[I 15:33:01.156 NotebookApp] The Jupyter Notebook is running at: +[I 15:33:01.157 NotebookApp] http://[all ip addresses on your system]:8888/?token=112bb073331f1460b73768c76dffb2f87ac1d4ca7870d46a +[I 15:33:01.157 NotebookApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation). +[C 15:33:01.160 NotebookApp] + + Copy/paste this URL into your browser when you connect for the first time, + to login with a token: + http://localhost:8888/?token=112bb073331f1460b73768c76dffb2f87ac1d4ca7870d46a +``` + +Pressing `Ctrl-C` shuts down the notebook server but leaves the container intact on disk for later restart or permanent deletion using commands like the following: + +``` +# list containers +docker ps -a +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +d67fe77f1a84 jupyter/base-notebook "tini -- start-noteb…" 44 seconds ago Exited (0) 39 seconds ago cocky_mirzakhani + +# start the stopped container +docker start -a d67fe77f1a84 +Executing the command: jupyter notebook +[W 16:45:02.020 NotebookApp] WARNING: The notebook server is listening on all IP addresses and not using encryption. This is not recommended. +... + +# remove the stopped container +docker rm d67fe77f1a84 +d67fe77f1a84 +``` + +**Example 2** This command pulls the `jupyter/r-notebook` image tagged `e5c5a7d3e52d` from Docker Hub if it is not already present on the local host. It then starts a container running a Jupyter Notebook server and exposes the server on host port 10000. 
The server logs appear in the terminal and include a URL to the notebook server, but with the internal container port (8888) instead of the correct host port (10000). + +``` +docker run --rm -p 10000:8888 -v "$PWD":/home/jovyan/work jupyter/r-notebook:e5c5a7d3e52d + +Executing the command: jupyter notebook +[I 19:31:09.573 NotebookApp] Writing notebook server cookie secret to /home/jovyan/.local/share/jupyter/runtime/notebook_cookie_secret +[W 19:31:11.930 NotebookApp] WARNING: The notebook server is listening on all IP addresses and not using encryption. This is not recommended. +[I 19:31:12.085 NotebookApp] JupyterLab alpha preview extension loaded from /opt/conda/lib/python3.6/site-packages/jupyterlab +[I 19:31:12.086 NotebookApp] JupyterLab application directory is /opt/conda/share/jupyter/lab +[I 19:31:12.117 NotebookApp] Serving notebooks from local directory: /home/jovyan +[I 19:31:12.117 NotebookApp] 0 active kernels +[I 19:31:12.118 NotebookApp] The Jupyter Notebook is running at: +[I 19:31:12.119 NotebookApp] http://[all ip addresses on your system]:8888/?token=3b8dce890cb65570fb0d9c4a41ae067f7604873bd604f5ac +[I 19:31:12.120 NotebookApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation). +[C 19:31:12.122 NotebookApp] + + Copy/paste this URL into your browser when you connect for the first time, + to login with a token: + http://localhost:8888/?token=3b8dce890cb65570fb0d9c4a41ae067f7604873bd604f5ac +``` + +Pressing `Ctrl-C` shuts down the notebook server and immediately destroys the Docker container. Files written to `~/work` in the container remain intact. Any other changes made in the container are lost. + +**Example 3** This command pulls the `jupyter/all-spark-notebook` image currently tagged `latest` from Docker Hub if an image tagged `latest` is not already present on the local host. It then starts a container named `notebook` running a JupyterLab server and exposes the server on a randomly selected port. 
+ +``` +docker run -d -P --name notebook jupyter/all-spark-notebook +``` + +The assigned port and notebook server token are visible using other Docker commands. + +``` +# get the random host port assigned to the container port 8888 +docker port notebook 8888 +0.0.0.0:32769 + +# get the notebook token from the logs +docker logs --tail 3 notebook + Copy/paste this URL into your browser when you connect for the first time, + to login with a token: + http://localhost:8888/?token=15914ca95f495075c0aa7d0e060f1a78b6d94f70ea373b00 +``` + +Together, the URL to visit on the host machine to access the server in this case is http://localhost:32769?token=15914ca95f495075c0aa7d0e060f1a78b6d94f70ea373b00. + +The container runs in the background until stopped and/or removed by additional Docker commands. + +``` +# stop the container +docker stop notebook +notebook + +# remove the container permanently +docker rm notebook +notebook +``` + +## Using Binder + +[Binder](https://mybinder.org/) is a service that allows you to create and share custom computing environments for projects in version control. You can use any of the Jupyter Docker Stacks images as a basis for a Binder-compatible Dockerfile. See the [docker-stacks example](https://mybinder.readthedocs.io/en/latest/sample_repos.html#using-a-docker-image-from-the-jupyter-docker-stacks-repository) and [Using a Dockerfile](https://mybinder.readthedocs.io/en/latest/dockerfile.html) sections in the [Binder documentation](https://mybinder.readthedocs.io/en/latest/index.html) for instructions. + +## Using JupyterHub + +You can configure JupyterHub to launch Docker containers from the Jupyter Docker Stacks images. If you've been following the [Zero to JupyterHub with Kubernetes](http://zero-to-jupyterhub.readthedocs.io/en/latest/) guide, see the [Use an existing Docker image](http://zero-to-jupyterhub.readthedocs.io/en/latest/user-environment.html#use-an-existing-docker-image) section for details. 
If you have a custom JupyterHub deployment, see the [Picking or building a Docker image](https://github.com/jupyterhub/dockerspawner#picking-or-building-a-docker-image) instructions for the [dockerspawner](https://github.com/jupyterhub/dockerspawner) instead. + +## Using Other Tools and Services + +You can use the Jupyter Docker Stacks with any Docker-compatible technology (e.g., [Docker Compose](https://docs.docker.com/compose/), [docker-py](https://github.com/docker/docker-py), your favorite cloud container service). See the documentation of the tool, library, or service for details about how to reference, configure, and launch containers from these images. \ No newline at end of file diff --git a/docs/using.md b/docs/using/selecting.md similarity index 55% rename from docs/using.md rename to docs/using/selecting.md index c4771c16..7105b963 100644 --- a/docs/using.md +++ b/docs/using/selecting.md @@ -1,19 +1,17 @@ -# Users Guide +# Selecting an Image Using one of the Jupyter Docker Stacks requires two choices: 1. Which Docker image you wish to use 2. How you wish to start Docker containers from that image -This section provides details about the available images and runtimes. +This section provides details about the first. -## Selecting an Image - -### Core Stacks +## Core Stacks The Jupyter team maintains a set of Docker image definitions in the [https://github.com/jupyter/docker-stacks](https://github.com/jupyter/docker-stacks) GitHub repository. The following sections describe these images including their contents, relationships, and versioning strategy. 
-#### jupyter/base-notebook +### jupyter/base-notebook [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/base-notebook) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/base-notebook/Dockerfile) @@ -30,7 +28,7 @@ The Jupyter team maintains a set of Docker image definitions in the [https://git * A `start.sh` script useful for running alternative commands in the container (e.g. `ipython`, `jupyter kernelgateway`, `jupyter lab`) * Options for a self-signed HTTPS certificate and passwordless sudo -#### jupyter/minimal-notebook +### jupyter/minimal-notebook [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/minimal-notebook) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/minimal-notebook/Dockerfile) @@ -42,7 +40,7 @@ The Jupyter team maintains a set of Docker image definitions in the [https://git * [Pandoc](http://pandoc.org) and [TeX Live](https://www.tug.org/texlive/) for notebook document conversion * [git](https://git-scm.com/), [emacs](https://www.gnu.org/software/emacs/), [jed](https://www.jedsoft.org/jed/), [vim](http://www.vim.org/), and unzip -#### jupyter/r-notebook +### jupyter/r-notebook [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/r-notebook) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/r-notebook/Dockerfile) @@ -56,7 +54,7 @@ The Jupyter team maintains a set of Docker image definitions in the [https://git * [tidyverse](https://www.tidyverse.org/) packages, including [ggplot2](http://ggplot2.org/), [dplyr](http://dplyr.tidyverse.org/), [tidyr](http://tidyr.tidyverse.org/), [readr](http://readr.tidyverse.org/), [purrr](http://purrr.tidyverse.org/), [tibble](http://tibble.tidyverse.org/), [stringr](http://stringr.tidyverse.org/), [lubridate](http://lubridate.tidyverse.org/), and [broom](https://cran.r-project.org/web/packages/broom/vignettes/broom.html) from 
[conda-forge](https://conda-forge.github.io/feedstocks) * [plyr](https://cran.r-project.org/web/packages/plyr/index.html), [devtools](https://cran.r-project.org/web/packages/devtools/index.html), [shiny](https://shiny.rstudio.com/), [rmarkdown](http://rmarkdown.rstudio.com/), [forecast](https://cran.r-project.org/web/packages/forecast/forecast.pdf), [rsqlite](https://cran.r-project.org/web/packages/RSQLite/index.html), [reshape2](https://cran.r-project.org/web/packages/reshape2/reshape2.pdf), [nycflights13](https://cran.r-project.org/web/packages/nycflights13/index.html), [caret](http://topepo.github.io/caret/index.html), [rcurl](https://cran.r-project.org/web/packages/RCurl/index.html), and [randomforest](https://cran.r-project.org/web/packages/randomForest/randomForest.pdf) packages from [conda-forge](https://conda-forge.github.io/feedstocks) -#### jupyter/scipy-notebook +### jupyter/scipy-notebook [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/scipy-notebook) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/scipy-notebook/Dockerfile) @@ -69,7 +67,7 @@ The Jupyter team maintains a set of Docker image definitions in the [https://git * [ipywidgets](https://ipywidgets.readthedocs.io/en/stable/) for interactive visualizations in Python notebooks * [Facets](https://github.com/PAIR-code/facets) for visualizing machine learning datasets -#### jupyter/tensorflow-notebook +### jupyter/tensorflow-notebook [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/tensorflow-notebook) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/tensorflow-notebook/Dockerfile) @@ -80,7 +78,7 @@ The Jupyter team maintains a set of Docker image definitions in the [https://git * Everything in `jupyter/scipy-notebook` and its ancestor images * [tensorflow](https://www.tensorflow.org/) and [keras](https://keras.io/) machine learning libraries -#### jupyter/datascience-notebook +### 
jupyter/datascience-notebook [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/datascience-notebook) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/datascience-notebook/Dockerfile) @@ -93,7 +91,7 @@ The Jupyter team maintains a set of Docker image definitions in the [https://git * [IJulia](https://github.com/JuliaLang/IJulia.jl) to support Julia code in Jupyter notebooks * [HDF5](https://github.com/JuliaIO/HDF5.jl), [Gadfly](http://gadflyjl.org/stable/), and [RDatasets](https://github.com/johnmyleswhite/RDatasets.jl) packages -#### jupyter/pyspark-notebook +### jupyter/pyspark-notebook [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/pyspark-notebook) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/pyspark-notebook/Dockerfile) @@ -105,7 +103,7 @@ The Jupyter team maintains a set of Docker image definitions in the [https://git * [Apache Spark](https://spark.apache.org/) with Hadoop binaries * [Mesos](http://mesos.apache.org/) client libraries -#### jupyter/all-spark-notebook +### jupyter/all-spark-notebook [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/all-spark-notebook) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/all-spark-notebook/Dockerfile) @@ -118,17 +116,17 @@ The Jupyter team maintains a set of Docker image definitions in the [https://git * [Apache Toree](https://toree.apache.org/) and [spylon-kernel](https://github.com/maxpoint/spylon-kernel) to support Scala code in Jupyter notebooks * [ggplot2](http://ggplot2.org/), [sparklyr](http://spark.rstudio.com/), and [rcurl](https://cran.r-project.org/web/packages/RCurl/index.html) packages -#### Image Relationships +### Image Relationships The following diagram depicts the build dependency tree of the core images. (i.e., the `FROM` statements in their Dockerfiles). 
Any given image inherits the complete content of all ancestor images pointing to it. -[![Image inheritance diagram](images/inherit.svg)](http://interactive.blockdiag.com/?compression=deflate&src=eJyFzTEPgjAQhuHdX9Gws5sQjGzujsaYKxzmQrlr2msMGv-71K0srO_3XGud9NNA8DSfgzESCFlBSdi0xkvQAKTNugw4QnL6GIU10hvX-Zh7Z24OLLq2SjaxpvP10lX35vCf6pOxELFmUbQiUz4oQhYzMc3gCrRt2cWe_FKosmSjyFHC6OS1AwdQWCtyj7sfh523_BI9hKlQ25YdOFdv5fcH0kiEMA) +[![Image inheritance diagram](../images/inherit.svg)](http://interactive.blockdiag.com/?compression=deflate&src=eJyFzTEPgjAQhuHdX9Gws5sQjGzujsaYKxzmQrlr2msMGv-71K0srO_3XGud9NNA8DSfgzESCFlBSdi0xkvQAKTNugw4QnL6GIU10hvX-Zh7Z24OLLq2SjaxpvP10lX35vCf6pOxELFmUbQiUz4oQhYzMc3gCrRt2cWe_FKosmSjyFHC6OS1AwdQWCtyj7sfh523_BI9hKlQ25YdOFdv5fcH0kiEMA) -#### Builds +### Builds Pull requests to the `jupyter/docker-stacks` repository trigger builds of all images on Travis CI. These images are for testing purposes only and are not saved for use. When pull requests merge to master, all images rebuild on Docker Cloud and become available to `docker pull` from Docker Hub. -#### Versioning +### Versioning The `latest` tag in each Docker Hub repository tracks the master branch `HEAD` reference on GitHub. `latest` is a moving target, by definition, and will have backward-incompatible changes regularly. @@ -136,7 +134,7 @@ Every image on Docker Hub also receives a 12-character tag which corresponds wit You must refer to git-SHA image tags when stability and reproducibility are important in your work. (e.g. `FROM jupyter/scipy-notebook:7c45ec67c8e7`, `docker run -it --rm jupyter/scipy-notebook:7c45ec67c8e7`). You should only use `latest` when a one-off container instance is acceptable (e.g., you want to briefly try a new library in a notebook). -### Community Stacks +## Community Stacks The core stacks are just a tiny sample of what's possible when combining Jupyter with other technologies. 
We encourage members of the Jupyter community to create their own stacks based on the core images and link them below. @@ -144,119 +142,3 @@ The core stacks are just a tiny sample of what's possible when combining Jupyter See the [contributing guide](contributing#Community-Stacks) for information about how to create your own Jupyter Docker Stack. -## Running a Container - -### Using the Docker CLI - -You can launch a local Docker container from the Jupyter Docker Stacks using the [Docker command line interface](https://docs.docker.com/engine/reference/commandline/cli/). There are numerous ways to configure containers using the CLI. The following are a couple common patterns. - -**Example 1** This command pulls the `jupyter/scipy-notebook` image tagged `2c80cf3537ca` from Docker Hub if it is not already present on the local host. It then starts a container running a Jupyter Notebook server and exposes the server on host port 8888. The server logs appear in the terminal and include a URL to the notebook server. - -``` -docker run -p 8888:8888 jupyter/scipy-notebook:2c80cf3537ca - -Executing the command: jupyter notebook -[I 15:33:00.567 NotebookApp] Writing notebook server cookie secret to /home/jovyan/.local/share/jupyter/runtime/notebook_cookie_secret -[W 15:33:01.084 NotebookApp] WARNING: The notebook server is listening on all IP addresses and not using encryption. This is not recommended. 
-[I 15:33:01.150 NotebookApp] JupyterLab alpha preview extension loaded from /opt/conda/lib/python3.6/site-packages/jupyterlab -[I 15:33:01.150 NotebookApp] JupyterLab application directory is /opt/conda/share/jupyter/lab -[I 15:33:01.155 NotebookApp] Serving notebooks from local directory: /home/jovyan -[I 15:33:01.156 NotebookApp] 0 active kernels -[I 15:33:01.156 NotebookApp] The Jupyter Notebook is running at: -[I 15:33:01.157 NotebookApp] http://[all ip addresses on your system]:8888/?token=112bb073331f1460b73768c76dffb2f87ac1d4ca7870d46a -[I 15:33:01.157 NotebookApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation). -[C 15:33:01.160 NotebookApp] - - Copy/paste this URL into your browser when you connect for the first time, - to login with a token: - http://localhost:8888/?token=112bb073331f1460b73768c76dffb2f87ac1d4ca7870d46a -``` - -Pressing `Ctrl-C` shuts down the notebook server but leaves the container intact on disk for later restart or permanent deletion using commands like the following: - -``` -# list containers -docker ps -a -CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES -d67fe77f1a84 jupyter/base-notebook "tini -- start-noteb…" 44 seconds ago Exited (0) 39 seconds ago cocky_mirzakhani - -# start the stopped container -docker start -a d67fe77f1a84 -Executing the command: jupyter notebook -[W 16:45:02.020 NotebookApp] WARNING: The notebook server is listening on all IP addresses and not using encryption. This is not recommended. -... - -# remove the stopped container -docker rm d67fe77f1a84 -d67fe77f1a84 -``` - -**Example 2** This command pulls the `jupyter/r-notebook` image tagged `e5c5a7d3e52d` from Docker Hub if it is not already present on the local host. It then starts a container running a Jupyter Notebook server and exposes the server on host port 10000. 
The server logs appear in the terminal and include a URL to the notebook server, but with the internal container port (8888) instead of the the correct host port (10000). - -``` -docker run --rm -p 10000:8888 -v "$PWD":/home/jovyan/work jupyter/r-notebook:e5c5a7d3e52d - -Executing the command: jupyter notebook -[I 19:31:09.573 NotebookApp] Writing notebook server cookie secret to /home/jovyan/.local/share/jupyter/runtime/notebook_cookie_secret -[W 19:31:11.930 NotebookApp] WARNING: The notebook server is listening on all IP addresses and not using encryption. This is not recommended. -[I 19:31:12.085 NotebookApp] JupyterLab alpha preview extension loaded from /opt/conda/lib/python3.6/site-packages/jupyterlab -[I 19:31:12.086 NotebookApp] JupyterLab application directory is /opt/conda/share/jupyter/lab -[I 19:31:12.117 NotebookApp] Serving notebooks from local directory: /home/jovyan -[I 19:31:12.117 NotebookApp] 0 active kernels -[I 19:31:12.118 NotebookApp] The Jupyter Notebook is running at: -[I 19:31:12.119 NotebookApp] http://[all ip addresses on your system]:8888/?token=3b8dce890cb65570fb0d9c4a41ae067f7604873bd604f5ac -[I 19:31:12.120 NotebookApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation). -[C 19:31:12.122 NotebookApp] - - Copy/paste this URL into your browser when you connect for the first time, - to login with a token: - http://localhost:8888/?token=3b8dce890cb65570fb0d9c4a41ae067f7604873bd604f5ac -``` - -Pressing `Ctrl-C` shuts down the notebook server and immediately destroys the Docker container. Files written to `~/work` in the container remain touched. Any other changes made in the container are lost. - -**Example 3** This command pulls the `jupyter/all-spark-notebook` image currently tagged `latest` from Docker Hub if an image tagged `latest` is not already present on the local host. It then starts a container named `notebook` running a JupyterLab server and exposes the server on a randomly selected port. 
- -``` -docker run -d -P --name notebook jupyter/all-spark-notebook -``` - -The assigned port and notebook server token are visible using other Docker commands. - -``` -# get the random host port assigned to the container port 8888 -docker port notebook 8888 -0.0.0.0:32769 - -# get the notebook token from the logs -docker logs --tail 3 notebook - Copy/paste this URL into your browser when you connect for the first time, - to login with a token: - http://localhost:8888/?token=15914ca95f495075c0aa7d0e060f1a78b6d94f70ea373b00 -``` - -Together, the URL to visit on the host machine to access the server in this case is http://localhost:32769?token=15914ca95f495075c0aa7d0e060f1a78b6d94f70ea373b00. - -The container runs in the background until stopped and/or removed by additional Docker commands. - -``` -# stop the container -docker stop notebook -notebook - -# remove the container permanently -docker rm notebook -notebook -``` - -### Using Binder - -[Binder](https://mybinder.org/) is a service that allows you to create and share custom computing environments for projects in version control. You can use any of the Jupyter Docker Stacks images as a basis for a Binder-compatible Dockerfile. See the [docker-stacks example](https://mybinder.readthedocs.io/en/latest/sample_repos.html#using-a-docker-image-from-the-jupyter-docker-stacks-repository) and [Using a Dockerfile](https://mybinder.readthedocs.io/en/latest/dockerfile.html) sections in the [Binder documentation](https://mybinder.readthedocs.io/en/latest/index.html) for instructions. - -### Using JupyterHub - -You can configure JupyterHub to launcher Docker containers from the Jupyter Docker Stacks images. If you've been following the [Zero to JupyterHub with Kubernetes](http://zero-to-jupyterhub.readthedocs.io/en/latest/) guide, see the [Use an existing Docker image](http://zero-to-jupyterhub.readthedocs.io/en/latest/user-environment.html#use-an-existing-docker-image) section for details. 
If you have a custom JupyterHub deployment, see the [Picking or building a Docker image](https://github.com/jupyterhub/dockerspawner#picking-or-building-a-docker-image) instructions for the [dockerspawner](https://github.com/jupyterhub/dockerspawner) instead. - -### Using Other Tools and Services - -You can use the Jupyter Docker Stacks with any Docker-compatible technology (e.g., [Docker Compose](https://docs.docker.com/compose/), [docker-py](https://github.com/docker/docker-py), your favorite cloud container service). See the documentation of the tool, library, or service for details about how to reference, configure, and launch containers from these images. \ No newline at end of file diff --git a/docs/using/specifics.md b/docs/using/specifics.md new file mode 100644 index 00000000..7d9ea5bb --- /dev/null +++ b/docs/using/specifics.md @@ -0,0 +1,198 @@ +# Image Specifics + +This page provides details about features specific to one or more images. + +## Apache Spark + +The `jupyter/pyspark-notebook` and `jupyter/all-spark-notebook` images support the use of [Apache Spark](https://spark.apache.org/) in Python, R, and Scala notebooks. The following sections provide some examples of how to get started using them. + +### Using Spark Local Mode + +Spark local mode is useful for experimentation on small data when you do not have a Spark cluster available. + +#### In a Python Notebook + +```python +import pyspark +sc = pyspark.SparkContext('local[*]') + +# do something to prove it works +rdd = sc.parallelize(range(1000)) +rdd.takeSample(False, 5) +``` + +#### In a R Notebook + +```r +library(SparkR) + +as <- sparkR.session("local[*]") + +# do something to prove it works +df <- as.DataFrame(iris) +head(filter(df, df$Petal_Width > 0.2)) +``` + +#### In a Spylon Kernel Scala Notebook + +Spylon kernel instantiates a `SparkContext` for you in variable `sc` after you configure Spark options in a `%%init_spark` magic cell. 
+ +```python +%%init_spark +# Configure Spark to use a local master +launcher.master = "local[*]" +``` + +```scala +// Now run Scala code that uses the initialized SparkContext in sc +val rdd = sc.parallelize(0 to 999) +rdd.takeSample(false, 5) +``` + +#### In an Apache Toree Scala Notebook + +Apache Toree instantiates a local `SparkContext` for you in variable `sc` when the kernel starts. + +```scala +val rdd = sc.parallelize(0 to 999) +rdd.takeSample(false, 5) +``` + +### Connecting to a Spark Cluster on Mesos + +This configuration allows your compute cluster to scale with your data. + +0. [Deploy Spark on Mesos](http://spark.apache.org/docs/latest/running-on-mesos.html). +1. Configure each slave with [the `--no-switch_user` flag](https://open.mesosphere.com/reference/mesos-slave/) or create the `$NB_USER` account on every slave node. +2. Run the Docker container with `--net=host` in a location that is network addressable by all of your Spark workers. (This is a [Spark networking requirement](http://spark.apache.org/docs/latest/cluster-overview.html#components).) + * NOTE: When using `--net=host`, you must also use the flags `--pid=host -e TINI_SUBREAPER=true`. See https://github.com/jupyter/docker-stacks/issues/64 for details. +3. Follow the language specific instructions below. 
+ +#### In a Python Notebook + +```python +import os +# make sure pyspark tells workers to use python3 not 2 if both are installed +os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3' + +import pyspark +conf = pyspark.SparkConf() + +# point to mesos master or zookeeper entry (e.g., zk://10.10.10.10:2181/mesos) +conf.setMaster("mesos://10.10.10.10:5050") +# point to spark binary package in HDFS or on local filesystem on all slave +# nodes (e.g., file:///opt/spark/spark-2.2.0-bin-hadoop2.7.tgz) +conf.set("spark.executor.uri", "hdfs://10.10.10.10/spark/spark-2.2.0-bin-hadoop2.7.tgz") +# set other options as desired +conf.set("spark.executor.memory", "8g") +conf.set("spark.core.connection.ack.wait.timeout", "1200") + +# create the context +sc = pyspark.SparkContext(conf=conf) + +# do something to prove it works +rdd = sc.parallelize(range(100000000)) +rdd.sumApprox(3) +``` + +#### In a R Notebook + +```r +library(SparkR) + +# Point to mesos master or zookeeper entry (e.g., zk://10.10.10.10:2181/mesos) +# Point to spark binary package in HDFS or on local filesystem on all slave +# nodes (e.g., file:///opt/spark/spark-2.2.0-bin-hadoop2.7.tgz) in sparkEnvir +# Set other options in sparkEnvir +sc <- sparkR.session("mesos://10.10.10.10:5050", sparkEnvir=list( + spark.executor.uri="hdfs://10.10.10.10/spark/spark-2.2.0-bin-hadoop2.7.tgz", + spark.executor.memory="8g" + ) +) + +# do something to prove it works +data(iris) +df <- as.DataFrame(iris) +head(filter(df, df$Petal_Width > 0.2)) +``` + +#### In a Spylon Kernel Scala Notebook + +```python +%%init_spark +# Configure the location of the mesos master and spark distribution on HDFS +launcher.master = "mesos://10.10.10.10:5050" +launcher.conf.spark.executor.uri=hdfs://10.10.10.10/spark/spark-2.2.0-bin-hadoop2.7.tgz +``` + +```scala +// Now run Scala code that uses the initialized SparkContext in sc +val rdd = sc.parallelize(0 to 999) +rdd.takeSample(false, 5) +``` + +#### In an Apache Toree Scala Notebook + +The Apache Toree 
kernel automatically creates a `SparkContext` when it starts based on configuration information from its command line arguments and environment variables. You can pass information about your Mesos cluster via the `SPARK_OPTS` environment variable when you spawn a container. + +For instance, to pass information about a Mesos master, Spark binary location in HDFS, and executor options, you could start the container like so: + +``` +docker run -d -p 8888:8888 -e SPARK_OPTS='--master=mesos://10.10.10.10:5050 \ + --spark.executor.uri=hdfs://10.10.10.10/spark/spark-2.2.0-bin-hadoop2.7.tgz \ + --spark.executor.memory=8g' jupyter/all-spark-notebook +``` + +Note that this is the same information expressed in a notebook in the Python case above. Once the kernel spec has your cluster information, you can test your cluster in an Apache Toree notebook like so: + +```scala +// should print the value of --master in the kernel spec +println(sc.master) + +// do something to prove it works +val rdd = sc.parallelize(0 to 99999999) +rdd.sum() +``` + +### Connecting to a Spark Cluster in Standalone Mode + +Connection to a Spark Cluster in Standalone Mode requires the following set of steps: + +0. Verify that the docker image (check the Dockerfile) and the Spark Cluster which is being deployed, run the same version of Spark. +1. [Deploy Spark in Standalone Mode](http://spark.apache.org/docs/latest/spark-standalone.html). +2. Run the Docker container with `--net=host` in a location that is network addressable by all of your Spark workers. (This is a [Spark networking requirement](http://spark.apache.org/docs/latest/cluster-overview.html#components).) + * NOTE: When using `--net=host`, you must also use the flags `--pid=host -e TINI_SUBREAPER=true`. See https://github.com/jupyter/docker-stacks/issues/64 for details. +3. 
The language specific instructions are almost same as mentioned above for Mesos, only the master url would now be something like spark://10.10.10.10:7077 + +## Tensorflow + +The `jupyter/tensorflow-notebook` image supports the use of [Tensorflow](https://www.tensorflow.org/) in single machine or distributed mode. + +### Single Machine Mode + +```python +import tensorflow as tf + +hello = tf.Variable('Hello World!') + +sess = tf.Session() +init = tf.global_variables_initializer() + +sess.run(init) +sess.run(hello) +``` + +### Distributed Mode + +```python +import tensorflow as tf + +hello = tf.Variable('Hello Distributed World!') + +server = tf.train.Server.create_local_server() +sess = tf.Session(server.target) +init = tf.global_variables_initializer() + +sess.run(init) +sess.run(hello) +``` \ No newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt index 2352874b..4dc6b364 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,7 +1,6 @@ docker -jupyter_alabaster_theme pytest recommonmark==0.4.0 requests sphinx>=1.6 -sphinx_rtd_theme \ No newline at end of file +sphinx_rtd_theme From d03a419e17820d752b02de5d653e25f7796949da Mon Sep 17 00:00:00 2001 From: Peter Parente Date: Sun, 25 Mar 2018 21:15:04 -0400 Subject: [PATCH 12/17] Fix path to readthedocs.yaml --- readthedocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs.yml b/readthedocs.yml index 0e4eabfc..b020577c 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -1,4 +1,4 @@ conda: - file: environment.yml + file: docs/environment.yml python: version: 3 \ No newline at end of file From e34a17e02488894296ed8dfaea79fc83eff6fae0 Mon Sep 17 00:00:00 2001 From: Peter Parente Date: Sun, 25 Mar 2018 21:17:42 -0400 Subject: [PATCH 13/17] Try the RTD path fix again --- readthedocs.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/readthedocs.yml b/readthedocs.yml index b020577c..48bdcacd 100644 --- a/readthedocs.yml +++ 
b/readthedocs.yml @@ -1,4 +1,3 @@ -conda: - file: docs/environment.yml +requirements_file: requirements-dev.txt python: version: 3 \ No newline at end of file From a7407c115d0c1d22f0fa8c2be69d9e845dfeba29 Mon Sep 17 00:00:00 2001 From: Peter Parente Date: Sun, 25 Mar 2018 21:27:54 -0400 Subject: [PATCH 14/17] Fix link to stack contrib page --- docs/using/selecting.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/using/selecting.md b/docs/using/selecting.md index 7105b963..a9607fed 100644 --- a/docs/using/selecting.md +++ b/docs/using/selecting.md @@ -140,5 +140,4 @@ The core stacks are just a tiny sample of what's possible when combining Jupyter *Nothing here yet! You can be the first!* -See the [contributing guide](contributing#Community-Stacks) for information about how to create your own Jupyter Docker Stack. - +See the [contributing guide](contributing/stacks) for information about how to create your own Jupyter Docker Stack. \ No newline at end of file From 71978c31b46a5d66ffc9afbb9a4309679057b960 Mon Sep 17 00:00:00 2001 From: Peter Parente Date: Sun, 25 Mar 2018 21:35:30 -0400 Subject: [PATCH 15/17] Fix TOC heading level --- docs/index.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index abfb0bc0..d6ec112d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -16,7 +16,8 @@ The two examples below may help you get started if you `have Docker installed Date: Mon, 26 Mar 2018 20:44:02 -0400 Subject: [PATCH 16/17] Address initial review comments --- docs/index.rst | 6 +++++- docs/using/common.md | 6 ------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index d6ec112d..f99355af 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,7 +1,11 @@ Jupyter Docker Stacks ===================== -Jupyter Docker Stacks are a set of ready-to-run Docker images containing Jupyter applications and interactive computing tools. 
You can use a stack image to start a personal Jupyter Notebook server in a local Docker container, to run JupyterLab servers for a team using JupyterHub, to write your own project Dockerfile, and so on. +Jupyter Docker Stacks are a set of ready-to-run Docker images containing Jupyter applications and interactive computing tools. You can use a stack image to do any of the following (and more): + +* Start a personal Jupyter Notebook server in a local Docker container +* Run JupyterLab servers for a team using JupyterHub +* Write your own project Dockerfile Quick Start ----------- diff --git a/docs/using/common.md b/docs/using/common.md index 16c233f9..47dce538 100644 --- a/docs/using/common.md +++ b/docs/using/common.md @@ -18,12 +18,6 @@ For example, to set the base URL of the notebook server, you can run the followi docker run -d -p 8888:8888 jupyter/base-notebook start-notebook.sh --NotebookApp.base_url=/some/path ``` -For example, to ignore best practice and disable all authentication, you can run the following: - -``` -docker run -d -p 8888:8888 jupyter/base-notebook start-notebook.sh --NotebookApp.token='' -``` - ## Docker Options You may instruct the `start-notebook.sh` script to customize the container environment before launching From 09d5f5aa80c5da4c8f91483d944ca3fb44e4b7b9 Mon Sep 17 00:00:00 2001 From: Peter Parente Date: Mon, 26 Mar 2018 20:44:40 -0400 Subject: [PATCH 17/17] Fix typo too --- docs/using/common.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/using/common.md b/docs/using/common.md index 47dce538..6eaf72cd 100644 --- a/docs/using/common.md +++ b/docs/using/common.md @@ -27,7 +27,7 @@ the notebook server. You do so by passing arguments to the `docker run` command. * `-e NB_UID=1000` - Instructs the startup script to switch the numeric user ID of `$NB_USER` to the given value. This feature is useful when mounting host volumes with specific owner permissions. 
For this option to take effect, you must run the container with `--user root`. (The startup script will `su $NB_USER` after adjusting the user ID.) * `-e NB_GID=100` - Instructs the startup script to change the numeric group ID of the `$NB_USER` to the given value. This feature is useful when mounting host volumes with specific group permissions. For this option to take effect, you must run the container with `--user root`. (The startup script will `su $NB_USER` after adjusting the group ID.) * `-e CHOWN_HOME=yes` - Instructs the startup script to recursively change the `$NB_USER` home directory owner and group to the current value of `$NB_UID` and `$NB_GID`. This change will take effect even if the user home directory is mounted from the host using `-v` as described below. -* `-e GRANT_SUDO=yes` - Instructs the startup script to grant the `NB_USER` user passwordless `sudo` capability. You do **not** need too this option to allow the user to `conda` or `pip` install additional packages. This option is useful, however, when you wish to give `$NB_USER` the ability to install OS packages with `apt` or modify other root-owned files in the container. For this option to take effect, you must run the container with `--user root`. (The `start-notebook.sh` script will `su $NB_USER` after adding `$NB_USER` to sudoers.) **You should only enable `sudo` if you trust the user or if the container is running on an isolated host.** +* `-e GRANT_SUDO=yes` - Instructs the startup script to grant the `NB_USER` user passwordless `sudo` capability. You do **not** need this option to allow the user to `conda` or `pip` install additional packages. This option is useful, however, when you wish to give `$NB_USER` the ability to install OS packages with `apt` or modify other root-owned files in the container. For this option to take effect, you must run the container with `--user root`. (The `start-notebook.sh` script will `su $NB_USER` after adding `$NB_USER` to sudoers.) 
**You should only enable `sudo` if you trust the user or if the container is running on an isolated host.** * `-e GEN_CERT=yes` - Instructs the startup script to generate a self-signed SSL certificate and configure Jupyter Notebook to use it to accept encrypted HTTPS connections. * `-v /some/host/folder/for/work:/home/jovyan/work` - Mounts a host machine directory as a folder in the container. Useful when you want to preserve notebooks and other work even after the container is destroyed. **You must grant the within-container notebook user or group (`NB_UID` or `NB_GID`) write access to the host directory (e.g., `sudo chown 1000 /some/host/folder/for/work`).** * `--user 5000 --group-add users` - Launches the container with a specific user ID and adds that user to the `users` group so that it can modify files in the default home directory and `/opt/conda`. You can use these arguments as alternatives to setting `$NB_UID` and `$NB_GID`.