Mirror of https://github.com/jupyter/docker-stacks.git (synced 2025-10-07 18:14:05 +00:00)
Merge branch 'master' into asalikhov/ubuntu_focal
all-spark-notebook/test/data/local_pyspark.ipynb (new file, 60 lines)
@@ -0,0 +1,60 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "output_type": "error",
     "ename": "Error",
     "evalue": "Jupyter cannot be started. Error attempting to locate jupyter: Data Science libraries jupyter and notebook are not installed in interpreter Python 3.7.7 64-bit ('jupyter': conda).",
     "traceback": [
      "Error: Jupyter cannot be started. Error attempting to locate jupyter: Data Science libraries jupyter and notebook are not installed in interpreter Python 3.7.7 64-bit ('jupyter': conda).",
      "at b.startServer (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:92:270430)",
      "at async b.createServer (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:92:269873)",
      "at async connect (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:92:397876)",
      "at async w.ensureConnectionAndNotebookImpl (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:556625)",
      "at async w.ensureConnectionAndNotebook (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:556303)",
      "at async w.clearResult (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:552346)",
      "at async w.reexecuteCell (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:540374)",
      "at async w.reexecuteCells (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:537541)"
     ]
    }
   ],
   "source": [
    "from pyspark.sql import SparkSession\n",
    "\n",
    "# Spark session & context\n",
    "spark = SparkSession.builder.master('local').getOrCreate()\n",
    "sc = spark.sparkContext\n",
    "\n",
    "# Sum of the first 100 whole numbers\n",
    "rdd = sc.parallelize(range(100 + 1))\n",
    "rdd.sum()\n",
    "# 5050"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
all-spark-notebook/test/data/local_sparkR.ipynb (new file, 41 lines)
@@ -0,0 +1,41 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "library(SparkR)\n",
    "\n",
    "# Spark session & context\n",
    "sc <- sparkR.session(\"local\")\n",
    "\n",
    "# Sum of the first 100 whole numbers\n",
    "sdf <- createDataFrame(list(1:100))\n",
    "dapplyCollect(sdf,\n",
    " function(x) \n",
    " { x <- sum(x)}\n",
    " )\n",
    "# 5050"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R",
   "language": "R",
   "name": "ir"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
all-spark-notebook/test/data/local_sparklyr.ipynb (new file, 43 lines)
@@ -0,0 +1,43 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "library(sparklyr)\n",
    "\n",
    "# get the default config\n",
    "conf <- spark_config()\n",
    "# Set the catalog implementation in-memory\n",
    "conf$spark.sql.catalogImplementation <- \"in-memory\"\n",
    "\n",
    "# Spark session & context\n",
    "sc <- spark_connect(master = \"local\", config = conf)\n",
    "\n",
    "# Sum of the first 100 whole numbers\n",
    "sdf_len(sc, 100, repartition = 1) %>% \n",
    " spark_apply(function(e) sum(e))\n",
    "# 5050"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R",
   "language": "R",
   "name": "ir"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
all-spark-notebook/test/data/local_spylon.ipynb (new file, 63 lines)
@@ -0,0 +1,63 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%init_spark\n",
    "# Spark session & context\n",
    "launcher.master = \"local\"\n",
    "launcher.conf.spark.executor.cores = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "rdd: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[8] at parallelize at <console>:28\n",
       "res4: Double = 5050.0\n"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "// Sum of the first 100 whole numbers\n",
    "val rdd = sc.parallelize(0 to 100)\n",
    "rdd.sum()\n",
    "// 5050"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "spylon-kernel",
   "language": "scala",
   "name": "spylon-kernel"
  },
  "language_info": {
   "codemirror_mode": "text/x-scala",
   "file_extension": ".scala",
   "help_links": [
    {
     "text": "MetaKernel Magics",
     "url": "https://metakernel.readthedocs.io/en/latest/source/README.html"
    }
   ],
   "mimetype": "text/x-scala",
   "name": "scala",
   "pygments_lexer": "scala",
   "version": "0.4.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
all-spark-notebook/test/data/local_toree.ipynb (new file, 89 lines)
@@ -0,0 +1,89 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Waiting for a Spark session to start..."
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "spark://master:7077\n"
     ]
    }
   ],
   "source": [
    "// should print the value of --master in the kernel spec\n",
    "println(sc.master)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Waiting for a Spark session to start..."
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "rdd = ParallelCollectionRDD[0] at parallelize at <console>:28\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "5050.0"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "// Sum of the first 100 whole numbers\n",
    "val rdd = sc.parallelize(0 to 100)\n",
    "rdd.sum()\n",
    "// 5050"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Apache Toree - Scala",
   "language": "scala",
   "name": "apache_toree_scala"
  },
  "language_info": {
   "codemirror_mode": "text/x-scala",
   "file_extension": ".scala",
   "mimetype": "text/x-scala",
   "name": "scala",
   "pygments_lexer": "scala",
   "version": "2.11.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
all-spark-notebook/test/test_spark_notebooks.py (new file, 35 lines)
@@ -0,0 +1,35 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

import logging

import pytest
import os

LOGGER = logging.getLogger(__name__)


@pytest.mark.parametrize(
    "test_file",
    # TODO: add local_sparklyr
    ["local_pyspark", "local_spylon", "local_toree", "local_sparkR"],
)
def test_nbconvert(container, test_file):
    """Check if Spark notebooks can be executed"""
    host_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data")
    cont_data_dir = "/home/jovyan/data"
    output_dir = "/tmp"
    timeout_ms = 600
    LOGGER.info(f"Test that {test_file} notebook can be executed ...")
    command = f"jupyter nbconvert --to markdown --ExecutePreprocessor.timeout={timeout_ms} --output-dir {output_dir} --execute {cont_data_dir}/{test_file}.ipynb"
    c = container.run(
        volumes={host_data_dir: {"bind": cont_data_dir, "mode": "ro"}},
        tty=True,
        command=["start.sh", "bash", "-c", command],
    )
    rv = c.wait(timeout=timeout_ms / 10 + 10)
    assert rv == 0 or rv["StatusCode"] == 0, f"Command {command} failed"
    logs = c.logs(stdout=True).decode("utf-8")
    LOGGER.debug(logs)
    expected_file = f"{output_dir}/{test_file}.md"
    assert expected_file in logs, f"Expected file {expected_file} not generated"
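To try this new test locally, an invocation along these lines should work, assuming the image has already been built and the usual pytest/docker tooling used by the repository is available (the `container` fixture comes from the shared `conftest.py`); the exact commands may differ from what the project's Makefile does:

```bash
# build the image, then run its whole test suite via the repo's make target
make build/all-spark-notebook
make test/all-spark-notebook

# or target just this module with pytest (may need extra environment that the
# Makefile normally sets up, such as which image tag to test)
pytest -v all-spark-notebook/test/test_spark_notebooks.py
```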
@@ -25,9 +25,9 @@ If there's agreement that the feature belongs in one or more of the core stacks:

1. Implement the feature in a local clone of the `jupyter/docker-stacks` project.
2. Please build the image locally before submitting a pull request. Building the image locally shortens the debugging cycle by taking some load off [Travis CI](http://travis-ci.org/), which graciously provides free build services for open source projects like this one. If you use `make`, call:
```
make build/somestack-notebook
```
```bash
make build/somestack-notebook
```
3. [Submit a pull request](https://github.com/PointCloudLibrary/pcl/wiki/A-step-by-step-guide-on-preparing-and-submitting-a-pull-request) (PR) with your changes.
4. Watch for Travis to report a build success or failure for your PR on GitHub.
5. Discuss changes with the maintainers and address any build issues.
@@ -7,9 +7,9 @@ Please follow the process below to update a package version:
1. Locate the Dockerfile containing the library you wish to update (e.g., [base-notebook/Dockerfile](https://github.com/jupyter/docker-stacks/blob/master/base-notebook/Dockerfile), [scipy-notebook/Dockerfile](https://github.com/jupyter/docker-stacks/blob/master/scipy-notebook/Dockerfile))
2. Adjust the version number for the package. We prefer to pin the major and minor version number of packages so as to minimize rebuild side-effects when users submit pull requests (PRs). For example, you'll find the Jupyter Notebook package, `notebook`, installed using conda with `notebook=5.4.*`.
3. Please build the image locally before submitting a pull request. Building the image locally shortens the debugging cycle by taking some load off [Travis CI](http://travis-ci.org/), which graciously provides free build services for open source projects like this one. If you use `make`, call:
```
make build/somestack-notebook
```
```bash
make build/somestack-notebook
```
4. [Submit a pull request](https://github.com/PointCloudLibrary/pcl/wiki/A-step-by-step-guide-on-preparing-and-submitting-a-pull-request) (PR) with your changes.
5. Watch for Travis to report a build success or failure for your PR on GitHub.
6. Discuss changes with the maintainers and address any build issues. Version conflicts are the most common problem. You may need to upgrade additional packages to fix build failures.
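As a concrete illustration of step 2 above, a pinned package in one of the Dockerfiles typically looks something like the sketch below. This is not the literal `base-notebook/Dockerfile` contents; the trailing `conda clean` / `fix-permissions` calls mirror the cleanup pattern used elsewhere in this repository:

```dockerfile
# illustrative excerpt only: pin notebook to a major.minor series, then clean up
RUN conda install --quiet --yes 'notebook=5.4.*' && \
    conda clean --all -f -y && \
    fix-permissions $CONDA_DIR && \
    fix-permissions /home/$NB_USER
```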
@@ -13,13 +13,13 @@ This approach mirrors how we build and share the core stack images. Feel free to

First, install [cookiecutter](https://github.com/audreyr/cookiecutter) using pip or conda:

```
```bash
pip install cookiecutter # or conda install cookiecutter
```

Run the cookiecutter command pointing to the [jupyter/cookiecutter-docker-stacks](https://github.com/jupyter/cookiecutter-docker-stacks) project on GitHub.

```
```bash
cookiecutter https://github.com/jupyter/cookiecutter-docker-stacks.git
```

@@ -13,10 +13,10 @@ Please follow the process below to add new tests:
1. If the test should run against every image built, add your test code to one of the modules in [test/](https://github.com/jupyter/docker-stacks/tree/master/test) or create a new module.
2. If your test should run against a single image, add your test code to one of the modules in `some-notebook/test/` or create a new module.
3. Build one or more images you intend to test and run the tests locally. If you use `make`, call:
```
make build/somestack-notebook
make test/somestack-notebook
```
```bash
make build/somestack-notebook
make test/somestack-notebook
```
4. [Submit a pull request](https://github.com/PointCloudLibrary/pcl/wiki/A-step-by-step-guide-on-preparing-and-submitting-a-pull-request) (PR) with your changes.
5. Watch for Travis to report a build success or failure for your PR on GitHub.
6. Discuss changes with the maintainers and address any issues running the tests on Travis.

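For steps 1 and 2 in the list above, a new image-specific test module is usually just a small pytest function that reuses the shared `container` fixture, in the same style as the `all-spark-notebook/test/test_spark_notebooks.py` file added in this commit. A minimal sketch, assuming that fixture is provided by the project's `conftest.py` (the file name and command here are hypothetical):

```python
# some-notebook/test/test_example.py (hypothetical example)
def test_container_starts(container):
    """Run a trivial command in the image and check that it exits cleanly."""
    c = container.run(
        tty=True,
        command=["start.sh", "bash", "-c", "echo hello"],
    )
    rv = c.wait(timeout=30)
    # the docker client may return an int or a dict, mirroring the test above
    assert rv == 0 or rv["StatusCode"] == 0
```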
(File diff suppressed because it is too large.)
@@ -8,13 +8,13 @@ This page describes the options supported by the startup script as well as how t

You can pass [Jupyter command line options](https://jupyter.readthedocs.io/en/latest/projects/jupyter-command.html) to the `start-notebook.sh` script when launching the container. For example, to secure the Notebook server with a custom password hashed using `IPython.lib.passwd()` instead of the default token, you can run the following:

```
```bash
docker run -d -p 8888:8888 jupyter/base-notebook start-notebook.sh --NotebookApp.password='sha1:74ba40f8a388:c913541b7ee99d15d5ed31d4226bf7838f83a50e'
```
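If it helps to see where such a hash comes from, one way to generate it yourself is with `IPython.lib.passwd()` (a quick sketch; the hash shown above is only an example value):

```python
# generate a password hash to pass to --NotebookApp.password
from IPython.lib import passwd

print(passwd("my-secret-password"))  # prints a string like 'sha1:<salt>:<digest>'
```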

For example, to set the base URL of the notebook server, you can run the following:

```
```bash
docker run -d -p 8888:8888 jupyter/base-notebook start-notebook.sh --NotebookApp.base_url=/some/path
```

@@ -54,7 +54,7 @@ script for execution details.

You may mount SSL key and certificate files into a container and configure Jupyter Notebook to use them to accept HTTPS connections. For example, to mount a host folder containing a `notebook.key` and `notebook.crt` and use them, you might run the following:

```
```bash
docker run -d -p 8888:8888 \
  -v /some/host/folder:/etc/ssl/notebook \
  jupyter/base-notebook start-notebook.sh \
@@ -64,7 +64,7 @@ docker run -d -p 8888:8888 \

Alternatively, you may mount a single PEM file containing both the key and certificate. For example:

```
```bash
docker run -d -p 8888:8888 \
  -v /some/host/folder/notebook.pem:/etc/ssl/notebook.pem \
  jupyter/base-notebook start-notebook.sh \
@@ -85,13 +85,13 @@ For additional information about using SSL, see the following:

The `start-notebook.sh` script actually inherits most of its option handling capability from a more generic `start.sh` script. The `start.sh` script supports all of the features described above, but allows you to specify an arbitrary command to execute. For example, to run the text-based `ipython` console in a container, do the following:

```
```bash
docker run -it --rm jupyter/base-notebook start.sh ipython
```

Or, to run JupyterLab instead of the classic notebook, run the following:

```
```bash
docker run -it --rm -p 8888:8888 jupyter/base-notebook start.sh jupyter lab
```

@@ -107,7 +107,7 @@ The default Python 3.x [Conda environment](http://conda.pydata.org/docs/using/en

The `jovyan` user has full read/write access to the `/opt/conda` directory. You can use either `conda` or `pip` to install new packages without any additional permissions.

```
```bash
# install a package into the default (python 3.x) environment
pip install some-package
conda install some-package

@@ -17,7 +17,7 @@ orchestrator config.

For example:

```
```bash
docker run -it -e GRANT_SUDO=yes --user root jupyter/minimal-notebook
```

@@ -75,7 +75,7 @@ Python 2.x was removed from all images on August 10th, 2017, starting in tag `cc
add a Python 2.x environment by defining your own Dockerfile inheriting from one of the images like
so:

```
```dockerfile
# Choose your desired base image
FROM jupyter/scipy-notebook:latest

@@ -103,7 +103,7 @@ Ref:
The default version of Python that ships with conda/ubuntu may not be the version you want.
To add a conda environment with a different version and make it accessible to Jupyter, the instructions are very similar to Python 2.x but are slightly simpler (no need to switch to `root`):

```
```dockerfile
# Choose your desired base image
FROM jupyter/minimal-notebook:latest

@@ -168,12 +168,12 @@ ENTRYPOINT ["jupyter", "lab", "--ip=0.0.0.0", "--allow-root"]
```

And build the image as:
```
```bash
docker build -t jupyter/scipy-dasklabextension:latest .
```

Once built, run using the command:
```
```bash
docker run -it --rm -p 8888:8888 -p 8787:8787 jupyter/scipy-dasklabextension:latest
```

@@ -194,7 +194,7 @@ Ref:
[RISE](https://github.com/damianavila/RISE) allows via extension to create live slideshows of your
notebooks, with no conversion, adding javascript Reveal.js:

```
```bash
# Add Live slideshows with RISE
RUN conda install -c damianavila82 rise
```
@@ -207,7 +207,7 @@ Credit: [Paolo D.](https://github.com/pdonorio) based on
You need to install conda's gcc for Python xgboost to work properly. Otherwise, you'll get an
exception about libgomp.so.1 missing GOMP_4.0.

```
```bash
%%bash
conda install -y gcc
pip install xgboost

@@ -312,8 +312,8 @@ Credit: [Justin Tyberg](https://github.com/jtyberg), [quanghoc](https://github.c
To use a specific version of JupyterHub, the version of `jupyterhub` in your image should match the
version in the Hub itself.

```
```dockerfile
FROM jupyter/base-notebook:5ded1de07260
RUN pip install jupyterhub==0.8.0b1
```

@@ -375,7 +375,7 @@ Ref:

### Using Local Spark JARs

```
```python
import os
os.environ['PYSPARK_SUBMIT_ARGS'] = '--jars /home/jovyan/spark-streaming-kafka-assembly_2.10-1.6.1.jar pyspark-shell'
import pyspark

@@ -404,7 +404,7 @@ Ref:

### Use jupyter/all-spark-notebooks with an existing Spark/YARN cluster

```
```dockerfile
FROM jupyter/all-spark-notebook

# Set env vars for pydoop

@@ -480,13 +480,13 @@ convenient to launch the server without a password or token. In this case, you s

For jupyterlab:

```
```bash
docker run jupyter/base-notebook:6d2a05346196 start.sh jupyter lab --LabApp.token=''
```

For jupyter classic:

```
```bash
docker run jupyter/base-notebook:6d2a05346196 start.sh jupyter notebook --NotebookApp.token=''
```

@@ -494,7 +494,7 @@ docker run jupyter/base-notebook:6d2a05346196 start.sh jupyter notebook --Notebo

NB: this works for classic notebooks only

```
```dockerfile
# Update with your base image of choice
FROM jupyter/minimal-notebook:latest

@@ -513,7 +513,7 @@ Ref:

Using `auto-sklearn` requires `swig`, which the other notebook images lack, so it can't be experimented with. Also, there is no Conda package for `auto-sklearn`.

```
```dockerfile
ARG BASE_CONTAINER=jupyter/scipy-notebook
FROM jupyter/scipy-notebook:latest

@@ -5,7 +5,8 @@ This page provides details about features specific to one or more images.
## Apache Spark

**Specific Docker Image Options**

* `-p 4040:4040` - The `jupyter/pyspark-notebook` and `jupyter/all-spark-notebook` images open [SparkUI (Spark Monitoring and Instrumentation UI)](http://spark.apache.org/docs/latest/monitoring.html) at default port `4040`; this option maps the `4040` port inside the docker container to the `4040` port on the host machine. Note that every new Spark context that is created is put onto an incrementing port (i.e. 4040, 4041, 4042, etc.), and it might be necessary to open multiple ports. For example: `docker run -d -p 8888:8888 -p 4040:4040 -p 4041:4041 jupyter/pyspark-notebook`.

**Usage Examples**

@@ -13,30 +14,66 @@ The `jupyter/pyspark-notebook` and `jupyter/all-spark-notebook` images support t

### Using Spark Local Mode

Spark **local mode** is useful for experimentation on small data when you do not have a Spark cluster available.

#### In a Python Notebook
#### In Python

In a Python notebook.

```python
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("SimpleApp").getOrCreate()
# do something to prove it works
spark.sql('SELECT "Test" as c1').show()

# Spark session & context
spark = SparkSession.builder.master('local').getOrCreate()
sc = spark.sparkContext

# Sum of the first 100 whole numbers
rdd = sc.parallelize(range(100 + 1))
rdd.sum()
# 5050
```

#### In a R Notebook
#### In R

In a R notebook with [SparkR][sparkr].

```R
library(SparkR)

as <- sparkR.session("local[*]")
# Spark session & context
sc <- sparkR.session("local")

# do something to prove it works
df <- as.DataFrame(iris)
head(filter(df, df$Petal_Width > 0.2))
# Sum of the first 100 whole numbers
sdf <- createDataFrame(list(1:100))
dapplyCollect(sdf,
              function(x)
              { x <- sum(x)}
             )
# 5050
```

#### In a Spylon Kernel Scala Notebook
In a R notebook with [sparklyr][sparklyr].

```R
library(sparklyr)

# Spark configuration
conf <- spark_config()
# Set the catalog implementation in-memory
conf$spark.sql.catalogImplementation <- "in-memory"

# Spark session & context
sc <- spark_connect(master = "local", config = conf)

# Sum of the first 100 whole numbers
sdf_len(sc, 100, repartition = 1) %>%
    spark_apply(function(e) sum(e))
# 5050
```

#### In Scala

##### In a Spylon Kernel

Spylon kernel instantiates a `SparkContext` for you in variable `sc` after you configure Spark
options in a `%%init_spark` magic cell.
@@ -44,27 +81,30 @@ options in a `%%init_spark` magic cell.
```python
%%init_spark
# Configure Spark to use a local master
launcher.master = "local[*]"
launcher.master = "local"
```

```scala
// Now run Scala code that uses the initialized SparkContext in sc
val rdd = sc.parallelize(0 to 999)
rdd.takeSample(false, 5)
// Sum of the first 100 whole numbers
val rdd = sc.parallelize(0 to 100)
rdd.sum()
// 5050
```

#### In an Apache Toree Scala Notebook
##### In an Apache Toree Kernel

Apache Toree instantiates a local `SparkContext` for you in variable `sc` when the kernel starts.

```scala
val rdd = sc.parallelize(0 to 999)
rdd.takeSample(false, 5)
// Sum of the first 100 whole numbers
val rdd = sc.parallelize(0 to 100)
rdd.sum()
// 5050
```

### Connecting to a Spark Cluster in Standalone Mode

Connection to Spark Cluster on **[Standalone Mode](https://spark.apache.org/docs/latest/spark-standalone.html)** requires the following set of steps:

0. Verify that the docker image (check the Dockerfile) and the Spark Cluster which is being
deployed, run the same version of Spark.

@@ -72,98 +112,107 @@ Connection to Spark Cluster on Standalone Mode requires the following set of ste
2. Run the Docker container with `--net=host` in a location that is network addressable by all of
your Spark workers. (This is a [Spark networking
requirement](http://spark.apache.org/docs/latest/cluster-overview.html#components).)
    * NOTE: When using `--net=host`, you must also use the flags `--pid=host -e
      TINI_SUBREAPER=true`. See https://github.com/jupyter/docker-stacks/issues/64 for details.
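Putting these requirements together, the container for a standalone-cluster setup might be started roughly like this (an illustrative sketch; the Spark master itself is configured from inside the notebook, as the examples below show):

```bash
# host networking so Spark workers can reach the driver, plus the tini-related flags
docker run -d --net=host --pid=host -e TINI_SUBREAPER=true jupyter/all-spark-notebook
```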

#### In a Python Notebook
**Note**: In the following examples we are using the Spark master URL `spark://master:7077` that shall be replaced by the URL of the Spark master.

#### In Python

The **same Python version** need to be used on the notebook (where the driver is located) and on the Spark workers.
The python version used at driver and worker side can be adjusted by setting the environment variables `PYSPARK_PYTHON` and / or `PYSPARK_DRIVER_PYTHON`, see [Spark Configuration][spark-conf] for more information.

```python
import os
# make sure pyspark tells workers to use python3 not 2 if both are installed
os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3'
from pyspark.sql import SparkSession

import pyspark
conf = pyspark.SparkConf()
# Spark session & context
spark = SparkSession.builder.master('spark://master:7077').getOrCreate()
sc = spark.sparkContext

# Point to spark master
conf.setMaster("spark://10.10.10.10:7070")
# point to spark binary package in HDFS or on local filesystem on all slave
# nodes (e.g., file:///opt/spark/spark-2.2.0-bin-hadoop2.7.tgz)
conf.set("spark.executor.uri", "hdfs://10.10.10.10/spark/spark-2.2.0-bin-hadoop2.7.tgz")
# set other options as desired
conf.set("spark.executor.memory", "8g")
conf.set("spark.core.connection.ack.wait.timeout", "1200")

# create the context
sc = pyspark.SparkContext(conf=conf)

# do something to prove it works
rdd = sc.parallelize(range(100000000))
rdd.sumApprox(3)
# Sum of the first 100 whole numbers
rdd = sc.parallelize(range(100 + 1))
rdd.sum()
# 5050
```

#### In a R Notebook
#### In R

In a R notebook with [SparkR][sparkr].

```R
library(SparkR)

# Point to spark master
# Point to spark binary package in HDFS or on local filesystem on all worker
# nodes (e.g., file:///opt/spark/spark-2.2.0-bin-hadoop2.7.tgz) in sparkEnvir
# Set other options in sparkEnvir
sc <- sparkR.session("spark://10.10.10.10:7070", sparkEnvir=list(
  spark.executor.uri="hdfs://10.10.10.10/spark/spark-2.4.3-bin-hadoop2.7.tgz",
  spark.executor.memory="8g"
  )
)
# Spark session & context
sc <- sparkR.session("spark://master:7077")

# do something to prove it works
data(iris)
df <- as.DataFrame(iris)
head(filter(df, df$Petal_Width > 0.2))
# Sum of the first 100 whole numbers
sdf <- createDataFrame(list(1:100))
dapplyCollect(sdf,
              function(x)
              { x <- sum(x)}
             )
# 5050
```

#### In a Spylon Kernel Scala Notebook
In a R notebook with [sparklyr][sparklyr].

```R
library(sparklyr)

# Spark session & context
# Spark configuration
conf <- spark_config()
# Set the catalog implementation in-memory
conf$spark.sql.catalogImplementation <- "in-memory"
sc <- spark_connect(master = "spark://master:7077", config = conf)

# Sum of the first 100 whole numbers
sdf_len(sc, 100, repartition = 1) %>%
    spark_apply(function(e) sum(e))
# 5050
```

#### In Scala

##### In a Spylon Kernel

Spylon kernel instantiates a `SparkContext` for you in variable `sc` after you configure Spark
options in a `%%init_spark` magic cell.

```python
%%init_spark
# Point to spark master
launcher.master = "spark://10.10.10.10:7070"
launcher.conf.spark.executor.uri=hdfs://10.10.10.10/spark/spark-2.4.3-bin-hadoop2.7.tgz
# Configure Spark to use a local master
launcher.master = "spark://master:7077"
```

```scala
// Now run Scala code that uses the initialized SparkContext in sc
val rdd = sc.parallelize(0 to 999)
rdd.takeSample(false, 5)
// Sum of the first 100 whole numbers
val rdd = sc.parallelize(0 to 100)
rdd.sum()
// 5050
```

##### In an Apache Toree Scala Notebook

The Apache Toree kernel automatically creates a `SparkContext` when it starts based on configuration information from its command line arguments and environment variables. You can pass information about your cluster via the `SPARK_OPTS` environment variable when you spawn a container.

For instance, to pass information about a standalone Spark master, Spark binary location in HDFS,
and an executor options, you could start the container like so:
For instance, to pass information about a standalone Spark master, you could start the container like so:

```
docker run -d -p 8888:8888 -e SPARK_OPTS='--master=spark://10.10.10.10:7070 \
  --spark.executor.uri=hdfs://10.10.10.10/spark/spark-2.4.3-bin-hadoop2.7.tgz \
  --spark.executor.memory=8g' jupyter/all-spark-notebook
```bash
docker run -d -p 8888:8888 -e SPARK_OPTS='--master=spark://master:7077' \
  jupyter/all-spark-notebook
```

Note that this is the same information expressed in a notebook in the Python case above. Once the kernel spec has your cluster information, you can test your cluster in an Apache Toree notebook like so:

```scala
// should print the value of --master in the kernel spec
println(sc.master)

// do something to prove it works
val rdd = sc.parallelize(0 to 99999999)
// Sum of the first 100 whole numbers
val rdd = sc.parallelize(0 to 100)
rdd.sum()
// 5050
```

## Tensorflow
@@ -199,3 +248,7 @@ init = tf.global_variables_initializer()
sess.run(init)
sess.run(hello)
```

[sparkr]: https://spark.apache.org/docs/latest/sparkr.html
[sparklyr]: https://spark.rstudio.com/
[spark-conf]: https://spark.apache.org/docs/latest/configuration.html
@@ -12,7 +12,7 @@ See the [installation instructions](https://docs.docker.com/engine/installation/

Build and run a `jupyter/minimal-notebook` container on a VirtualBox VM on local desktop.

```
```bash
# create a Docker Machine-controlled VirtualBox VM
bin/vbox.sh mymachine

@@ -28,7 +28,7 @@ notebook/up.sh

To stop and remove the container:

```
```bash
notebook/down.sh
```

@@ -39,14 +39,14 @@ notebook/down.sh

You can customize the docker-stack notebook image to deploy by modifying the `notebook/Dockerfile`. For example, you can build and deploy a `jupyter/all-spark-notebook` by modifying the Dockerfile like so:

```
```dockerfile
FROM jupyter/all-spark-notebook:55d5ca6be183
...
```

Once you modify the Dockerfile, don't forget to rebuild the image.

```
```bash
# activate the docker machine
eval "$(docker-machine env mymachine)"

@@ -57,14 +57,14 @@ notebook/build.sh

Yes. Set environment variables to specify unique names and ports when running the `up.sh` command.

```
```bash
NAME=my-notebook PORT=9000 notebook/up.sh
NAME=your-notebook PORT=9001 notebook/up.sh
```

To stop and remove the containers:

```
```bash
NAME=my-notebook notebook/down.sh
NAME=your-notebook notebook/down.sh
```

@@ -78,7 +78,7 @@ The `up.sh` creates a Docker volume named after the notebook container with a `-

Yes. Set the `WORK_VOLUME` environment variable to the same value for each notebook.

```
```bash
NAME=my-notebook PORT=9000 WORK_VOLUME=our-work notebook/up.sh
NAME=your-notebook PORT=9001 WORK_VOLUME=our-work notebook/up.sh
```

@@ -87,7 +87,7 @@ NAME=your-notebook PORT=9001 WORK_VOLUME=our-work notebook/up.sh

To run the notebook server with a self-signed certificate, pass the `--secure` option to the `up.sh` script. You must also provide a password, which will be used to secure the notebook server. You can specify the password by setting the `PASSWORD` environment variable, or by passing it to the `up.sh` script.

```
```bash
PASSWORD=a_secret notebook/up.sh --secure

# or

@@ -103,7 +103,7 @@ This example includes the `bin/letsencrypt.sh` script, which runs the `letsencry

The following command will create a certificate chain and store it in a Docker volume named `mydomain-secrets`.

```
```bash
FQDN=host.mydomain.com EMAIL=myemail@somewhere.com \
  SECRETS_VOLUME=mydomain-secrets \
  bin/letsencrypt.sh

@@ -111,7 +111,7 @@ FQDN=host.mydomain.com EMAIL=myemail@somewhere.com \

Now run `up.sh` with the `--letsencrypt` option. You must also provide the name of the secrets volume and a password.

```
```bash
PASSWORD=a_secret SECRETS_VOLUME=mydomain-secrets notebook/up.sh --letsencrypt

# or

@@ -120,7 +120,7 @@ notebook/up.sh --letsencrypt --password a_secret --secrets mydomain-secrets

Be aware that Let's Encrypt has a pretty [low rate limit per domain](https://community.letsencrypt.org/t/public-beta-rate-limits/4772/3) at the moment. You can avoid exhausting your limit by testing against the Let's Encrypt staging servers. To hit their staging servers, set the environment variable `CERT_SERVER=--staging`.

```
```bash
FQDN=host.mydomain.com EMAIL=myemail@somewhere.com \
  CERT_SERVER=--staging \
  bin/letsencrypt.sh

@@ -134,13 +134,13 @@ Yes, you should be able to deploy to any Docker Machine-controlled host. To mak

To create a Docker machine using a VirtualBox VM on local desktop:

```
```bash
bin/vbox.sh mymachine
```

To create a Docker machine using a virtual device on IBM SoftLayer:

```
```bash
export SOFTLAYER_USER=my_softlayer_username
export SOFTLAYER_API_KEY=my_softlayer_api_key
export SOFTLAYER_DOMAIN=my.domain

@@ -11,7 +11,7 @@ This folder contains a Makefile and a set of supporting files demonstrating how

To show what's possible, here's how to run the `jupyter/minimal-notebook` on a brand new local virtualbox.

```
```bash
# create a new VM
make virtualbox-vm NAME=dev
# make the new VM the active docker machine

@@ -30,7 +30,7 @@ The last command will log the IP address and port to visit in your browser.

Yes. Specify a unique name and port on the `make notebook` command.

```
```bash
make notebook NAME=my-notebook PORT=9000
make notebook NAME=your-notebook PORT=9001
```

@@ -39,7 +39,7 @@ make notebook NAME=your-notebook PORT=9001

Yes.

```
```bash
make notebook NAME=my-notebook PORT=9000 WORK_VOLUME=our-work
make notebook NAME=your-notebook PORT=9001 WORK_VOLUME=our-work
```

@@ -52,7 +52,7 @@ Instead of `make notebook`, run `make self-signed-notebook PASSWORD=your_desired

Yes. Please.

```
```bash
make letsencrypt FQDN=host.mydomain.com EMAIL=myemail@somewhere.com
make letsencrypt-notebook
```

@@ -61,7 +61,7 @@ The first command creates a Docker volume named after the notebook container wit

Be aware: Let's Encrypt has a pretty [low rate limit per domain](https://community.letsencrypt.org/t/public-beta-rate-limits/4772/3) at the moment. You can avoid exhausting your limit by testing against the Let's Encrypt staging servers. To hit their staging servers, set the environment variable `CERT_SERVER=--staging`.

```
```bash
make letsencrypt FQDN=host.mydomain.com EMAIL=myemail@somewhere.com CERT_SERVER=--staging
```

@@ -69,7 +69,7 @@ Also, keep in mind Let's Encrypt certificates are short lived: 90 days at the mo

### My pip/conda/apt-get installs disappear every time I restart the container. Can I make them permanent?

```
```bash
# add your pip, conda, apt-get, etc. permanent features to the Dockerfile where
# indicated by the comments in the Dockerfile
vi Dockerfile

@@ -79,7 +79,7 @@ make notebook

### How do I upgrade my Docker container?

```
```bash
make image DOCKER_ARGS=--pull
make notebook
```

@@ -90,7 +90,7 @@ The first line pulls the latest version of the Docker image used in the local Do

Yes. As an example, there's a `softlayer.makefile` included in this repo. You would use it like so:

```
```bash
make softlayer-vm NAME=myhost \
  SOFTLAYER_DOMAIN=your_desired_domain \
  SOFTLAYER_USER=your_user_id \

@@ -16,7 +16,7 @@ Loading the Templates

To load the templates, login to OpenShift from the command line and run:

```
```bash
oc create -f https://raw.githubusercontent.com/jupyter-on-openshift/docker-stacks/master/examples/openshift/templates.json
```

@@ -33,7 +33,7 @@ Deploying a Notebook

To deploy a notebook from the command line using the template, run:

```
```bash
oc new-app --template jupyter-notebook
```

@@ -71,7 +71,7 @@ A password you can use when accessing the notebook will be auto generated and is

To see the hostname for accessing the notebook run:

```
```bash
oc get routes
```

@@ -95,7 +95,7 @@ Passing Template Parameters

To override the name for the notebook, the image used, and the password, you can pass template parameters using the ``--param`` option.

```
```bash
oc new-app --template jupyter-notebook \
  --param APPLICATION_NAME=mynotebook \
  --param NOTEBOOK_IMAGE=jupyter/scipy-notebook:latest \

@@ -120,7 +120,7 @@ Deleting the Notebook Instance

To delete the notebook instance, run ``oc delete`` using a label selector for the application name.

```
```bash
oc delete all,configmap --selector app=mynotebook
```

@@ -129,7 +129,7 @@ Enabling Jupyter Lab Interface

To enable the Jupyter Lab interface for a deployed notebook set the ``JUPYTER_ENABLE_LAB`` environment variable.

```
```bash
oc set env dc/mynotebook JUPYTER_ENABLE_LAB=true
```

@@ -140,7 +140,7 @@ Adding Persistent Storage

You can upload notebooks and other files using the web interface of the notebook. Any uploaded files or changes you make to them will be lost when the notebook instance is restarted. If you want to save your work, you need to add persistent storage to the notebook. To add persistent storage run:

```
```bash
oc set volume dc/mynotebook --add \
  --type=pvc --claim-size=1Gi --claim-mode=ReadWriteOnce \
  --claim-name mynotebook-data --name data \

@@ -149,7 +149,7 @@ oc set volume dc/mynotebook --add \

When you have deleted the notebook instance, if using a persistent volume, you will need to delete it in a separate step.

```
```bash
oc delete pvc/mynotebook-data
```

@@ -158,7 +158,7 @@ Customizing the Configuration

If you want to set any custom configuration for the notebook, you can edit the config map created by the template.

```
```bash
oc edit configmap/mynotebook-cfg
```

@@ -176,19 +176,19 @@ Because the configuration is Python code, ensure any indenting is correct. Any e

If the error is in the config map, edit it again to fix it and trigger a new deployment if necessary by running:

```
```bash
oc rollout latest dc/mynotebook
```

If you make an error in the configuration file stored in the persistent volume, you will need to scale down the notebook so it isn't running.

```
```bash
oc scale dc/mynotebook --replicas 0
```

Then run:

```
```bash
oc debug dc/mynotebook
```

@@ -196,7 +196,7 @@ to run the notebook in debug mode. This will provide you with an interactive ter

Start up the notebook again.

```
```bash
oc scale dc/mynotebook --replicas 1
```

@@ -207,7 +207,7 @@ The password for the notebook is supplied as a template parameter, or if not sup

If you want to change the password, you can do so by editing the environment variable on the deployment configuration.

```
```bash
oc set env dc/mynotebook JUPYTER_NOTEBOOK_PASSWORD=mypassword
```

@@ -232,13 +232,13 @@ If the image is in your OpenShift project, because you imported the image into O

This can be illustrated by first importing an image into the OpenShift project.

```
```bash
oc import-image jupyter/datascience-notebook:latest --confirm
```

Then deploy it using the name of the image stream created.

```
```bash
oc new-app --template jupyter-notebook \
  --param APPLICATION_NAME=mynotebook \
  --param NOTEBOOK_IMAGE=datascience-notebook \

@@ -22,7 +22,7 @@ Getting Started with S2I

As an example of how S2I can be used to create a custom image with a bundled set of notebooks, run:

```
```bash
s2i build \
  --scripts-url https://raw.githubusercontent.com/jupyter/docker-stacks/master/examples/source-to-image \
  --context-dir docs/source/examples/Notebook \

@@ -76,7 +76,7 @@ The supplied ``assemble`` script performs a few key steps.

The first steps copy files into the location they need to be when the image is run, from the directory where they are initially placed by the ``s2i`` command.

```
```bash
cp -Rf /tmp/src/. /home/$NB_USER

rm -rf /tmp/src

@@ -84,7 +84,7 @@ rm -rf /tmp/src

The next steps are:

```
```bash
if [ -f /home/$NB_USER/environment.yml ]; then
    conda env update --name root --file /home/$NB_USER/environment.yml
    conda clean --all -f -y

@@ -101,7 +101,7 @@ This means that so long as a set of notebook files provides one of these files l

A final step is:

```
```bash
fix-permissions $CONDA_DIR
fix-permissions /home/$NB_USER
```

@@ -112,7 +112,7 @@ As long as you preserve the first and last set of steps, you can do whatever you

The ``run`` script in this directory is very simple and just runs the notebook application.

```
```bash
exec start-notebook.sh "$@"
```

@@ -121,13 +121,13 @@ Integration with OpenShift

The OpenShift platform provides integrated support for S2I type builds. Templates are provided for using the S2I build mechanism with the scripts in this directory. To load the templates run:

```
```bash
oc create -f https://raw.githubusercontent.com/jupyter/docker-stacks/master/examples/source-to-image/templates.json
```

This will create the templates:

```
```bash
jupyter-notebook-builder
jupyter-notebook-quickstart
```

@@ -136,7 +136,7 @@ The templates can be used from the OpenShift web console or command line. This `

To use the OpenShift command line to build into an image, and deploy, the set of notebooks used above, run:

```
```bash
oc new-app --template jupyter-notebook-quickstart \
  --param APPLICATION_NAME=notebook-examples \
  --param GIT_REPOSITORY_URL=https://github.com/jupyter/notebook \