diff --git a/.github/actions/download-manifests/action.yml b/.github/actions/download-manifests/action.yml index 1d77b330..25ab386b 100644 --- a/.github/actions/download-manifests/action.yml +++ b/.github/actions/download-manifests/action.yml @@ -63,6 +63,11 @@ runs: with: name: tensorflow-notebook-amd64-history_line path: ${{ inputs.histLineDir }} + - name: Download artifact 📥 + uses: actions/download-artifact@v3 + with: + name: datascience-notebook-aarch64-history_line + path: ${{ inputs.histLineDir }} - name: Download artifact 📥 uses: actions/download-artifact@v3 with: @@ -134,6 +139,11 @@ runs: with: name: tensorflow-notebook-amd64-manifest path: ${{ inputs.manifestDir }} + - name: Download artifact 📥 + uses: actions/download-artifact@v3 + with: + name: datascience-notebook-aarch64-manifest + path: ${{ inputs.manifestDir }} - name: Download artifact 📥 uses: actions/download-artifact@v3 with: diff --git a/.github/workflows/docker-tag-manifest-push.yml b/.github/workflows/docker-tag-manifest-push.yml index df4260eb..ced13f63 100644 --- a/.github/workflows/docker-tag-manifest-push.yml +++ b/.github/workflows/docker-tag-manifest-push.yml @@ -80,13 +80,13 @@ jobs: shell: bash - name: Login to Docker Hub 🔐 - if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main' || github.event_name == 'schedule' + if: github.ref == 'refs/heads/main' || github.event_name == 'schedule' uses: docker/login-action@49ed152c8eca782a232dede0303416e8f356c37b # dependabot updates to latest release with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Push Images to Docker Hub 📤 - if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main' || github.event_name == 'schedule' + if: github.ref == 'refs/heads/main' || github.event_name == 'schedule' run: docker push --all-tags jupyter/${{ matrix.image }} shell: bash diff --git a/.github/workflows/docker-wiki-update.yml b/.github/workflows/docker-wiki-update.yml index 
77708991..77563248 100644 --- a/.github/workflows/docker-wiki-update.yml +++ b/.github/workflows/docker-wiki-update.yml @@ -39,8 +39,8 @@ jobs: shell: bash - name: Push Wiki to GitHub 📤 - if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main' || github.event_name == 'schedule' - uses: stefanzweifel/git-auto-commit-action@5804e42f86b1891093b151b6c4e78e759c746c4d # dependabot updates to latest release + if: github.ref == 'refs/heads/main' || github.event_name == 'schedule' + uses: stefanzweifel/git-auto-commit-action@49620cd3ed21ee620a48530e81dba0d139c9cb80 # dependabot updates to latest release with: commit_message: "Automated wiki publish for ${{ github.sha }}" repository: wiki/ diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 59c809aa..2b6daf1e 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -34,7 +34,6 @@ on: push: branches: - main - - master paths: - ".github/workflows/docker.yml" - ".github/workflows/docker-build-test-upload.yml" @@ -145,6 +144,15 @@ jobs: platform: amd64 runsOn: ubuntu-latest + aarch64-datascience: + needs: [aarch64-scipy] + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parentImage: scipy-notebook + image: datascience-notebook + platform: aarch64 + runsOn: ARM64 + amd64-datascience: needs: [amd64-scipy] uses: ./.github/workflows/docker-build-test-upload.yml @@ -200,6 +208,7 @@ jobs: aarch64-minimal, aarch64-scipy, aarch64-r, + aarch64-datascience, aarch64-pyspark, aarch64-all-spark, ] @@ -216,6 +225,7 @@ jobs: "minimal-notebook", "scipy-notebook", "r-notebook", + "datascience-notebook", "pyspark-notebook", "all-spark-notebook" ] diff --git a/.github/workflows/hub-overview.yml b/.github/workflows/hub-overview.yml index 0924dfa5..8a9bcc9c 100644 --- a/.github/workflows/hub-overview.yml +++ b/.github/workflows/hub-overview.yml @@ -4,7 +4,6 @@ on: push: branches: - main - - master paths: - ".github/workflows/hub-overview.yml" diff --git 
a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 5fe518ed..00c29e47 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -5,7 +5,6 @@ on: push: branches: - main - - master workflow_dispatch: permissions: diff --git a/.github/workflows/sphinx.yml b/.github/workflows/sphinx.yml index b990a6bc..ef219956 100644 --- a/.github/workflows/sphinx.yml +++ b/.github/workflows/sphinx.yml @@ -15,7 +15,6 @@ on: push: branches: - main - - master paths: - ".github/workflows/sphinx.yml" diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index c2c72f8c..90401f93 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,3 +1,3 @@ # Project `jupyter/docker-stacks` Code of Conduct -Please see the [Project Jupyter Code of Conduct](https://github.com/jupyter/governance/blob/master/conduct/code_of_conduct.md). +Please see the [Project Jupyter Code of Conduct](https://github.com/jupyter/governance/blob/HEAD/conduct/code_of_conduct.md). diff --git a/README.md b/README.md index 4a22502a..04ebd25c 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,9 @@ [![GitHub actions badge](https://github.com/jupyter/docker-stacks/actions/workflows/docker.yml/badge.svg)](https://github.com/jupyter/docker-stacks/actions/workflows/docker.yml "Docker images build status") [![Read the Docs badge](https://img.shields.io/readthedocs/jupyter-docker-stacks.svg)](https://jupyter-docker-stacks.readthedocs.io/en/latest/ "Documentation build status") -[![pre-commit.ci status](https://results.pre-commit.ci/badge/github/jupyter/docker-stacks/master.svg)](https://results.pre-commit.ci/latest/github/jupyter/docker-stacks/master "pre-commit.ci build status") +[![pre-commit.ci status](https://results.pre-commit.ci/badge/github/jupyter/docker-stacks/main.svg)](https://results.pre-commit.ci/latest/github/jupyter/docker-stacks/main "pre-commit.ci build status") [![Discourse 
badge](https://img.shields.io/discourse/users.svg?color=%23f37626&server=https%3A%2F%2Fdiscourse.jupyter.org)](https://discourse.jupyter.org/ "Jupyter Discourse Forum") -[![Binder badge](https://static.mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/jupyter/docker-stacks/master?filepath=README.ipynb "Launch a jupyter/base-notebook container on mybinder.org") +[![Binder badge](https://static.mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/jupyter/docker-stacks/main?filepath=README.ipynb "Launch a jupyter/base-notebook container on mybinder.org") Jupyter Docker Stacks are a set of ready-to-run [Docker images](https://hub.docker.com/u/jupyter) containing Jupyter applications and interactive computing tools. You can use a stack image to do any of the following (and more): @@ -16,7 +16,7 @@ You can use a stack image to do any of the following (and more): ## Quick Start -You can try a [relatively recent build of the jupyter/base-notebook image on mybinder.org](https://mybinder.org/v2/gh/jupyter/docker-stacks/master?urlpath=lab/tree/README.ipynb) +You can try a [relatively recent build of the jupyter/base-notebook image on mybinder.org](https://mybinder.org/v2/gh/jupyter/docker-stacks/main?urlpath=lab/tree/README.ipynb) by simply clicking the preceding link. Otherwise, the examples below may help you get started if you [have Docker installed](https://docs.docker.com/install/), know [which Docker image](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html) you want to use @@ -26,11 +26,11 @@ The [User Guide on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/ **Example 1:** -This command pulls the `jupyter/scipy-notebook` image tagged `6b49f3337709` from Docker Hub if it is not already present on the local host. +This command pulls the `jupyter/scipy-notebook` image tagged `807999a41207` from Docker Hub if it is not already present on the local host. 
It then starts a container running a Jupyter Server and exposes the container's internal port `8888` to port `10000` of the host machine: ```bash -docker run -p 10000:8888 jupyter/scipy-notebook:6b49f3337709 +docker run -p 10000:8888 jupyter/scipy-notebook:807999a41207 ``` You can modify the port on which the container's port is exposed by [changing the value of the `-p` option](https://docs.docker.com/engine/reference/run/#expose-incoming-ports) to `-p 8888:8888`. @@ -45,11 +45,11 @@ The container remains intact for restart after the Jupyter Server exits. **Example 2:** -This command pulls the `jupyter/datascience-notebook` image tagged `6b49f3337709` from Docker Hub if it is not already present on the local host. +This command pulls the `jupyter/datascience-notebook` image tagged `807999a41207` from Docker Hub if it is not already present on the local host. It then starts an _ephemeral_ container running a Jupyter Server and exposes the server on host port 10000. ```bash -docker run -it --rm -p 10000:8888 -v "${PWD}":/home/jovyan/work jupyter/datascience-notebook:6b49f3337709 +docker run -it --rm -p 10000:8888 -v "${PWD}":/home/jovyan/work jupyter/datascience-notebook:807999a41207 ``` The use of the `-v` flag in the command mounts the current working directory on the host (`{PWD}` in the example command) as `/home/jovyan/work` in the container. 
@@ -115,6 +115,6 @@ This change is tracked in the issue [#1217](https://github.com/jupyter/docker-st ## CPU Architectures -- We publish containers for both `amd64` (`x86_64`) and `aarch64` platforms, except for `datascience-notebook` and `tensorflow-notebook`, which only support `amd64` for now +- We publish containers for both `amd64` (`x86_64`) and `aarch64` platforms, except for `tensorflow-notebook`, which only supports `amd64` for now - We do not create multi-platform images - Instead, all `arm64` images have _aarch64-_ tag prefix, for example `jupyter/base-notebook:aarch64-python-3.10.5` diff --git a/binder/Dockerfile b/binder/Dockerfile index b1087b1c..0081709a 100644 --- a/binder/Dockerfile +++ b/binder/Dockerfile @@ -3,7 +3,7 @@ # https://hub.docker.com/r/jupyter/base-notebook/tags ARG OWNER=jupyter -ARG BASE_CONTAINER=$OWNER/base-notebook:6b49f3337709 +ARG BASE_CONTAINER=$OWNER/base-notebook:807999a41207 FROM $BASE_CONTAINER LABEL maintainer="Jupyter Project " @@ -12,6 +12,6 @@ LABEL maintainer="Jupyter Project " # Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 SHELL ["/bin/bash", "-o", "pipefail", "-c"] -ENV TAG="6b49f3337709" +ENV TAG="807999a41207" COPY --chown=${NB_UID}:${NB_GID} binder/README.ipynb "${HOME}"/README.ipynb diff --git a/binder/README.ipynb b/binder/README.ipynb index d521a290..4116a188 100644 --- a/binder/README.ipynb +++ b/binder/README.ipynb @@ -60,7 +60,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Conda is available in the user's path." + "`mamba` is available in the user's path." 
] }, { diff --git a/datascience-notebook/Dockerfile b/datascience-notebook/Dockerfile index 1ad8b711..584d1f53 100644 --- a/datascience-notebook/Dockerfile +++ b/datascience-notebook/Dockerfile @@ -80,17 +80,17 @@ RUN mamba install --quiet --yes \ 'r-rsqlite' \ 'r-shiny' \ 'r-tidyverse' \ - 'rpy2' \ 'unixodbc' && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" -# `r-tidymodels` is not easy to install under arm +# `rpy2` and `r-tidymodels` are not easy to install under aarch64 RUN set -x && \ arch=$(uname -m) && \ if [ "${arch}" == "x86_64" ]; then \ mamba install --quiet --yes \ + 'rpy2' \ 'r-tidymodels' && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ diff --git a/docs/conf.py b/docs/conf.py index 970894d6..1f748265 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -99,7 +99,7 @@ linkcheck_ignore = [ ] linkcheck_allowed_redirects = { - r"https://results\.pre-commit\.ci/latest/github/jupyter/docker-stacks/master": r"https://results\.pre-commit\.ci/run/github/.*", # Latest master CI build + r"https://results\.pre-commit\.ci/latest/github/jupyter/docker-stacks/main": r"https://results\.pre-commit\.ci/run/github/.*", # Latest main CI build r"https://github\.com/jupyter/docker-stacks/issues/new.*": r"https://github\.com/login.*", # GitHub wants user to be logon to use this features r"https://github\.com/orgs/jupyter/teams/docker-image-maintainers/members": r"https://github\.com/login.*", } diff --git a/docs/contributing/issues.md b/docs/contributing/issues.md index fa25b0d2..0c6c34d9 100644 --- a/docs/contributing/issues.md +++ b/docs/contributing/issues.md @@ -5,7 +5,7 @@ Please review the following guidelines when reporting your problem. - If you believe you’ve found a security vulnerability in any of the Jupyter projects included in Jupyter Docker Stacks images, please report it to [security@ipython.org](mailto:security@ipython.org), **not in the issue trackers on GitHub**. 
- If you prefer to encrypt your security reports, you can use [this PGP public key](https://github.com/jupyter/jupyter.github.io/blob/master/assets/ipython_security.asc). + If you prefer to encrypt your security reports, you can use [this PGP public key](https://github.com/jupyter/jupyter.github.io/blob/HEAD/assets/ipython_security.asc). - If you think your problem is unique to the Jupyter Docker Stacks images, please search the [jupyter/docker-stacks issue tracker](https://github.com/jupyter/docker-stacks/issues) to see if someone else has already reported the same problem. diff --git a/docs/contributing/stacks.md b/docs/contributing/stacks.md index 77da1778..8d37c350 100644 --- a/docs/contributing/stacks.md +++ b/docs/contributing/stacks.md @@ -80,12 +80,11 @@ The cookiecutter template comes with a `.github/workflows/docker.yml` file, whic push: branches: - main - - master paths-ignore: - "*.md" ``` - This will trigger the CI pipeline whenever you push to your `main` or `master` branch and when any Pull Requests are made to your repository. + This will trigger the CI pipeline whenever you push to your `main` branch and when any Pull Requests are made to your repository. For more details on this configuration, visit the [GitHub actions documentation on triggers](https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows). 2. Commit your changes and push to GitHub. @@ -99,7 +98,7 @@ The cookiecutter template comes with a `.github/workflows/docker.yml` file, whic ## Configuring Docker Hub Now, configure Docker Hub to build your stack image and push it to Docker Hub repository whenever -you merge a GitHub pull request to the master branch of your project. +you merge a GitHub pull request to the main branch of your project. 1. Visit [https://hub.docker.com/](https://hub.docker.com/) and log in. 2. Select the account or organization matching the one you entered when prompted with `stack_org` by the cookiecutter. 
@@ -137,14 +136,13 @@ you merge a GitHub pull request to the master branch of your project. Make edits to the Dockerfile in your project to add third-party libraries and configure Jupyter applications. -Refer to the Dockerfiles for the core stacks (e.g., [jupyter/datascience-notebook](https://github.com/jupyter/docker-stacks/blob/master/datascience-notebook/Dockerfile)) +Refer to the Dockerfiles for the core stacks (e.g., [jupyter/datascience-notebook](https://github.com/jupyter/docker-stacks/blob/main/datascience-notebook/Dockerfile)) to get a feel for what's possible and best practices. [Submit pull requests](https://github.com/PointCloudLibrary/pcl/wiki/A-step-by-step-guide-on-preparing-and-submitting-a-pull-request) to your project repository on GitHub. -Ensure your image builds correctly on GitHub actions before merging to -master or main. -Refer to Docker Hub to build your master or main branch that you can `docker pull`. +Ensure your image builds correctly on GitHub actions before merging to main branch. +Refer to Docker Hub to build your main branch that you can `docker pull`. ## Sharing Your Image diff --git a/docs/contributing/tests.md b/docs/contributing/tests.md index 1de1d259..2c5578a6 100644 --- a/docs/contributing/tests.md +++ b/docs/contributing/tests.md @@ -4,7 +4,7 @@ We greatly appreciate pull requests that extend the automated tests that vet the ## How the Tests Work -A [GitHub Action workflow](https://github.com/jupyter/docker-stacks/blob/master/.github/workflows/docker.yml) +A [GitHub Action workflow](https://github.com/jupyter/docker-stacks/blob/main/.github/workflows/docker.yml) runs tests against pull requests submitted to the `jupyter/docker-stacks` repository. We use `pytest` module to run tests on the image. 
@@ -18,7 +18,7 @@ If your test is located in `tests/-notebook/`, it will be run against ``` Many tests make use of global [pytest fixtures](https://docs.pytest.org/en/latest/reference/fixtures.html) -defined in the [conftest.py](https://github.com/jupyter/docker-stacks/blob/master/tests/conftest.py) file. +defined in the [conftest.py](https://github.com/jupyter/docker-stacks/blob/main/tests/conftest.py) file. ## Unit tests diff --git a/docs/maintaining/tasks.md b/docs/maintaining/tasks.md index 4c84ca40..2f35ede8 100644 --- a/docs/maintaining/tasks.md +++ b/docs/maintaining/tasks.md @@ -10,10 +10,10 @@ To build new images and publish them to the Docker Hub registry, do the followin ```{note} We think GitHub Actions are quite reliable, so please, investigate if some error occurs. - Building Docker images in PRs is exactly the same after merging to master, except there is an additional `push` step. + Building Docker images in PRs is exactly the same after merging to main, except there is an additional `push` step. ``` -4. Try to avoid merging another PR to master until all pending builds are complete. +4. Try to avoid merging another PR to main branch until all pending builds are complete. This way, you will know which commit might have broken the build and also have correct tags for moving tags (like `python` version). ## Updating the Ubuntu Base Image @@ -34,10 +34,10 @@ In general, we do not add new core images and ask contributors to either create When there's a new stack definition, do the following before merging the PR with the new stack: 1. Ensure the PR includes an update to the stack overview diagram - [in the documentation](https://github.com/jupyter/docker-stacks/blob/master/docs/using/selecting.md#image-relationships). + [in the documentation](https://github.com/jupyter/docker-stacks/blob/main/docs/using/selecting.md#image-relationships). The image links to the [blockdiag source](http://interactive.blockdiag.com/) used to create it. -2. 
Ensure the PR updates the [Makefile](https://github.com/jupyter/docker-stacks/blob/master/Makefile), which is used to build the stacks in order on GitHub Actions. -3. Ensure necessary tags / manifests are added for the new image in the [tagging](https://github.com/jupyter/docker-stacks/tree/master/tagging) folder. +2. Ensure the PR updates the [Makefile](https://github.com/jupyter/docker-stacks/blob/main/Makefile), which is used to build the stacks in order on GitHub Actions. +3. Ensure necessary tags / manifests are added for the new image in the [tagging](https://github.com/jupyter/docker-stacks/tree/main/tagging) folder. 4. Create a new repository in the `jupyter` org on Docker Hub named after the stack folder in the git repo. 5. Grant the `stacks` team permission to write to the repo. diff --git a/docs/using/common.md b/docs/using/common.md index 6e910a30..6239168b 100644 --- a/docs/using/common.md +++ b/docs/using/common.md @@ -133,7 +133,7 @@ or executables (`chmod +x`) to be run to the paths below: - `/usr/local/bin/before-notebook.d/` - handled **after** all the standard options noted above are applied and ran right before the notebook server launches -See the `run-hooks` function in the [`jupyter/base-notebook start.sh`](https://github.com/jupyter/docker-stacks/blob/master/base-notebook/start.sh) +See the `run-hooks` function in the [`jupyter/base-notebook start.sh`](https://github.com/jupyter/docker-stacks/blob/main/base-notebook/start.sh) script for execution details. 
## SSL Certificates @@ -166,10 +166,10 @@ The certificate file or PEM may contain one or more certificates (e.g., server, For additional information about using SSL, see the following: -- The [docker-stacks/examples](https://github.com/jupyter/docker-stacks/tree/master/examples) +- The [docker-stacks/examples](https://github.com/jupyter/docker-stacks/tree/main/examples) for information about how to use [Let's Encrypt](https://letsencrypt.org/) certificates when you run these stacks on a publicly visible domain. -- The [`jupyter_server_config.py`](https://github.com/jupyter/docker-stacks/blob/master/base-notebook/jupyter_server_config.py) +- The [`jupyter_server_config.py`](https://github.com/jupyter/docker-stacks/blob/main/base-notebook/jupyter_server_config.py) file for how this Docker image generates a self-signed certificate. - The [Jupyter Server documentation](https://jupyter-server.readthedocs.io/en/latest/operators/public-server.html#securing-a-jupyter-server) for best practices about securing a public notebook server in general. diff --git a/docs/using/recipes.md b/docs/using/recipes.md index 799931f8..1cee3b25 100644 --- a/docs/using/recipes.md +++ b/docs/using/recipes.md @@ -29,7 +29,7 @@ Create a new Dockerfile like the one shown below. ```dockerfile # Start from a core stack version -FROM jupyter/datascience-notebook:6b49f3337709 +FROM jupyter/datascience-notebook:807999a41207 # Install in the default python3 environment RUN pip install --quiet --no-cache-dir 'flake8==3.9.2' && \ fix-permissions "${CONDA_DIR}" && \ @@ -48,7 +48,7 @@ Next, create a new Dockerfile like the one shown below. 
```dockerfile # Start from a core stack version -FROM jupyter/datascience-notebook:6b49f3337709 +FROM jupyter/datascience-notebook:807999a41207 # Install from requirements.txt file COPY --chown=${NB_UID}:${NB_GID} requirements.txt /tmp/ RUN pip install --quiet --no-cache-dir --requirement /tmp/requirements.txt && \ @@ -60,7 +60,7 @@ For conda, the Dockerfile is similar: ```dockerfile # Start from a core stack version -FROM jupyter/datascience-notebook:6b49f3337709 +FROM jupyter/datascience-notebook:807999a41207 # Install from requirements.txt file COPY --chown=${NB_UID}:${NB_GID} requirements.txt /tmp/ RUN mamba install --yes --file /tmp/requirements.txt && \ @@ -148,7 +148,7 @@ Ref: ## Let's Encrypt a Notebook server See the README for a basic automation here - + which includes steps for requesting and renewing a Let's Encrypt certificate. Ref: @@ -283,7 +283,7 @@ To use a specific version of JupyterHub, the version of `jupyterhub` in your ima version in the Hub itself. ```dockerfile -FROM jupyter/base-notebook:6b49f3337709 +FROM jupyter/base-notebook:807999a41207 RUN pip install --quiet --no-cache-dir jupyterhub==1.4.1 && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" @@ -474,7 +474,7 @@ For JupyterLab: ```bash docker run -it --rm \ - jupyter/base-notebook:6b49f3337709 \ + jupyter/base-notebook:807999a41207 \ start.sh jupyter lab --LabApp.token='' ``` @@ -482,7 +482,7 @@ For jupyter classic: ```bash docker run -it --rm \ - jupyter/base-notebook:6b49f3337709 \ + jupyter/base-notebook:807999a41207 \ start.sh jupyter notebook --NotebookApp.token='' ``` diff --git a/docs/using/running.md b/docs/using/running.md index c8222207..3dbd9b16 100644 --- a/docs/using/running.md +++ b/docs/using/running.md @@ -15,12 +15,12 @@ The following are some common patterns. **Example 1:** -This command pulls the `jupyter/scipy-notebook` image tagged `6b49f3337709` from Docker Hub if it is not already present on the local host. 
+This command pulls the `jupyter/scipy-notebook` image tagged `807999a41207` from Docker Hub if it is not already present on the local host. It then starts a container running a Jupyter Notebook server and exposes the server on host port 8888. The server logs appear in the terminal and include a URL to the notebook server. ```bash -docker run -it -p 8888:8888 jupyter/scipy-notebook:6b49f3337709 +docker run -it -p 8888:8888 jupyter/scipy-notebook:807999a41207 # Entered start.sh with args: jupyter lab @@ -39,7 +39,7 @@ Pressing `Ctrl-C` twice shuts down the notebook server but leaves the container # list containers docker ps -a # CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES -# 221331c047c4 jupyter/scipy-notebook:6b49f3337709 "tini -g -- start-no…" 11 seconds ago Exited (0) 8 seconds ago cranky_benz +# 221331c047c4 jupyter/scipy-notebook:807999a41207 "tini -g -- start-no…" 11 seconds ago Exited (0) 8 seconds ago cranky_benz # start the stopped container docker start -a 221331c047c4 @@ -53,12 +53,12 @@ docker rm 221331c047c4 **Example 2:** -This command pulls the `jupyter/r-notebook` image tagged `6b49f3337709` from Docker Hub if it is not already present on the local host. +This command pulls the `jupyter/r-notebook` image tagged `807999a41207` from Docker Hub if it is not already present on the local host. It then starts a container running a Jupyter Notebook server and exposes the server on host port 10000. The server logs appear in the terminal and include a URL to the notebook server, but with the internal container port (8888) instead of the correct host port (10000). ```bash -docker run -it --rm -p 10000:8888 -v "${PWD}":/home/jovyan/work jupyter/r-notebook:6b49f3337709 +docker run -it --rm -p 10000:8888 -v "${PWD}":/home/jovyan/work jupyter/r-notebook:807999a41207 ``` Pressing `Ctrl-C` twice shuts down the notebook server and immediately destroys the Docker container. 
@@ -130,7 +130,7 @@ subuidSize=$(( $(podman info --format "{{ range .Host.IDMappings.UIDMap }}+{{.Si subgidSize=$(( $(podman info --format "{{ range .Host.IDMappings.GIDMap }}+{{.Size }}{{end }}" ) - 1 )) ``` -This command pulls the `docker.io/jupyter/r-notebook` image tagged `6b49f3337709` from Docker Hub if it is not already present on the local host. +This command pulls the `docker.io/jupyter/r-notebook` image tagged `807999a41207` from Docker Hub if it is not already present on the local host. It then starts a container running a Jupyter Server and exposes the server on host port 10000. The server logs appear in the terminal and include a URL to the notebook server, but with the internal container port (8888) instead of the correct host port (10000). @@ -139,7 +139,7 @@ podman run -it --rm -p 10000:8888 \ -v "${PWD}":/home/jovyan/work --user $uid:$gid \ --uidmap $uid:0:1 --uidmap 0:1:$uid --uidmap $(($uid+1)):$(($uid+1)):$(($subuidSize-$uid)) \ --gidmap $gid:0:1 --gidmap 0:1:$gid --gidmap $(($gid+1)):$(($gid+1)):$(($subgidSize-$gid)) \ - docker.io/jupyter/r-notebook:6b49f3337709 + docker.io/jupyter/r-notebook:807999a41207 ``` ```{warning} diff --git a/docs/using/selecting.md b/docs/using/selecting.md index b003a7c3..fdc7bc1d 100644 --- a/docs/using/selecting.md +++ b/docs/using/selecting.md @@ -18,8 +18,8 @@ The following sections describe these images, including their contents, relation ### jupyter/base-notebook -[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/base-notebook) | -[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/base-notebook/Dockerfile) | +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/base-notebook) | +[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/base-notebook/Dockerfile) | [Docker Hub image tags](https://hub.docker.com/r/jupyter/base-notebook/tags/) `jupyter/base-notebook` is a small image supporting the [options common across 
all core stacks](common.md). @@ -40,8 +40,8 @@ It is the basis for all other stacks and contains: ### jupyter/minimal-notebook -[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/minimal-notebook) | -[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/minimal-notebook/Dockerfile) | +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/minimal-notebook) | +[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/minimal-notebook/Dockerfile) | [Docker Hub image tags](https://hub.docker.com/r/jupyter/minimal-notebook/tags/) `jupyter/minimal-notebook` adds command-line tools useful when working in Jupyter applications. @@ -56,8 +56,8 @@ It contains: ### jupyter/r-notebook -[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/r-notebook) | -[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/r-notebook/Dockerfile) | +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/r-notebook) | +[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/r-notebook/Dockerfile) | [Docker Hub image tags](https://hub.docker.com/r/jupyter/r-notebook/tags/) `jupyter/r-notebook` includes popular packages from the R ecosystem listed below: @@ -87,8 +87,8 @@ It contains: ### jupyter/scipy-notebook -[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/scipy-notebook) | -[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/scipy-notebook/Dockerfile) | +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/scipy-notebook) | +[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/scipy-notebook/Dockerfile) | [Docker Hub image tags](https://hub.docker.com/r/jupyter/scipy-notebook/tags/) `jupyter/scipy-notebook` includes popular packages from the scientific Python ecosystem. 
@@ -129,8 +129,8 @@ It contains: ### jupyter/tensorflow-notebook -[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/tensorflow-notebook) | -[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/tensorflow-notebook/Dockerfile) | +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/tensorflow-notebook) | +[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/tensorflow-notebook/Dockerfile) | [Docker Hub image tags](https://hub.docker.com/r/jupyter/tensorflow-notebook/tags/) `jupyter/tensorflow-notebook` includes popular Python deep learning libraries. @@ -140,8 +140,8 @@ It contains: ### jupyter/datascience-notebook -[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/datascience-notebook) | -[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/datascience-notebook/Dockerfile) | +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/datascience-notebook) | +[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/datascience-notebook/Dockerfile) | [Docker Hub image tags](https://hub.docker.com/r/jupyter/datascience-notebook/tags/) `jupyter/datascience-notebook` includes libraries for data analysis from the Julia, Python, and R @@ -159,8 +159,8 @@ communities. ### jupyter/pyspark-notebook -[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/pyspark-notebook) | -[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/pyspark-notebook/Dockerfile) | +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/pyspark-notebook) | +[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/pyspark-notebook/Dockerfile) | [Docker Hub image tags](https://hub.docker.com/r/jupyter/pyspark-notebook/tags/) `jupyter/pyspark-notebook` includes Python support for Apache Spark. @@ -171,8 +171,8 @@ communities. 
### jupyter/all-spark-notebook -[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/master/all-spark-notebook) | -[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/master/all-spark-notebook/Dockerfile) | +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/all-spark-notebook) | +[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/all-spark-notebook/Dockerfile) | [Docker Hub image tags](https://hub.docker.com/r/jupyter/all-spark-notebook/tags/) `jupyter/all-spark-notebook` includes Python and R support for Apache Spark. diff --git a/docs/using/specifics.md b/docs/using/specifics.md index 04eef9cf..626978a1 100644 --- a/docs/using/specifics.md +++ b/docs/using/specifics.md @@ -41,37 +41,43 @@ ipython profile create You can build a `pyspark-notebook` image (and also the downstream `all-spark-notebook` image) with a different version of Spark by overriding the default value of the following arguments at build time. -- Spark distribution is defined by the combination of the Spark and the Hadoop version and verified by the package checksum, +- Spark distribution is defined by the combination of Spark, Hadoop and Scala versions and verified by the package checksum, see [Download Apache Spark](https://spark.apache.org/downloads.html) and the [archive repo](https://archive.apache.org/dist/spark/) for more information. - - `spark_version`: The Spark version to install (`3.0.0`). - - `hadoop_version`: The Hadoop version (`3.2`). - - `spark_checksum`: The package checksum (`BFE4540...`). -- Spark can run with different OpenJDK versions. - - `openjdk_version`: The version of (JRE headless) the OpenJDK distribution (`11`), see [Ubuntu packages](https://packages.ubuntu.com/search?keywords=openjdk). -For example here is how to build a `pyspark-notebook` image with Spark `2.4.7`, Hadoop `2.7` and OpenJDK `8`. + - `spark_version`: The Spark version to install (`3.3.0`). 
+ - `hadoop_version`: The Hadoop version (`3.2`). + - `scala_version`: The Scala version (`2.13`). + - `spark_checksum`: The package checksum (`BFE4540...`). + - `openjdk_version`: The version of the OpenJDK (JRE headless) distribution (`17`). + - This version needs to match the version supported by the Spark distribution used above. + - See [Spark Overview](https://spark.apache.org/docs/latest/#downloading) and [Ubuntu packages](https://packages.ubuntu.com/search?keywords=openjdk). + +- Starting with _Spark >= 3.2_, the distribution file name contains the Scala version, hence building an older Spark version will not work. +- Building an older version requires modifying the Dockerfile or using an older version of the Dockerfile. + +For example here is how to build a `pyspark-notebook` image with Spark `3.2.0`, Hadoop `3.2` and OpenJDK `11`. ```bash # From the root of the project # Build the image with different arguments docker build --rm --force-rm \ - -t jupyter/pyspark-notebook:spark-2.4.7 ./pyspark-notebook \ - --build-arg spark_version=2.4.7 \ - --build-arg hadoop_version=2.7 \ - --build-arg spark_checksum=0F5455672045F6110B030CE343C049855B7BA86C0ECB5E39A075FF9D093C7F648DA55DED12E72FFE65D84C32DCD5418A6D764F2D6295A3F894A4286CC80EF478 \ - --build-arg openjdk_version=8 + -t jupyter/pyspark-notebook:spark-3.2.0 ./pyspark-notebook \ + --build-arg spark_version=3.2.0 \ + --build-arg hadoop_version=3.2 \ + --build-arg spark_checksum=707DDE035926A50B75E53FCA72CADA519F3239B14A96546911CB4916A58DCF69A1D2BFDD2C7DD5899324DBD82B6EEAB9797A7B4ABF86736FFCA4C26D0E0BF0EE \ + --build-arg openjdk_version=11 # Check the newly built image -docker run -it --rm jupyter/pyspark-notebook:spark-2.4.7 pyspark --version +docker run -it --rm jupyter/pyspark-notebook:spark-3.2.0 pyspark --version # Welcome to # ____ __ # / __/__ ___ _____/ /__ # _\ \/ _ \/ _ `/ __/ '_/ -# /___/ .__/\_,_/_/ /_/\_\ version 2.4.7 +# /___/ .__/\_,_/_/ /_/\_\ version 3.2.0 # /_/ -# -# Using Scala version 2.11.12, OpenJDK 64-Bit
Server VM, 1.8.0_275 + +# Using Scala version 2.13.5, OpenJDK 64-Bit Server VM, 11.0.15 ``` ### Usage Examples diff --git a/examples/docker-compose/README.md b/examples/docker-compose/README.md index cda47e5c..7963a5d9 100644 --- a/examples/docker-compose/README.md +++ b/examples/docker-compose/README.md @@ -42,7 +42,7 @@ You can customize the docker-stack notebook image to deploy by modifying the `no For example, you can build and deploy a `jupyter/all-spark-notebook` by modifying the Dockerfile like so: ```dockerfile -FROM jupyter/all-spark-notebook:6b49f3337709 +FROM jupyter/all-spark-notebook:807999a41207 # Your RUN commands and so on ``` diff --git a/examples/docker-compose/notebook/Dockerfile b/examples/docker-compose/notebook/Dockerfile index afecc19c..eca53ad1 100644 --- a/examples/docker-compose/notebook/Dockerfile +++ b/examples/docker-compose/notebook/Dockerfile @@ -2,7 +2,7 @@ # Distributed under the terms of the Modified BSD License. # Pick your favorite docker-stacks image -FROM jupyter/minimal-notebook:6b49f3337709 +FROM jupyter/minimal-notebook:807999a41207 USER root diff --git a/examples/make-deploy/Dockerfile b/examples/make-deploy/Dockerfile index afecc19c..eca53ad1 100644 --- a/examples/make-deploy/Dockerfile +++ b/examples/make-deploy/Dockerfile @@ -2,7 +2,7 @@ # Distributed under the terms of the Modified BSD License. 
# Pick your favorite docker-stacks image -FROM jupyter/minimal-notebook:6b49f3337709 +FROM jupyter/minimal-notebook:807999a41207 USER root diff --git a/examples/source-to-image/README.md b/examples/source-to-image/README.md index f8a88bb8..1e9d3ba9 100644 --- a/examples/source-to-image/README.md +++ b/examples/source-to-image/README.md @@ -31,7 +31,7 @@ As an example of how S2I can be used to create a custom image with a bundled set ```bash s2i build \ - --scripts-url https://raw.githubusercontent.com/jupyter/docker-stacks/master/examples/source-to-image \ + --scripts-url https://raw.githubusercontent.com/jupyter/docker-stacks/main/examples/source-to-image \ --context-dir docs/source/examples/Notebook \ https://github.com/jupyter/notebook \ jupyter/minimal-notebook:latest \ @@ -127,7 +127,7 @@ Templates are provided for using the S2I build mechanism with the scripts in thi To load the templates run: ```bash -oc create -f https://raw.githubusercontent.com/jupyter/docker-stacks/master/examples/source-to-image/templates.json +oc create -f https://raw.githubusercontent.com/jupyter/docker-stacks/main/examples/source-to-image/templates.json ``` This will create the templates: diff --git a/examples/source-to-image/templates.json b/examples/source-to-image/templates.json index a057c11e..3f6817af 100644 --- a/examples/source-to-image/templates.json +++ b/examples/source-to-image/templates.json @@ -27,7 +27,7 @@ }, { "name": "BUILDER_SCRIPTS", - "value": "https://raw.githubusercontent.com/jupyter/docker-stacks/master/examples/source-to-image", + "value": "https://raw.githubusercontent.com/jupyter/docker-stacks/main/examples/source-to-image", "required": true }, { @@ -37,7 +37,7 @@ }, { "name": "GIT_REFERENCE", - "value": "master", + "value": "main", "required": true }, { @@ -130,7 +130,7 @@ }, { "name": "BUILDER_SCRIPTS", - "value": "https://raw.githubusercontent.com/jupyter/docker-stacks/master/examples/source-to-image", + "value": 
"https://raw.githubusercontent.com/jupyter/docker-stacks/main/examples/source-to-image", "required": true }, { @@ -140,7 +140,7 @@ }, { "name": "GIT_REFERENCE", - "value": "master", + "value": "main", "required": true }, { diff --git a/pyspark-notebook/Dockerfile b/pyspark-notebook/Dockerfile index 6be3c924..1163e692 100644 --- a/pyspark-notebook/Dockerfile +++ b/pyspark-notebook/Dockerfile @@ -15,10 +15,11 @@ USER root # Spark dependencies # Default values can be overridden at build time # (ARGS are in lower case to distinguish them from ENV) -ARG spark_version="3.2.1" -ARG hadoop_version="3.2" -ARG spark_checksum="145ADACF189FECF05FBA3A69841D2804DD66546B11D14FC181AC49D89F3CB5E4FECD9B25F56F0AF767155419CD430838FB651992AEB37D3A6F91E7E009D1F9AE" -ARG openjdk_version="11" +ARG spark_version="3.3.0" +ARG hadoop_version="3" +ARG scala_version="2.13" +ARG spark_checksum="4c09dac70e22bf1d5b7b2cabc1dd92aba13237f52a5b682c67982266fc7a0f5e0f964edff9bc76adbd8cb444eb1a00fdc59516147f99e4e2ce068420ff4881f0" +ARG openjdk_version="17" ENV APACHE_SPARK_VERSION="${spark_version}" \ HADOOP_VERSION="${hadoop_version}" @@ -31,10 +32,10 @@ RUN apt-get update --yes && \ # Spark installation WORKDIR /tmp -RUN wget -q "https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ - echo "${spark_checksum} *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \ - tar xzf "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" -C /usr/local --owner root --group root --no-same-owner && \ - rm "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" +RUN wget -q "https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${scala_version}.tgz" && \ + echo "${spark_checksum} *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${scala_version}.tgz" | sha512sum -c - && \ + tar xzf 
"spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${scala_version}.tgz" -C /usr/local --owner root --group root --no-same-owner && \ + rm "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${scala_version}.tgz" WORKDIR /usr/local @@ -43,17 +44,11 @@ ENV SPARK_HOME=/usr/local/spark ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info" \ PATH="${PATH}:${SPARK_HOME}/bin" -RUN ln -s "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" spark && \ +RUN ln -s "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${scala_version}" spark && \ # Add a link in the before_notebook hook in order to source automatically PYTHONPATH mkdir -p /usr/local/bin/before-notebook.d && \ ln -s "${SPARK_HOME}/sbin/spark-config.sh" /usr/local/bin/before-notebook.d/spark-config.sh -# Fix Spark installation for Java 11 and Apache Arrow library -# see: https://github.com/apache/spark/pull/27356, https://spark.apache.org/docs/latest/#downloading -RUN cp -p "${SPARK_HOME}/conf/spark-defaults.conf.template" "${SPARK_HOME}/conf/spark-defaults.conf" && \ - echo 'spark.driver.extraJavaOptions -Dio.netty.tryReflectionSetAccessible=true' >> "${SPARK_HOME}/conf/spark-defaults.conf" && \ - echo 'spark.executor.extraJavaOptions -Dio.netty.tryReflectionSetAccessible=true' >> "${SPARK_HOME}/conf/spark-defaults.conf" - # Configure IPython system-wide COPY ipython_kernel_config.py "/etc/ipython/" RUN fix-permissions "/etc/ipython/" diff --git a/tests/all-spark-notebook/test_spark_notebooks.py b/tests/all-spark-notebook/test_spark_notebooks.py index 34f8d565..c1abb218 100644 --- a/tests/all-spark-notebook/test_spark_notebooks.py +++ b/tests/all-spark-notebook/test_spark_notebooks.py @@ -32,14 +32,10 @@ def test_nbconvert(container: TrackedContainer, test_file: str) -> None: ) logs = container.run_and_wait( timeout=60, - no_warnings=False, volumes={str(host_data_dir): {"bind": cont_data_dir, 
"mode": "ro"}}, tty=True, command=["start.sh", "bash", "-c", command], ) - warnings = TrackedContainer.get_warnings(logs) - # Some Spark warnings - assert len(warnings) == 5 expected_file = f"{output_dir}/{test_file}.md" assert expected_file in logs, f"Expected file {expected_file} not generated" diff --git a/tests/pyspark-notebook/test_spark.py b/tests/pyspark-notebook/test_spark.py index da47e19f..eb721bc1 100644 --- a/tests/pyspark-notebook/test_spark.py +++ b/tests/pyspark-notebook/test_spark.py @@ -11,12 +11,8 @@ def test_spark_shell(container: TrackedContainer) -> None: """Checking if Spark (spark-shell) is running properly""" logs = container.run_and_wait( timeout=60, - no_warnings=False, tty=True, command=["start.sh", "bash", "-c", 'spark-shell <<< "1+1"'], ) - warnings = TrackedContainer.get_warnings(logs) - # Some Spark warnings - assert len(warnings) == 5 assert "res0: Int = 2" in logs, "spark-shell does not work"