Files
docker-stacks/pyspark-notebook/Dockerfile
Stefan Proell 1584606a40 Adds config file initialization
Adds the initial creation of the configuration file
jupyter_notebook_config.json to the image. This file
was missing as reported in issue #639 and caused an
error when trying to set a new password via the Web
interface.

Bumps the version of Jupyter Notebook to 5.6

Updates the Apache mirror URL because the current one causes build timeouts

The mirror now used (mirrors.ukfast.co.uk) is fast enough, does not
point to an ad site, and is already used in a different Dockerfile within
this repository.
2018-07-19 10:17:50 +02:00

52 lines
2.2 KiB
Docker

# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
# The base image is a build argument so that a specific tag or digest can be
# selected at build time (--build-arg BASE_CONTAINER=...). The default is
# untagged and therefore resolves to whatever `latest` points to when the
# image is built.
ARG BASE_CONTAINER=jupyter/scipy-notebook
FROM $BASE_CONTAINER
LABEL maintainer="Jupyter Project <jupyter@googlegroups.com>"
# The package installation steps that follow need root privileges.
USER root
# Spark dependencies
# APACHE_SPARK_VERSION and HADOOP_VERSION select the release tarball fetched
# below. The SHA-512 digest in the download step belongs to exactly this
# combination and has to be updated whenever either value changes.
ENV APACHE_SPARK_VERSION=2.3.1 \
    HADOOP_VERSION=2.7

# Headless Java 8 runtime plus the Java CA certificates; the apt package
# lists are removed in the same layer so they do not end up in the image.
RUN apt-get -y update && \
    apt-get install --no-install-recommends -y \
        ca-certificates-java \
        openjdk-8-jre-headless && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Download the Spark distribution, verify it against the pinned SHA-512
# digest, unpack it into /usr/local, delete the tarball and create the
# version-independent /usr/local/spark symlink, all within a single layer.
# The link target is relative, so it resolves inside /usr/local.
RUN cd /tmp && \
    wget -q "http://mirrors.ukfast.co.uk/sites/ftp.apache.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
    echo "DC3A97F3D99791D363E4F70A622B84D6E313BD852F6FDBC777D31EAB44CBC112CEEAA20F7BF835492FB654F48AE57E9969F93D3B0E6EC92076D1C5E1B40B4696 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
    tar xzf "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" -C /usr/local --owner root --group root --no-same-owner && \
    rm "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
    ln -s "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/local/spark
# Mesos dependencies
# Install from the Xenial Mesosphere repository since there does not (yet)
# exist a Bionic repository and the dependencies seem to be compatible for now.
COPY mesos.key /tmp/
# gnupg is installed so that apt-key can import the repository key and is
# purged again at the end of this layer. The key file is deleted as soon as
# it has been imported so that it does not linger in the final filesystem.
RUN apt-get -y update && \
    apt-get install --no-install-recommends -y gnupg && \
    apt-key add /tmp/mesos.key && \
    rm /tmp/mesos.key && \
    echo "deb http://repos.mesosphere.io/ubuntu xenial main" > /etc/apt/sources.list.d/mesosphere.list && \
    apt-get -y update && \
    apt-get --no-install-recommends -y install mesos=1.2\* && \
    apt-get purge --auto-remove -y gnupg && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Spark and Mesos config
# SPARK_HOME gets its own instruction: a variable assigned in an ENV
# instruction cannot be referenced by other assignments of that same
# instruction, and PYTHONPATH below is built from it.
ENV SPARK_HOME=/usr/local/spark
# NOTE(review): the py4j archive name is hard-coded; presumably it has to
# match the py4j version bundled with the Spark release installed above.
# Confirm when bumping APACHE_SPARK_VERSION.
# SPARK_OPTS is quoted because its value contains spaces.
ENV PYTHONPATH="${SPARK_HOME}/python:${SPARK_HOME}/python/lib/py4j-0.10.7-src.zip" \
    MESOS_NATIVE_LIBRARY=/usr/local/lib/libmesos.so \
    SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info"
# Leave root; the remaining steps run as the user identified by NB_UID.
# NB_UID, NB_USER, CONDA_DIR and the fix-permissions helper are not defined
# in this file; presumably they are provided by the base image.
USER $NB_UID
# Install pyarrow
# `conda clean --all` is used instead of the short flag bundle `-tipsy`,
# whose `-s` (source cache) flag is not accepted by newer conda releases.
RUN conda install --quiet -y 'pyarrow' && \
    conda clean --all -y && \
    fix-permissions "$CONDA_DIR" && \
    fix-permissions "/home/$NB_USER"