Files
docker-stacks/all-spark-notebook/Dockerfile
Peter Parente 3fd3850bdc Bump python libs to match other images
(c) Copyright IBM Corp. 2016
2016-02-13 20:49:11 -05:00

111 lines
3.8 KiB
Docker

# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
# Base image: the Jupyter minimal notebook stack.
# NOTE(review): the base image is referenced without a tag, so every build
# follows whatever the default tag currently points at -- consider pinning a
# tag or digest for reproducible builds.
FROM jupyter/minimal-notebook

# MAINTAINER is deprecated; the same contact details are carried as a label.
LABEL maintainer="Jupyter Project <jupyter@googlegroups.com>"

# The system package installs that follow need root privileges.
USER root
# Util to help with kernel spec later: jq is used further down in this file to
# patch the Python 2 kernel.json.
# Recommended packages are skipped and the apt caches are cleared in the same
# layer, so downloaded archives and package indexes are not baked into the image.
# Every later apt-get install in this file runs its own apt-get update first,
# so removing the package lists here is safe.
RUN apt-get -y update && \
    apt-get -y install --no-install-recommends jq && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Spark dependencies: pin the Spark release used by the download step and
# install a headless Java 7 runtime.
ENV APACHE_SPARK_VERSION=1.6.0
RUN apt-get update -y \
    && apt-get install --no-install-recommends -y openjdk-7-jre-headless \
    && apt-get clean
# Download the Spark binary distribution built for Hadoop 2.6 into /tmp, verify
# it against the SHA-256 digest below, unpack it under /usr/local and delete
# the tarball in the same layer.
# The digest is a literal, so it has to be updated together with
# APACHE_SPARK_VERSION.
RUN cd /tmp && \
    SPARK_TGZ="spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz" && \
    wget -q "http://d3kbcqa49mib13.cloudfront.net/${SPARK_TGZ}" && \
    echo "439fe7793e0725492d3d36448adcd1db38f438dd1392bffd556b58bb9a3a2601 *${SPARK_TGZ}" | sha256sum -c - && \
    tar xzf "${SPARK_TGZ}" -C /usr/local && \
    rm "${SPARK_TGZ}"
# Version-independent path (/usr/local/spark) for the unpacked distribution.
RUN cd /usr/local && \
    ln -s "spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6" spark
# Mesos dependencies: import apt key E56151BF from the Ubuntu keyserver,
# register the Mesosphere apt repository for the distro/codename set below,
# and install the pinned Mesos package.
RUN apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF \
    && DISTRO=debian \
    && CODENAME=wheezy \
    && echo "deb http://repos.mesosphere.io/${DISTRO} ${CODENAME} main" \
        > /etc/apt/sources.list.d/mesosphere.list \
    && apt-get update -y \
    && apt-get install -y --force-yes --no-install-recommends mesos=0.22.1-1.0.debian78 \
    && apt-get clean
# Scala Spark kernel (build and cleanup)
# Builds the Apache Toree kernel from source inside a single layer:
#   1. register the sbt apt repository and import apt key 99E82A75642AC823,
#   2. clone incubator-toree into /tmp and install sbt,
#   3. check out commit 846292233c and run `make dist` with bash as make's SHELL,
#   4. move dist/toree-kernel to /opt/toree-kernel and mark it executable,
#   5. delete ~/.ivy2, ~/.sbt and the checkout, then remove the sbt package.
# Build and cleanup share one RUN so the build leftovers never persist in a layer.
# NOTE(review): /etc/apt/sources.list.d/sbt.list is not removed by the cleanup,
# so the sbt repository stays configured for any later apt-get update --
# confirm that is intended.
RUN cd /tmp && \
echo deb http://dl.bintray.com/sbt/debian / > /etc/apt/sources.list.d/sbt.list && \
apt-key adv --keyserver keyserver.ubuntu.com --recv 99E82A75642AC823 && \
apt-get update && \
git clone https://github.com/apache/incubator-toree.git && \
apt-get install -yq --force-yes --no-install-recommends sbt && \
cd incubator-toree && \
git checkout 846292233c && \
make dist SHELL=/bin/bash && \
mv dist/toree-kernel /opt/toree-kernel && \
chmod +x /opt/toree-kernel && \
rm -rf ~/.ivy2 && \
rm -rf ~/.sbt && \
rm -rf /tmp/incubator-toree && \
apt-get remove -y sbt && \
apt-get clean
# Spark and Mesos pointers.
# Each variable keeps its own ENV instruction: the later ones expand
# SPARK_HOME, which is only visible to instructions that come after the one
# defining it.
ENV SPARK_HOME=/usr/local/spark
ENV R_LIBS_USER=${SPARK_HOME}/R/lib
ENV PYTHONPATH=${SPARK_HOME}/python:${SPARK_HOME}/python/lib/py4j-0.9-src.zip
ENV MESOS_NATIVE_LIBRARY=/usr/local/lib/libmesos.so
ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info"
# R pre-requisites: the DejaVu fonts plus the C and Fortran compilers.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        fonts-dejavu \
        gcc \
        gfortran \
    && apt-get clean
# Drop root: the conda steps below run as the jovyan user.
USER jovyan

# Install Python 3 packages, then discard the downloaded package tarballs in
# the same layer.
RUN conda install --yes \
        'ipywidgets=4.1*' \
        'matplotlib=1.5*' \
        'pandas=0.17*' \
        'scikit-learn=0.17*' \
        'scipy=0.17*' \
        'seaborn=0.7*' \
    && conda clean --tarballs --yes
# Install Python 2 packages into a dedicated conda environment at
# $CONDA_DIR/envs/python2. (Its kernel spec is registered by a separate step
# further down in this file.)
# --yes makes the non-interactive run explicit instead of relying on the
# confirmation prompt's default answer, matching the other conda commands here.
RUN conda create --yes -p $CONDA_DIR/envs/python2 python=2.7 \
        'ipython=4.1*' \
        'ipywidgets=4.1*' \
        'pandas=0.17*' \
        'matplotlib=1.5*' \
        'scipy=0.17*' \
        'seaborn=0.7*' \
        'scikit-learn=0.17*' \
        pyzmq \
    && conda clean -yt
# R packages: add the "r" conda channel, then install R itself, the IRkernel
# and the ggplot2 / RCurl libraries, discarding package tarballs afterwards.
RUN conda config --add channels r
RUN conda install --yes \
        'r-base=3.2*' \
        'r-ggplot2=1.0*' \
        'r-irkernel=0.5*' \
        'r-rcurl=1.95*' \
    && conda clean -yt
# Scala Spark kernel spec: create the kernel directory, then copy kernel.json
# from the build context into it.
RUN mkdir -p /opt/conda/share/jupyter/kernels/scala
COPY kernel.json /opt/conda/share/jupyter/kernels/scala/kernel.json
# Register the Python 2 kernel with the notebook server: activate the python2
# environment, write its kernel spec under $CONDA_DIR (the Python 3 conda
# environment which runs the notebook server), then deactivate again.
RUN bash -c 'source activate python2 \
    && python -m ipykernel.kernelspec --prefix=$CONDA_DIR \
    && source deactivate'
# Set PYSPARK_PYTHON in the python2 kernel spec so that it points at the
# python2 environment's interpreter ($CONDA_DIR/envs/python2/bin/python).
# The patched JSON is written to /tmp/kernel.json first and then moved over the
# original spec; redirecting straight onto the input file would truncate it
# before jq reads it.
RUN jq --arg v "$CONDA_DIR/envs/python2/bin/python" \
'.["env"]["PYSPARK_PYTHON"]=$v' \
$CONDA_DIR/share/jupyter/kernels/python2/kernel.json > /tmp/kernel.json && \
mv /tmp/kernel.json $CONDA_DIR/share/jupyter/kernels/python2/kernel.json