# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

# Jupyter notebook image with Spark (standalone + Mesos), the Toree Scala
# kernel, a Python 2 kernel, and an IRkernel, all layered on minimal-notebook.
FROM jupyter/minimal-notebook

# LABEL replaces the deprecated MAINTAINER instruction.
LABEL maintainer="Jupyter Project"

USER root

# Util to help with kernel spec later (jq edits kernel.json below).
# Each apt layer ends with clean + list removal so the package indexes
# are not baked into the image.
RUN apt-get -y update && \
    apt-get -y install jq && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Spark dependencies
ENV APACHE_SPARK_VERSION 1.6.0
RUN apt-get -y update && \
    apt-get install -y --no-install-recommends openjdk-7-jre-headless && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Fetch the pinned Spark release. The transfer is plain http, but the
# sha256sum check below guards the artifact's integrity.
RUN cd /tmp && \
    wget -q http://d3kbcqa49mib13.cloudfront.net/spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz && \
    echo "439fe7793e0725492d3d36448adcd1db38f438dd1392bffd556b58bb9a3a2601 *spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz" | sha256sum -c - && \
    tar xzf spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz -C /usr/local && \
    rm spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz
# Version-independent path so downstream config can say /usr/local/spark.
RUN cd /usr/local && ln -s spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6 spark

# Mesos dependencies (pinned to the mesosphere wheezy repo; --force-yes is
# required by the apt version in this base image for the pinned package).
RUN apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF && \
    DISTRO=debian && \
    CODENAME=wheezy && \
    echo "deb http://repos.mesosphere.io/${DISTRO} ${CODENAME} main" > /etc/apt/sources.list.d/mesosphere.list && \
    apt-get -y update && \
    apt-get --no-install-recommends -y --force-yes install mesos=0.22.1-1.0.debian78 && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Scala Spark kernel: build Toree from a pinned commit with sbt, install the
# kernel to /opt, then remove sbt and every build cache in the same layer so
# none of it is retained in the image.
RUN cd /tmp && \
    echo deb http://dl.bintray.com/sbt/debian / > /etc/apt/sources.list.d/sbt.list && \
    apt-key adv --keyserver keyserver.ubuntu.com --recv 99E82A75642AC823 && \
    apt-get update && \
    git clone https://github.com/apache/incubator-toree.git && \
    apt-get install -yq --force-yes --no-install-recommends sbt && \
    cd incubator-toree && \
    git checkout 846292233c && \
    make dist SHELL=/bin/bash && \
    mv dist/toree-kernel /opt/toree-kernel && \
    chmod +x /opt/toree-kernel && \
    rm -rf ~/.ivy2 && \
    rm -rf ~/.sbt && \
    rm -rf /tmp/incubator-toree && \
    apt-get remove -y sbt && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Spark and Mesos pointers
ENV SPARK_HOME /usr/local/spark
ENV R_LIBS_USER $SPARK_HOME/R/lib
ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.9-src.zip
ENV MESOS_NATIVE_LIBRARY /usr/local/lib/libmesos.so
ENV SPARK_OPTS --driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info

# R pre-requisites (toolchain needed to compile R package sources)
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    fonts-dejavu \
    gfortran \
    gcc && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Drop root: everything below runs as the notebook user.
USER jovyan

# Install Python 3 packages
RUN conda install --yes \
    'ipywidgets=4.1*' \
    'pandas=0.17*' \
    'matplotlib=1.5*' \
    'scipy=0.17*' \
    'seaborn=0.7*' \
    'scikit-learn=0.17*' \
    && conda clean -yt

# Install Python 2 packages and kernel spec
RUN conda create -p $CONDA_DIR/envs/python2 python=2.7 \
    'ipython=4.1*' \
    'ipywidgets=4.1*' \
    'pandas=0.17*' \
    'matplotlib=1.5*' \
    'scipy=0.17*' \
    'seaborn=0.7*' \
    'scikit-learn=0.17*' \
    pyzmq \
    && conda clean -yt

# R packages
RUN conda config --add channels r
RUN conda install --yes \
    'r-base=3.2*' \
    'r-irkernel=0.5*' \
    'r-ggplot2=1.0*' \
    'r-rcurl=1.95*' && conda clean -yt

# Scala Spark kernel spec
RUN mkdir -p /opt/conda/share/jupyter/kernels/scala
COPY kernel.json /opt/conda/share/jupyter/kernels/scala/

# Install Python 2 kernel spec into the Python 3 conda environment which
# runs the notebook server
RUN bash -c '. activate python2 && \
    python -m ipykernel.kernelspec --prefix=$CONDA_DIR && \
    . deactivate'

# Set PYSPARK_PYTHON in the python2 spec so PySpark workers use the
# python2 env's interpreter (jq writes via a temp file, then moves it back).
RUN jq --arg v "$CONDA_DIR/envs/python2/bin/python" \
    '.["env"]["PYSPARK_PYTHON"]=$v' \
    $CONDA_DIR/share/jupyter/kernels/python2/kernel.json > /tmp/kernel.json && \
    mv /tmp/kernel.json $CONDA_DIR/share/jupyter/kernels/python2/kernel.json