Mirror of https://github.com/jupyter/docker-stacks.git (synced 2025-10-10 11:32:57 +00:00)
Upgrade Spark to 2.2.0
@@ -7,7 +7,7 @@ MAINTAINER Jupyter Project <jupyter@googlegroups.com>
 USER root
 
 # Spark dependencies
-ENV APACHE_SPARK_VERSION 2.1.1
+ENV APACHE_SPARK_VERSION 2.2.0
 ENV HADOOP_VERSION 2.7
 
 # Temporarily add jessie backports to get openjdk 8, but then remove that source
@@ -19,7 +19,7 @@ RUN echo 'deb http://cdn-fastly.deb.debian.org/debian jessie-backports main' > /
     rm -rf /var/lib/apt/lists/*
 RUN cd /tmp && \
     wget -q http://d3kbcqa49mib13.cloudfront.net/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \
-    echo "4b6427ca6dc6f888b21bff9f9a354260af4a0699a1f43caabf58ae6030951ee5fa8b976497aa33de7e4ae55609d47a80bfe66dfc48c79ea28e3e5b03bdaaba11 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
+    echo "7a186a2a007b2dfd880571f7214a7d329c972510a460a8bdbef9f7f2a891019343c020f74b496a61e5aa42bc9e9a79cc99defe5cb3bf8b6f49c07e01b259bc6b *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
     tar xzf spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz -C /usr/local && \
     rm spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
 RUN cd /usr/local && ln -s spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} spark
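The hunk above swaps the pinned SHA-512 digest that `sha512sum -c` verifies before the tarball is unpacked. As a minimal sketch of the same check done outside the Dockerfile (the local file path is an assumption; the digest is the new value from the diff):

```python
# Sketch: replicate the Dockerfile's `sha512sum -c` step in Python.
# Assumes the tarball was already downloaded to the current directory.
import hashlib

# New digest pinned in the Dockerfile hunk above
EXPECTED_SHA512 = (
    "7a186a2a007b2dfd880571f7214a7d329c972510a460a8bdbef9f7f2a8910193"
    "43c020f74b496a61e5aa42bc9e9a79cc99defe5cb3bf8b6f49c07e01b259bc6b"
)

path = "spark-2.2.0-bin-hadoop2.7.tgz"
h = hashlib.sha512()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

if h.hexdigest() != EXPECTED_SHA512:
    raise SystemExit("checksum mismatch: refusing to unpack")
```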
@@ -7,7 +7,7 @@
 * Jupyter Notebook 5.0.x
 * Conda Python 3.x and Python 2.7.x environments
 * pyspark, pandas, matplotlib, scipy, seaborn, scikit-learn pre-installed
-* Spark 2.1.1 with Hadoop 2.7 for use in local mode or to connect to a cluster of Spark workers
+* Spark 2.2.0 with Hadoop 2.7 for use in local mode or to connect to a cluster of Spark workers
 * Mesos client 1.2 binary that can communicate with a Mesos master
 * Unprivileged user `jovyan` (uid=1000, configurable, see options) in group `users` (gid=100) with ownership over `/home/jovyan` and `/opt/conda`
 * [tini](https://github.com/krallin/tini) as the container entrypoint and [start-notebook.sh](../base-notebook/start-notebook.sh) as the default command
@@ -70,8 +70,8 @@ conf = pyspark.SparkConf()
 # point to mesos master or zookeeper entry (e.g., zk://10.10.10.10:2181/mesos)
 conf.setMaster("mesos://10.10.10.10:5050")
 # point to spark binary package in HDFS or on local filesystem on all slave
-# nodes (e.g., file:///opt/spark/spark-2.1.1-bin-hadoop2.7.tgz)
-conf.set("spark.executor.uri", "hdfs://10.122.193.209/spark/spark-2.1.1-bin-hadoop2.7.tgz")
+# nodes (e.g., file:///opt/spark/spark-2.2.0-bin-hadoop2.7.tgz)
+conf.set("spark.executor.uri", "hdfs://10.122.193.209/spark/spark-2.2.0-bin-hadoop2.7.tgz")
 # set other options as desired
 conf.set("spark.executor.memory", "8g")
 conf.set("spark.core.connection.ack.wait.timeout", "1200")
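The README snippet being patched builds a `SparkConf` for Mesos. A hedged sketch of how it is typically completed into a working context follows; the `SparkContext` construction and the toy job are illustrative additions, not part of the diff:

```python
# Sketch: completing the README's Mesos configuration into a SparkContext.
# Addresses are taken from the example above; the smoke-test job is assumed.
import pyspark

conf = pyspark.SparkConf()
conf.setMaster("mesos://10.10.10.10:5050")
conf.set("spark.executor.uri",
         "hdfs://10.122.193.209/spark/spark-2.2.0-bin-hadoop2.7.tgz")
conf.set("spark.executor.memory", "8g")
conf.set("spark.core.connection.ack.wait.timeout", "1200")

sc = pyspark.SparkContext(conf=conf)
print(sc.parallelize(range(100)).sum())  # quick smoke test: prints 4950
```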
@@ -202,7 +202,7 @@ c.DockerSpawner.container_image = 'jupyter/pyspark-notebook'
 
 # Have the Spawner override the Docker run command
 c.DockerSpawner.extra_create_kwargs.update({
-    'command': '/usr/local/bin/start-singleuser.sh'
+    'command': '/usr/local/bin/start-singleuser.sh'
 })
 ```
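This last hunk patches the JupyterHub `jupyterhub_config.py` example in the README; the removed and added lines render identically here, suggesting a whitespace-only change. A hedged sketch of the surrounding configuration, where the `spawner_class` line is an assumption not shown in the hunk:

```python
# Sketch of the surrounding jupyterhub_config.py. container_image and the
# command override come from the diff above; spawner_class is assumed.
c = get_config()  # provided by JupyterHub when it loads this file

c.JupyterHub.spawner_class = 'dockerspawner.DockerSpawner'
c.DockerSpawner.container_image = 'jupyter/pyspark-notebook'

# Have the Spawner override the Docker run command
c.DockerSpawner.extra_create_kwargs.update({
    'command': '/usr/local/bin/start-singleuser.sh'
})
```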