Merge pull request #112 from parente/fix-python2-path
Set PYSPARK_PYTHON path in python2 kernelspec
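The effect of this change is that the `python2` kernelspec baked into the image now exports `PYSPARK_PYTHON`, so PySpark workers started from a Python 2 notebook use the conda Python 2 interpreter without any per-notebook setup. A minimal sketch of how a notebook could confirm this, assuming the image's default `$CONDA_DIR` of `/opt/conda` (the exact value comes from the kernelspec, not from user code):

```python
import os

# In a Python 2 notebook launched from the patched kernelspec, the kernel
# process inherits PYSPARK_PYTHON from the "env" block of kernel.json.
print(os.environ.get('PYSPARK_PYTHON'))
# expected (assumption): /opt/conda/envs/python2/bin/python
```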
@@ -6,6 +6,9 @@ MAINTAINER Jupyter Project <jupyter@googlegroups.com>
 
 USER root
 
+# Util to help with kernel spec later
+RUN apt-get -y update && apt-get -y install jq
+
 # Spark dependencies
 ENV APACHE_SPARK_VERSION 1.5.1
 RUN apt-get -y update && \
@@ -90,12 +93,13 @@ RUN conda install --yes \
 RUN mkdir -p /opt/conda/share/jupyter/kernels/scala
 COPY kernel.json /opt/conda/share/jupyter/kernels/scala/
 
-USER root
-
-# Install Python 2 kernel spec globally to avoid permission problems when NB_UID
-# switching at runtime.
-RUN $CONDA_DIR/envs/python2/bin/python \
-    $CONDA_DIR/envs/python2/bin/ipython \
-    kernelspec install-self
-
-USER jovyan
+# Install Python 2 kernel spec into the Python 3 conda environment which
+# runs the notebook server
+RUN bash -c '. activate python2 && \
+    python -m ipykernel.kernelspec --prefix=$CONDA_DIR && \
+    . deactivate'
+# Set PYSPARK_HOME in the python2 spec
+RUN jq --arg v "$CONDA_DIR/envs/python2/bin/python" \
+    '.["env"]["PYSPARK_PYTHON"]=$v' \
+    $CONDA_DIR/share/jupyter/kernels/python2/kernel.json > /tmp/kernel.json && \
+    mv /tmp/kernel.json $CONDA_DIR/share/jupyter/kernels/python2/kernel.json
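The `jq` call above (using the `jq` package installed in the first hunk) rewrites the freshly generated `kernel.json` through a temp file, adding an `env` object that points `PYSPARK_PYTHON` at the Python 2 interpreter; note that the committed comment says `PYSPARK_HOME`, but the filter actually sets `PYSPARK_PYTHON`. For readers unfamiliar with jq, a rough Python equivalent of that single step might look like the sketch below (paths assume the image's `/opt/conda` prefix; this is an illustration, not part of the build):

```python
import json

KERNEL_JSON = '/opt/conda/share/jupyter/kernels/python2/kernel.json'
PYTHON2 = '/opt/conda/envs/python2/bin/python'

# Load the kernelspec written by `python -m ipykernel.kernelspec`,
# add the PYSPARK_PYTHON environment variable, and write it back.
with open(KERNEL_JSON) as f:
    spec = json.load(f)

spec.setdefault('env', {})['PYSPARK_PYTHON'] = PYTHON2

with open(KERNEL_JSON, 'w') as f:
    json.dump(spec, f, indent=1)
```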
@@ -32,7 +32,7 @@ This configuration is nice for using Spark on small, local data.
 1. Open a Python 2 or 3 notebook.
 2. Create a `SparkContext` configured for local mode.
 
-For example, the first few cells in a Python 3 notebook might read:
+For example, the first few cells in a notebook might read:
 
 ```python
 import pyspark
@@ -43,15 +43,6 @@ rdd = sc.parallelize(range(1000))
 rdd.takeSample(False, 5)
 ```
 
-In a Python 2 notebook, prefix the above with the following code to ensure the local workers use Python 2 as well.
-
-```python
-import os
-os.environ['PYSPARK_PYTHON'] = 'python2'
-
-# include pyspark cells from above here ...
-```
-
 ### In a R Notebook
 
 0. Run the container as shown above.
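The deleted paragraph is no longer needed because the kernelspec now supplies `PYSPARK_PYTHON` for Python 2 notebooks. If you want to double-check that driver and workers agree on the interpreter after this change, a small sketch run in a fresh notebook (not part of the documented example) could be:

```python
import sys

import pyspark

sc = pyspark.SparkContext('local[*]')

# The major version seen by the driver and the one reported by a worker
# should match once PYSPARK_PYTHON comes from the kernelspec.
driver_major = sys.version_info[0]
worker_major = sc.parallelize([0]).map(lambda _: sys.version_info[0]).first()
print(driver_major == worker_major)
```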
@@ -100,7 +91,7 @@ This configuration allows your compute cluster to scale with your data.
 
 0. Open a Python 2 or 3 notebook.
 1. Create a `SparkConf` instance in a new notebook pointing to your Mesos master node (or Zookeeper instance) and Spark binary package location.
 2. Create a `SparkContext` using this configuration.
 
 For example, the first few cells in a Python 3 notebook might read:
 
@@ -115,7 +106,7 @@ conf = pyspark.SparkConf()
 # point to mesos master or zookeeper entry (e.g., zk://10.10.10.10:2181/mesos)
 conf.setMaster("mesos://10.10.10.10:5050")
 # point to spark binary package in HDFS or on local filesystem on all slave
 # nodes (e.g., file:///opt/spark/spark-1.5.1-bin-hadoop2.6.tgz)
 conf.set("spark.executor.uri", "hdfs://10.10.10.10/spark/spark-1.5.1-bin-hadoop2.6.tgz")
 # set other options as desired
 conf.set("spark.executor.memory", "8g")
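The configuration cells shown in this hunk stop just short of creating the context; the usual next cell passes the `SparkConf` to `SparkContext` and runs a tiny job to confirm the Mesos executors come up. A hedged sketch, reusing the placeholder addresses and package path from the hunk above:

```python
import pyspark

conf = pyspark.SparkConf()
conf.setMaster("mesos://10.10.10.10:5050")
conf.set("spark.executor.uri",
         "hdfs://10.10.10.10/spark/spark-1.5.1-bin-hadoop2.6.tgz")
conf.set("spark.executor.memory", "8g")

# Create the context from the configuration and prove it works with a
# small sample job (addresses and paths above are placeholders).
sc = pyspark.SparkContext(conf=conf)
rdd = sc.parallelize(range(1000))
print(rdd.takeSample(False, 5))
```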
@@ -6,6 +6,9 @@ MAINTAINER Jupyter Project <jupyter@googlegroups.com>
 
 USER root
 
+# Util to help with kernel spec later
+RUN apt-get -y update && apt-get -y install jq
+
 # Spark dependencies
 ENV APACHE_SPARK_VERSION 1.5.1
 RUN apt-get -y update && \
@@ -52,13 +55,13 @@ RUN conda create -p $CONDA_DIR/envs/python2 python=2.7 \
 pyzmq \
 && conda clean -yt
 
-USER root
-
-# Install Python 2 kernel spec globally to avoid permission problems when NB_UID
-# switching at runtime.
-RUN $CONDA_DIR/envs/python2/bin/python \
-    $CONDA_DIR/envs/python2/bin/ipython \
-    kernelspec install-self
-
-USER jovyan
-
+# Install Python 2 kernel spec into the Python 3 conda environment which
+# runs the notebook server
+RUN bash -c '. activate python2 && \
+    python -m ipykernel.kernelspec --prefix=$CONDA_DIR && \
+    . deactivate'
+# Set PYSPARK_HOME in the python2 spec
+RUN jq --arg v "$CONDA_DIR/envs/python2/bin/python" \
+    '.["env"]["PYSPARK_PYTHON"]=$v' \
+    $CONDA_DIR/share/jupyter/kernels/python2/kernel.json > /tmp/kernel.json && \
+    mv /tmp/kernel.json $CONDA_DIR/share/jupyter/kernels/python2/kernel.json
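The `python -m ipykernel.kernelspec --prefix=$CONDA_DIR` step registers the Python 2 kernel under the same prefix the notebook server searches, so it appears alongside the default Python 3 kernel. One hedged way to inspect the result from inside the running container is via `jupyter_client` (treating the exact env contents as an assumption):

```python
from jupyter_client.kernelspec import KernelSpecManager

ksm = KernelSpecManager()

# List every installed kernelspec and show its env block; after this
# commit the python2 spec is expected to carry PYSPARK_PYTHON.
for name in ksm.find_kernel_specs():
    spec = ksm.get_kernel_spec(name)
    print("%s: %s" % (name, spec.env))
```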
@@ -27,7 +27,7 @@ This configuration is nice for using Spark on small, local data.
 2. Open a Python 2 or 3 notebook.
 3. Create a `SparkContext` configured for local mode.
 
-For example, the first few cells in a Python 3 notebook might read:
+For example, the first few cells in the notebook might read:
 
 ```python
 import pyspark
@@ -38,15 +38,6 @@ rdd = sc.parallelize(range(1000))
 rdd.takeSample(False, 5)
 ```
 
-In a Python 2 notebook, prefix the above with the following code to ensure the local workers use Python 2 as well.
-
-```python
-import os
-os.environ['PYSPARK_PYTHON'] = 'python2'
-
-# include pyspark cells from above here ...
-```
-
 ## Connecting to a Spark Cluster on Mesos
 
 This configuration allows your compute cluster to scale with your data.
@@ -58,7 +49,7 @@ This configuration allows your compute cluster to scale with your data.
 * NOTE: When using `--net=host`, you must also use the flags `--pid=host -e TINI_SUBREAPER=true`. See https://github.com/jupyter/docker-stacks/issues/64 for details.
 4. Open a Python 2 or 3 notebook.
 5. Create a `SparkConf` instance in a new notebook pointing to your Mesos master node (or Zookeeper instance) and Spark binary package location.
 6. Create a `SparkContext` using this configuration.
 
 For example, the first few cells in a Python 3 notebook might read:
 
@@ -73,7 +64,7 @@ conf = pyspark.SparkConf()
 # point to mesos master or zookeeper entry (e.g., zk://10.10.10.10:2181/mesos)
 conf.setMaster("mesos://10.10.10.10:5050")
 # point to spark binary package in HDFS or on local filesystem on all slave
 # nodes (e.g., file:///opt/spark/spark-1.5.1-bin-hadoop2.6.tgz)
 conf.set("spark.executor.uri", "hdfs://10.122.193.209/spark/spark-1.5.1-bin-hadoop2.6.tgz")
 # set other options as desired
 conf.set("spark.executor.memory", "8g")