mirror of
https://github.com/jupyter/docker-stacks.git
synced 2025-10-12 04:22:58 +00:00
Upgrading Spark to 3.0, removing Toree
Makefile
@@ -88,7 +88,7 @@ lint-build-test-all: $(foreach I,$(ALL_IMAGES),lint/$(I) arch_patch/$(I) build/$
 
 lint-install: ## install hadolint
 	@echo "Installing hadolint at $(HADOLINT) ..."
-	@curl -sL -o $(HADOLINT) "https://github.com/hadolint/hadolint/releases/download/v1.17.6/hadolint-$(shell uname -s)-$(shell uname -m)"
+	@curl -sL -o $(HADOLINT) "https://github.com/hadolint/hadolint/releases/download/v1.18.0/hadolint-$(shell uname -s)-$(shell uname -m)"
 	@chmod 700 $(HADOLINT)
 	@echo "Installation done!"
 	@$(HADOLINT) --version
@@ -33,16 +33,6 @@ RUN conda install --quiet --yes \
     fix-permissions "${CONDA_DIR}" && \
     fix-permissions "/home/${NB_USER}"
 
-# Apache Toree kernel
-# hadolint ignore=DL3013
-RUN pip install --no-cache-dir \
-    https://dist.apache.org/repos/dist/release/incubator/toree/0.3.0-incubating/toree-pip/toree-0.3.0.tar.gz \
-    && \
-    jupyter toree install --sys-prefix && \
-    rm -rf "/home/${NB_USER}/.local" && \
-    fix-permissions "${CONDA_DIR}" && \
-    fix-permissions "/home/${NB_USER}"
-
 # Spylon-kernel
 RUN conda install --quiet --yes 'spylon-kernel=0.4*' && \
     conda clean --all -f -y && \
@@ -1,89 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Waiting for a Spark session to start..."
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "spark://master:7077\n"
-     ]
-    }
-   ],
-   "source": [
-    "// should print the value of --master in the kernel spec\n",
-    "println(sc.master)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Waiting for a Spark session to start..."
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "rdd = ParallelCollectionRDD[0] at parallelize at <console>:28\n"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "5050.0"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "// Sum of the first 100 whole numbers\n",
-    "val rdd = sc.parallelize(0 to 100)\n",
-    "rdd.sum()\n",
-    "// 5050"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Apache Toree - Scala",
-   "language": "scala",
-   "name": "apache_toree_scala"
-  },
-  "language_info": {
-   "codemirror_mode": "text/x-scala",
-   "file_extension": ".scala",
-   "mimetype": "text/x-scala",
-   "name": "scala",
-   "pygments_lexer": "scala",
-   "version": "2.11.12"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
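The deleted notebook only checks two things: that the kernel picked up the configured Spark master, and that summing 0 to 100 yields 5050. With Toree gone, the same checks can be expressed against the remaining PySpark kernel. The snippet below is a minimal sketch of that idea, not a file touched by this commit; the app name is illustrative.

# Minimal PySpark sketch mirroring the removed Toree checks (illustrative only).
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[*]").appName("smoke-test").getOrCreate()
sc = spark.sparkContext

# Should print the value of --master configured for the session
print(sc.master)

# Sum of the whole numbers 0 to 100
rdd = sc.parallelize(range(101))
print(rdd.sum())  # 5050.0

spark.stop()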
@@ -12,7 +12,7 @@ LOGGER = logging.getLogger(__name__)
 @pytest.mark.parametrize(
     "test_file",
     # TODO: add local_sparklyr
-    ["local_pyspark", "local_spylon", "local_toree", "local_sparkR"],
+    ["local_pyspark", "local_spylon", "local_sparkR"],
 )
 def test_nbconvert(container, test_file):
     """Check if Spark notebooks can be executed"""
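The parametrized test runs each listed notebook through nbconvert inside the image; its body is unchanged and not shown here. As a rough standalone equivalent (an assumption, using nbconvert's ExecutePreprocessor directly rather than the repository's container fixture):

# Standalone sketch of executing one of the listed notebooks with nbconvert
# (assumed equivalent; the real test drives this inside the container).
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

def run_notebook(path: str, kernel_name: str = "python3") -> None:
    nb = nbformat.read(path, as_version=4)
    ep = ExecutePreprocessor(timeout=600, kernel_name=kernel_name)
    ep.preprocess(nb, {"metadata": {"path": "."}})  # raises if any cell errors

run_notebook("local_pyspark.ipynb")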
@@ -11,19 +11,20 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 USER root
 
 # Spark dependencies
-ENV APACHE_SPARK_VERSION=2.4.5 \
-    HADOOP_VERSION=2.7
+ENV APACHE_SPARK_VERSION=3.0.0 \
+    HADOOP_VERSION=3.2
 
 RUN apt-get -y update && \
-    apt-get install --no-install-recommends -y openjdk-8-jre-headless ca-certificates-java && \
+    apt-get install --no-install-recommends -y openjdk-11-jre-headless ca-certificates-java && \
    rm -rf /var/lib/apt/lists/*
 
 # Using the preferred mirror to download Spark
 WORKDIR /tmp
 
 # hadolint ignore=SC2046
 RUN wget -q $(wget -qO- https://www.apache.org/dyn/closer.lua/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz\?as_json | \
     python -c "import sys, json; content=json.load(sys.stdin); print(content['preferred']+content['path_info'])") && \
-    echo "2426a20c548bdfc07df288cd1d18d1da6b3189d0b78dee76fa034c52a4e02895f0ad460720c526f163ba63a17efae4764c46a1cd8f9b04c60f9937a554db85d2 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
+    echo "BFE45406C67CC4AE00411AD18CC438F51E7D4B6F14EB61E7BF6B5450897C2E8D3AB020152657C0239F253735C263512FFABF538AC5B9FFFA38B8295736A9C387 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
     tar xzf "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" -C /usr/local --owner root --group root --no-same-owner && \
     rm "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz"
+
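The mirror-selection one-liner above asks Apache's closer.lua service for JSON and joins its 'preferred' and 'path_info' fields into the final download URL. Expanded into a plain script, it looks roughly like this (a sketch; the hard-coded version values simply echo the ENV settings in the hunk above):

# Expanded form of the inline "python -c" mirror lookup (sketch).
import json
from urllib.request import urlopen

APACHE_SPARK_VERSION = "3.0.0"
HADOOP_VERSION = "3.2"

url = (
    "https://www.apache.org/dyn/closer.lua/spark/"
    f"spark-{APACHE_SPARK_VERSION}/"
    f"spark-{APACHE_SPARK_VERSION}-bin-hadoop{HADOOP_VERSION}.tgz?as_json"
)

# closer.lua returns JSON: 'preferred' is the closest mirror base URL and
# 'path_info' is the artifact path on that mirror.
with urlopen(url) as resp:
    content = json.load(resp)

print(content["preferred"] + content["path_info"])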
@@ -32,7 +33,7 @@ RUN ln -s "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" spark
 
 # Configure Spark
 ENV SPARK_HOME=/usr/local/spark
-ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip \
+ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip \
     SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info" \
     PATH=$PATH:$SPARK_HOME/bin
 
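The py4j zip on PYTHONPATH must match the copy shipped inside the Spark distribution, which is why the bump to Spark 3.0.0 also moves the path from py4j-0.10.7 to py4j-0.10.9. A quick sanity check, as a sketch that assumes the image's default SPARK_HOME:

# Sanity-check sketch: the py4j zip referenced on PYTHONPATH should exist
# under the installed Spark distribution.
import os

spark_home = os.environ.get("SPARK_HOME", "/usr/local/spark")
py4j_zip = os.path.join(spark_home, "python", "lib", "py4j-0.10.9-src.zip")
print(py4j_zip, "exists" if os.path.exists(py4j_zip) else "MISSING")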