Upgrading Spark to 3.0, removing Toree

This commit is contained in:
Darek
2020-06-19 00:52:51 +00:00
parent da246268af
commit 568708d279
5 changed files with 9 additions and 107 deletions

View File

@@ -88,7 +88,7 @@ lint-build-test-all: $(foreach I,$(ALL_IMAGES),lint/$(I) arch_patch/$(I) build/$
lint-install: ## install hadolint lint-install: ## install hadolint
@echo "Installing hadolint at $(HADOLINT) ..." @echo "Installing hadolint at $(HADOLINT) ..."
@curl -sL -o $(HADOLINT) "https://github.com/hadolint/hadolint/releases/download/v1.17.6/hadolint-$(shell uname -s)-$(shell uname -m)" @curl -sL -o $(HADOLINT) "https://github.com/hadolint/hadolint/releases/download/v1.18.0/hadolint-$(shell uname -s)-$(shell uname -m)"
@chmod 700 $(HADOLINT) @chmod 700 $(HADOLINT)
@echo "Installation done!" @echo "Installation done!"
@$(HADOLINT) --version @$(HADOLINT) --version

View File

@@ -33,16 +33,6 @@ RUN conda install --quiet --yes \
fix-permissions "${CONDA_DIR}" && \ fix-permissions "${CONDA_DIR}" && \
fix-permissions "/home/${NB_USER}" fix-permissions "/home/${NB_USER}"
# Apache Toree kernel
# hadolint ignore=DL3013
RUN pip install --no-cache-dir \
https://dist.apache.org/repos/dist/release/incubator/toree/0.3.0-incubating/toree-pip/toree-0.3.0.tar.gz \
&& \
jupyter toree install --sys-prefix && \
rm -rf "/home/${NB_USER}/.local" && \
fix-permissions "${CONDA_DIR}" && \
fix-permissions "/home/${NB_USER}"
# Spylon-kernel # Spylon-kernel
RUN conda install --quiet --yes 'spylon-kernel=0.4*' && \ RUN conda install --quiet --yes 'spylon-kernel=0.4*' && \
conda clean --all -f -y && \ conda clean --all -f -y && \

View File

@@ -1,89 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Waiting for a Spark session to start..."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"spark://master:7077\n"
]
}
],
"source": [
"// should print the value of --master in the kernel spec\n",
"println(sc.master)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Waiting for a Spark session to start..."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"rdd = ParallelCollectionRDD[0] at parallelize at <console>:28\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"5050.0"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"// Sum of the first 100 whole numbers\n",
"val rdd = sc.parallelize(0 to 100)\n",
"rdd.sum()\n",
"// 5050"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Apache Toree - Scala",
"language": "scala",
"name": "apache_toree_scala"
},
"language_info": {
"codemirror_mode": "text/x-scala",
"file_extension": ".scala",
"mimetype": "text/x-scala",
"name": "scala",
"pygments_lexer": "scala",
"version": "2.11.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@@ -12,7 +12,7 @@ LOGGER = logging.getLogger(__name__)
@pytest.mark.parametrize( @pytest.mark.parametrize(
"test_file", "test_file",
# TODO: add local_sparklyr # TODO: add local_sparklyr
["local_pyspark", "local_spylon", "local_toree", "local_sparkR"], ["local_pyspark", "local_spylon", "local_sparkR"],
) )
def test_nbconvert(container, test_file): def test_nbconvert(container, test_file):
"""Check if Spark notebooks can be executed""" """Check if Spark notebooks can be executed"""

View File

@@ -11,19 +11,20 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"]
USER root USER root
# Spark dependencies # Spark dependencies
ENV APACHE_SPARK_VERSION=2.4.5 \ ENV APACHE_SPARK_VERSION=3.0.0 \
HADOOP_VERSION=2.7 HADOOP_VERSION=3.2
RUN apt-get -y update && \ RUN apt-get -y update && \
apt-get install --no-install-recommends -y openjdk-8-jre-headless ca-certificates-java && \ apt-get install --no-install-recommends -y openjdk-11-jre-headless ca-certificates-java && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
# Using the preferred mirror to download Spark # Using the preferred mirror to download Spark
WORKDIR /tmp WORKDIR /tmp
# hadolint ignore=SC2046 # hadolint ignore=SC2046
RUN wget -q $(wget -qO- https://www.apache.org/dyn/closer.lua/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz\?as_json | \ RUN wget -q $(wget -qO- https://www.apache.org/dyn/closer.lua/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz\?as_json | \
python -c "import sys, json; content=json.load(sys.stdin); print(content['preferred']+content['path_info'])") && \ python -c "import sys, json; content=json.load(sys.stdin); print(content['preferred']+content['path_info'])") && \
echo "2426a20c548bdfc07df288cd1d18d1da6b3189d0b78dee76fa034c52a4e02895f0ad460720c526f163ba63a17efae4764c46a1cd8f9b04c60f9937a554db85d2 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \ echo "BFE45406C67CC4AE00411AD18CC438F51E7D4B6F14EB61E7BF6B5450897C2E8D3AB020152657C0239F253735C263512FFABF538AC5B9FFFA38B8295736A9C387 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
tar xzf "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" -C /usr/local --owner root --group root --no-same-owner && \ tar xzf "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" -C /usr/local --owner root --group root --no-same-owner && \
rm "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" rm "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz"
@@ -32,7 +33,7 @@ RUN ln -s "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" spark
# Configure Spark # Configure Spark
ENV SPARK_HOME=/usr/local/spark ENV SPARK_HOME=/usr/local/spark
ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip \ ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip \
SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info" \ SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info" \
PATH=$PATH:$SPARK_HOME/bin PATH=$PATH:$SPARK_HOME/bin