mirror of https://github.com/jupyter/docker-stacks.git
Upgrading Spark to 3.0, removing Toree

Makefile
@@ -88,7 +88,7 @@ lint-build-test-all: $(foreach I,$(ALL_IMAGES),lint/$(I) arch_patch/$(I) build/$
 
 lint-install: ## install hadolint
 	@echo "Installing hadolint at $(HADOLINT) ..."
-	@curl -sL -o $(HADOLINT) "https://github.com/hadolint/hadolint/releases/download/v1.17.6/hadolint-$(shell uname -s)-$(shell uname -m)"
+	@curl -sL -o $(HADOLINT) "https://github.com/hadolint/hadolint/releases/download/v1.18.0/hadolint-$(shell uname -s)-$(shell uname -m)"
 	@chmod 700 $(HADOLINT)
 	@echo "Installation done!"
 	@$(HADOLINT) --version
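
The release URL in the lint-install recipe above is assembled from "uname -s" and "uname -m" at install time. For illustration only, the same URL construction expressed in Python (not part of the Makefile):

    import platform

    # Mirrors $(shell uname -s)-$(shell uname -m) in the Makefile:
    # e.g. on a typical CI runner this yields hadolint-Linux-x86_64
    version = "v1.18.0"
    url = (
        "https://github.com/hadolint/hadolint/releases/download/"
        f"{version}/hadolint-{platform.system()}-{platform.machine()}"
    )
    print(url)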

@@ -33,16 +33,6 @@ RUN conda install --quiet --yes \
     fix-permissions "${CONDA_DIR}" && \
     fix-permissions "/home/${NB_USER}"
 
-# Apache Toree kernel
-# hadolint ignore=DL3013
-RUN pip install --no-cache-dir \
-    https://dist.apache.org/repos/dist/release/incubator/toree/0.3.0-incubating/toree-pip/toree-0.3.0.tar.gz \
-    && \
-    jupyter toree install --sys-prefix && \
-    rm -rf "/home/${NB_USER}/.local" && \
-    fix-permissions "${CONDA_DIR}" && \
-    fix-permissions "/home/${NB_USER}"
-
 # Spylon-kernel
 RUN conda install --quiet --yes 'spylon-kernel=0.4*' && \
     conda clean --all -f -y && \

@@ -1,89 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Waiting for a Spark session to start..."
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "spark://master:7077\n"
-     ]
-    }
-   ],
-   "source": [
-    "// should print the value of --master in the kernel spec\n",
-    "println(sc.master)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Waiting for a Spark session to start..."
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "rdd = ParallelCollectionRDD[0] at parallelize at <console>:28\n"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "5050.0"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "// Sum of the first 100 whole numbers\n",
-    "val rdd = sc.parallelize(0 to 100)\n",
-    "rdd.sum()\n",
-    "// 5050"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Apache Toree - Scala",
-   "language": "scala",
-   "name": "apache_toree_scala"
-  },
-  "language_info": {
-   "codemirror_mode": "text/x-scala",
-   "file_extension": ".scala",
-   "mimetype": "text/x-scala",
-   "name": "scala",
-   "pygments_lexer": "scala",
-   "version": "2.11.12"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}

@@ -12,7 +12,7 @@ LOGGER = logging.getLogger(__name__)
 @pytest.mark.parametrize(
     "test_file",
     # TODO: add local_sparklyr
-    ["local_pyspark", "local_spylon", "local_toree", "local_sparkR"],
+    ["local_pyspark", "local_spylon", "local_sparkR"],
 )
 def test_nbconvert(container, test_file):
     """Check if Spark notebooks can be executed"""
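
The test body is truncated by this hunk. As a rough sketch of what a parametrized nbconvert check of this shape can look like (the notebook paths and the use of tmp_path in place of the repository's container fixture are assumptions, not this repository's actual code):

    import subprocess

    import pytest


    @pytest.mark.parametrize(
        "test_file",
        ["local_pyspark", "local_spylon", "local_sparkR"],
    )
    def test_nbconvert(tmp_path, test_file):
        """Hypothetical sketch: execute a notebook with nbconvert, fail on error."""
        # Assumed notebook location; the real test runs inside the image.
        notebook = f"test/data/{test_file}.ipynb"
        result = subprocess.run(
            ["jupyter", "nbconvert", "--to", "notebook", "--execute",
             "--output-dir", str(tmp_path), notebook],
            capture_output=True, text=True,
        )
        assert result.returncode == 0, result.stderr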

@@ -11,19 +11,20 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 USER root
 
 # Spark dependencies
-ENV APACHE_SPARK_VERSION=2.4.5 \
-    HADOOP_VERSION=2.7
+ENV APACHE_SPARK_VERSION=3.0.0 \
+    HADOOP_VERSION=3.2
 
 RUN apt-get -y update && \
-    apt-get install --no-install-recommends -y openjdk-8-jre-headless ca-certificates-java && \
+    apt-get install --no-install-recommends -y openjdk-11-jre-headless ca-certificates-java && \
     rm -rf /var/lib/apt/lists/*
 
 # Using the preferred mirror to download Spark
 WORKDIR /tmp
 
 # hadolint ignore=SC2046
 RUN wget -q $(wget -qO- https://www.apache.org/dyn/closer.lua/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz\?as_json | \
     python -c "import sys, json; content=json.load(sys.stdin); print(content['preferred']+content['path_info'])") && \
-    echo "2426a20c548bdfc07df288cd1d18d1da6b3189d0b78dee76fa034c52a4e02895f0ad460720c526f163ba63a17efae4764c46a1cd8f9b04c60f9937a554db85d2 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
+    echo "BFE45406C67CC4AE00411AD18CC438F51E7D4B6F14EB61E7BF6B5450897C2E8D3AB020152657C0239F253735C263512FFABF538AC5B9FFFA38B8295736A9C387 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
     tar xzf "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" -C /usr/local --owner root --group root --no-same-owner && \
     rm "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz"
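
The nested wget/python pipeline above resolves Apache's preferred mirror before downloading. Unrolled as a standalone Python sketch (same closer.lua endpoint and JSON fields as in the RUN line; the version values are just the ones this diff pins):

    import json
    from urllib.request import urlopen

    # Ask the Apache mirror resolver for the closest mirror, as JSON.
    version, hadoop = "3.0.0", "3.2"
    path = f"spark/spark-{version}/spark-{version}-bin-hadoop{hadoop}.tgz"
    with urlopen(f"https://www.apache.org/dyn/closer.lua/{path}?as_json") as resp:
        content = json.load(resp)

    # 'preferred' is the chosen mirror base URL, 'path_info' the artifact path;
    # concatenating them yields the URL that wget fetches in the Dockerfile.
    print(content["preferred"] + content["path_info"])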

@@ -32,7 +33,7 @@ RUN ln -s "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" spark
 
 # Configure Spark
 ENV SPARK_HOME=/usr/local/spark
-ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip \
+ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip \
     SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info" \
     PATH=$PATH:$SPARK_HOME/bin
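
The py4j zip named in PYTHONPATH is pinned per Spark release, so a bump like 0.10.7 to 0.10.9 has to match what the distribution actually ships. A hypothetical sanity check, not part of this commit:

    import glob
    import os

    # The py4j zip pinned in PYTHONPATH must exist in the unpacked Spark
    # distribution, otherwise pyspark imports fail at runtime.
    spark_home = os.environ.get("SPARK_HOME", "/usr/local/spark")
    pinned = os.path.join(spark_home, "python", "lib", "py4j-0.10.9-src.zip")
    shipped = glob.glob(os.path.join(spark_home, "python", "lib", "py4j-*-src.zip"))
    assert pinned in shipped, f"PYTHONPATH pin {pinned} not found; shipped: {shipped}"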