diff --git a/pyspark-notebook/Dockerfile b/pyspark-notebook/Dockerfile index 03d74cc7..84444335 100644 --- a/pyspark-notebook/Dockerfile +++ b/pyspark-notebook/Dockerfile @@ -33,10 +33,12 @@ RUN apt-get update --yes && \ # Spark installation WORKDIR /tmp +# To download older Spark versions, use the https://archive.apache.org/dist/ website instead. +# It appears to be slower, which is why we use the recommended download site (https://dlcdn.apache.org/) here. RUN if [ -z "${scala_version}" ]; then \ - wget -qO "spark.tgz" "https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz"; \ + wget -qO "spark.tgz" "https://dlcdn.apache.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz"; \ else \ - wget -qO "spark.tgz" "https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${scala_version}.tgz"; \ + wget -qO "spark.tgz" "https://dlcdn.apache.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${scala_version}.tgz"; \ fi && \ echo "${spark_checksum} *spark.tgz" | sha512sum -c - && \ tar xzf "spark.tgz" -C /usr/local --owner root --group root --no-same-owner && \