Use curl instead of wget for Spark and Julia downloads (#1950)

Co-authored-by: Bjørn Jørgensen <bjornjorgensen@users.noreply.github.com>
2025-10-12 04:22:58 +00:00 · 2023-08-03 13:18:01 +04:00
parent 1d9e4f99a5
commit e1bd309263
4 changed files with 8 additions and 3 deletions
--- a/docs/using/selecting.md
+++ b/docs/using/selecting.md
@@ -71,6 +71,7 @@ It contains:
 - Everything in `jupyter/base-notebook`
 - Common useful utilities like
+  [curl](https://curl.se),
  [git](https://git-scm.com/),
  [nano](https://www.nano-editor.org/) (actually `nano-tiny`),
  [tzdata](https://www.iana.org/time-zones),
--- a/minimal-notebook/Dockerfile
+++ b/minimal-notebook/Dockerfile
@@ -16,6 +16,7 @@ USER root
 RUN apt-get update --yes && \
    apt-get install --yes --no-install-recommends \
    # Common useful utilities
+    curl \
    git \
    nano-tiny \
    tzdata \
--- a/minimal-notebook/setup-scripts/setup-julia.bash
+++ b/minimal-notebook/setup-scripts/setup-julia.bash
@@ -22,7 +22,8 @@ JULIA_MAJOR_MINOR=$(echo "${JULIA_VERSION}" | cut -d. -f 1,2)
 # Download and install Julia
 cd /tmp
 mkdir "/opt/julia-${JULIA_VERSION}"
-wget --progress=dot:giga "https://julialang-s3.julialang.org/bin/linux/${JULIA_SHORT_ARCH}/${JULIA_MAJOR_MINOR}/${JULIA_INSTALLER}"
+curl --progress-bar --location --output "${JULIA_INSTALLER}" \
+    "https://julialang-s3.julialang.org/bin/linux/${JULIA_SHORT_ARCH}/${JULIA_MAJOR_MINOR}/${JULIA_INSTALLER}"
 tar xzf "${JULIA_INSTALLER}" -C "/opt/julia-${JULIA_VERSION}" --strip-components=1
 rm "${JULIA_INSTALLER}"
--- a/pyspark-notebook/Dockerfile
+++ b/pyspark-notebook/Dockerfile
@@ -36,9 +36,11 @@ WORKDIR /tmp
 # You need to use https://archive.apache.org/dist/ website if you want to download old Spark versions
 # But it seems to be slower, that's why we use recommended site for download
 RUN if [ -z "${scala_version}" ]; then \
-    wget --progress=dot:giga -O "spark.tgz" "https://dlcdn.apache.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz"; \
+    curl --progress-bar --location --output "spark.tgz" \
+        "https://dlcdn.apache.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz"; \
  else \
-    wget --progress=dot:giga -O "spark.tgz" "https://dlcdn.apache.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${scala_version}.tgz"; \
+    curl --progress-bar --location --output "spark.tgz" \
+        "https://dlcdn.apache.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${scala_version}.tgz"; \
  fi && \
  echo "${spark_checksum} *spark.tgz" | sha512sum -c - && \
  tar xzf "spark.tgz" -C /usr/local --owner root --group root --no-same-owner && \