Mirror of https://github.com/jupyter/docker-stacks.git (synced 2025-10-17 15:02:57 +00:00)
Add Scala as --build-arg (#1757)
* Add scala version choice
* Add ; \ fi
* Change checksum and remove default scala version
* Remove RUN
* Add { } and remove old code
* Remove 3 duplicated lines
* Add the commit as a comment
* Add back #Fix
* Rename downloaded file as spark.tgz
* Fix doc
* Update specifics.md
* New fix
* Fix wget
* Remove make link to spark
* Set full path to /usr/local/spark
* Change /usr/local/spark to ${SPARK_HOME}
* Fix RUN with if
* Remove empty lines
* Update Dockerfile
* Update Dockerfile
* Update Dockerfile

Co-authored-by: Ayaz Salikhov <mathbunnyru@users.noreply.github.com>
pyspark-notebook/Dockerfile
@@ -17,8 +17,8 @@ USER root
 # (ARGS are in lower case to distinguish them from ENV)
 ARG spark_version="3.3.0"
 ARG hadoop_version="3"
-ARG scala_version="2.13"
-ARG spark_checksum="4c09dac70e22bf1d5b7b2cabc1dd92aba13237f52a5b682c67982266fc7a0f5e0f964edff9bc76adbd8cb444eb1a00fdc59516147f99e4e2ce068420ff4881f0"
+ARG scala_version
+ARG spark_checksum="1e8234d0c1d2ab4462d6b0dfe5b54f2851dcd883378e0ed756140e10adfb5be4123961b521140f580e364c239872ea5a9f813a20b73c69cb6d4e95da2575c29c"
 ARG openjdk_version="17"

 ENV APACHE_SPARK_VERSION="${spark_version}" \
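With this change scala_version has no default: left unset, the build fetches the stock hadoop3 Spark distribution (compiled against Scala 2.12), and the default spark_checksum now pins that tarball. Selecting the Scala 2.13 build means overriding both arguments together. A minimal sketch of the resulting build commands; the image tag my-pyspark is an illustrative assumption, and the checksum shown for the 2.13 variant is the value the old Dockerfile pinned:

    # Default build: Spark 3.3.0 / Hadoop 3 / Scala 2.12
    docker build -t my-pyspark .

    # Scala 2.13 variant: override the version and the matching checksum together
    docker build \
        --build-arg scala_version="2.13" \
        --build-arg spark_checksum="4c09dac70e22bf1d5b7b2cabc1dd92aba13237f52a5b682c67982266fc7a0f5e0f964edff9bc76adbd8cb444eb1a00fdc59516147f99e4e2ce068420ff4881f0" \
        -t my-pyspark .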
@@ -32,22 +32,29 @@ RUN apt-get update --yes && \

 # Spark installation
 WORKDIR /tmp
-RUN wget -q "https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${scala_version}.tgz" && \
-    echo "${spark_checksum} *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${scala_version}.tgz" | sha512sum -c - && \
-    tar xzf "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${scala_version}.tgz" -C /usr/local --owner root --group root --no-same-owner && \
-    rm "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${scala_version}.tgz"
+RUN if [ -z "${scala_version}" ]; then \
+      wget -qO "spark.tgz" "https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz"; \
+    else \
+      wget -qO "spark.tgz" "https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${scala_version}.tgz"; \
+    fi && \
+    echo "${spark_checksum} *spark.tgz" | sha512sum -c - && \
+    tar xzf "spark.tgz" -C /usr/local --owner root --group root --no-same-owner && \
+    rm "spark.tgz"

-WORKDIR /usr/local

 # Configure Spark
 ENV SPARK_HOME=/usr/local/spark
 ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info" \
     PATH="${PATH}:${SPARK_HOME}/bin"

-RUN ln -s "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${scala_version}" spark && \
-    # Add a link in the before_notebook hook in order to source automatically PYTHONPATH
-    mkdir -p /usr/local/bin/before-notebook.d && \
-    ln -s "${SPARK_HOME}/sbin/spark-config.sh" /usr/local/bin/before-notebook.d/spark-config.sh
+RUN if [ -z "${scala_version}" ]; then \
+      ln -s "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" "${SPARK_HOME}"; \
+    else \
+      ln -s "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${scala_version}" "${SPARK_HOME}"; \
+    fi && \
+    # Add a link in the before_notebook hook in order to source automatically PYTHONPATH && \
+    mkdir -p /usr/local/bin/before-notebook.d && \
+    ln -s "${SPARK_HOME}/sbin/spark-config.sh" /usr/local/bin/before-notebook.d/spark-config.sh

 # Configure IPython system-wide
 COPY ipython_kernel_config.py "/etc/ipython/"
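In the new download step the tarball is always saved under the fixed name spark.tgz, so only the wget URL depends on scala_version; the sha512sum check, tar extraction, and cleanup stay identical in both branches, which is what removed the duplicated lines the commit message mentions. Anyone overriding the build args needs the sha512 of the exact tarball they select. A hedged sketch of computing that value up front, assuming the Spark 3.3.0 / Hadoop 3 / Scala 2.13 artifact:

    # Download the chosen tarball once and print its sha512,
    # then pass the hash as --build-arg spark_checksum=...
    wget -q "https://archive.apache.org/dist/spark/spark-3.3.0/spark-3.3.0-bin-hadoop3-scala2.13.tgz"
    sha512sum "spark-3.3.0-bin-hadoop3-scala2.13.tgz"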
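The second RUN replaces the old relative "ln -s ... spark" link (and the WORKDIR /usr/local it relied on) with a link created directly at ${SPARK_HOME}, again dispatching on whether scala_version is empty, so the versioned directory name never leaks outside this block. A quick way to confirm the link on a built image, reusing the assumed my-pyspark tag:

    # The stable path should resolve to the versioned directory picked at build time
    docker run --rm my-pyspark readlink -f /usr/local/spark
    # e.g. /usr/local/spark-3.3.0-bin-hadoop3-scala2.13
    docker run --rm my-pyspark spark-submit --version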