mirror of
https://github.com/jupyter/docker-stacks.git
synced 2025-10-15 22:12:57 +00:00
Upgrade Apache Spark to 3.5.0 (#1995)
* 1.
* add note for pandas version
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci
* Update images/pyspark-notebook/Dockerfile

  Co-authored-by: Ayaz Salikhov <mathbunnyru@users.noreply.github.com>

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Ayaz Salikhov <mathbunnyru@users.noreply.github.com>
This commit is contained in:
@@ -15,10 +15,10 @@ USER root
|
|||||||
# Spark dependencies
|
# Spark dependencies
|
||||||
# Default values can be overridden at build time
|
# Default values can be overridden at build time
|
||||||
# (ARGS are in lower case to distinguish them from ENV)
|
# (ARGS are in lower case to distinguish them from ENV)
|
||||||
ARG spark_version="3.4.1"
|
ARG spark_version="3.5.0"
|
||||||
ARG hadoop_version="3"
|
ARG hadoop_version="3"
|
||||||
ARG scala_version
|
ARG scala_version
|
||||||
ARG spark_checksum="5a21295b4c3d1d3f8fc85375c711c7c23e3eeb3ec9ea91778f149d8d321e3905e2f44cf19c69a28df693cffd536f7316706c78932e7e148d224424150f18b2c5"
|
ARG spark_checksum="8883c67e0a138069e597f3e7d4edbbd5c3a565d50b28644aad02856a1ec1da7cb92b8f80454ca427118f69459ea326eaa073cf7b1a860c3b796f4b07c2101319"
|
||||||
ARG openjdk_version="17"
|
ARG openjdk_version="17"
|
||||||
|
|
||||||
ENV APACHE_SPARK_VERSION="${spark_version}" \
|
ENV APACHE_SPARK_VERSION="${spark_version}" \
|
||||||
@@ -66,9 +66,14 @@ RUN fix-permissions "/etc/ipython/"
|
|||||||
USER ${NB_UID}
|
USER ${NB_UID}
|
||||||
|
|
||||||
# Install pyarrow
|
# Install pyarrow
|
||||||
# Temporarily pin pandas to version 1.5.3, see: https://github.com/jupyter/docker-stacks/issues/1924
|
# NOTE: It's important to ensure compatibility between Pandas versions.
|
||||||
|
# The pandas version in this Dockerfile should match the version
|
||||||
|
# on which the Pandas API for Spark is built.
|
||||||
|
# To find the right version:
|
||||||
|
# 1. Check out the Spark branch you are on.
|
||||||
|
# 2. Find the pandas version in the file spark/dev/infra/Dockerfile.
|
||||||
RUN mamba install --yes \
|
RUN mamba install --yes \
|
||||||
'pandas>=1.5.3,<2.0.0' \
|
'pandas=2.0.3' \
|
||||||
'pyarrow' && \
|
'pyarrow' && \
|
||||||
mamba clean --all -f -y && \
|
mamba clean --all -f -y && \
|
||||||
fix-permissions "${CONDA_DIR}" && \
|
fix-permissions "${CONDA_DIR}" && \
|
||||||
|
@@ -2,4 +2,4 @@
|
|||||||
# Distributed under the terms of the Modified BSD License.
|
# Distributed under the terms of the Modified BSD License.
|
||||||
import pandas
|
import pandas
|
||||||
|
|
||||||
assert pandas.__version__ == "1.5.3"
|
assert pandas.__version__ == "2.0.3"
|
||||||
|
Reference in New Issue
Block a user