mirror of
https://github.com/jupyter/docker-stacks.git
synced 2025-10-07 18:14:05 +00:00
Install Spark 4 release version (#2300)
This commit is contained in:
@@ -64,13 +64,12 @@ USER ${NB_UID}
|
|||||||
# NOTE: It's important to ensure compatibility between Pandas versions.
|
# NOTE: It's important to ensure compatibility between Pandas versions.
|
||||||
# The pandas version in this Dockerfile should match the version
|
# The pandas version in this Dockerfile should match the version
|
||||||
# on which the Pandas API for Spark is built.
|
# on which the Pandas API for Spark is built.
|
||||||
# To find the right version:
|
# To find the right version, check the pandas version being installed here:
|
||||||
# 1. Check out the Spark branch you are on: <https://github.com/apache/spark>
|
# https://github.com/apache/spark/blob/<SPARK_VERSION>/dev/infra/Dockerfile
|
||||||
# 2. Find the pandas version in the file `dev/infra/Dockerfile`.
|
|
||||||
RUN mamba install --yes \
|
RUN mamba install --yes \
|
||||||
'grpcio-status' \
|
'grpcio-status' \
|
||||||
'grpcio' \
|
'grpcio' \
|
||||||
'pandas=2.2.2' \
|
'pandas=2.2.3' \
|
||||||
'pyarrow' && \
|
'pyarrow' && \
|
||||||
mamba clean --all -f -y && \
|
mamba clean --all -f -y && \
|
||||||
fix-permissions "${CONDA_DIR}" && \
|
fix-permissions "${CONDA_DIR}" && \
|
||||||
|
@@ -35,11 +35,8 @@ def get_latest_spark_version() -> str:
|
|||||||
LOGGER.info("Downloading Spark versions information")
|
LOGGER.info("Downloading Spark versions information")
|
||||||
all_refs = get_all_refs("https://archive.apache.org/dist/spark/")
|
all_refs = get_all_refs("https://archive.apache.org/dist/spark/")
|
||||||
LOGGER.info(f"All refs: {all_refs}")
|
LOGGER.info(f"All refs: {all_refs}")
|
||||||
versions = [
|
pattern = re.compile(r"^spark-(\d+\.\d+\.\d+)/$")
|
||||||
ref.removeprefix("spark-").removesuffix("/")
|
versions = [match.group(1) for ref in all_refs if (match := pattern.match(ref))]
|
||||||
for ref in all_refs
|
|
||||||
if re.match(r"^spark-\d", ref) is not None and "incubating" not in ref
|
|
||||||
]
|
|
||||||
LOGGER.info(f"Available versions: {versions}")
|
LOGGER.info(f"Available versions: {versions}")
|
||||||
|
|
||||||
# Compare versions semantically
|
# Compare versions semantically
|
||||||
@@ -74,6 +71,7 @@ def download_spark(
|
|||||||
spark_dir_name += f"-scala{scala_version}"
|
spark_dir_name += f"-scala{scala_version}"
|
||||||
LOGGER.info(f"Spark directory name: {spark_dir_name}")
|
LOGGER.info(f"Spark directory name: {spark_dir_name}")
|
||||||
spark_url = spark_download_url / f"spark-{spark_version}" / f"{spark_dir_name}.tgz"
|
spark_url = spark_download_url / f"spark-{spark_version}" / f"{spark_dir_name}.tgz"
|
||||||
|
LOGGER.info(f"Spark download URL: {spark_url}")
|
||||||
|
|
||||||
tmp_file = Path("/tmp/spark.tar.gz")
|
tmp_file = Path("/tmp/spark.tar.gz")
|
||||||
subprocess.check_call(
|
subprocess.check_call(
|
||||||
|
@@ -2,4 +2,4 @@
|
|||||||
# Distributed under the terms of the Modified BSD License.
|
# Distributed under the terms of the Modified BSD License.
|
||||||
import pandas
|
import pandas
|
||||||
|
|
||||||
assert pandas.__version__ == "2.2.2"
|
assert pandas.__version__ == "2.2.3"
|
||||||
|
@@ -17,7 +17,7 @@ def check_nbconvert(
|
|||||||
no_warnings: bool = True,
|
no_warnings: bool = True,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Check if nbconvert is able to convert a notebook file"""
|
"""Check if nbconvert is able to convert a notebook file"""
|
||||||
cont_data_file = "/home/jovyan/data/" + host_file.name
|
cont_data_file = "/home/jovyan/" + host_file.name
|
||||||
|
|
||||||
output_dir = "/tmp"
|
output_dir = "/tmp"
|
||||||
LOGGER.info(
|
LOGGER.info(
|
||||||
|
Reference in New Issue
Block a user