Start using spark4-preview versions (#2159)

* Start using spark4-preview versions

* Allow downloading preview versions

* Expect warnings in Spark tests

* Disable local_sparklyr test for now
Author: Ayaz Salikhov
Date: 2024-10-22 11:47:45 +01:00
Committed by: GitHub
Parent: 2f1cf2a2ef
Commit: b744182207
6 changed files with 27 additions and 6 deletions


@@ -3,6 +3,15 @@
 This changelog only contains breaking and/or significant changes manually introduced to this repository (using Pull Requests).
 All image manifests can be found in [the wiki](https://github.com/jupyter/docker-stacks/wiki).
 
+## 2024-10-22
+
+Affected: `pyspark-notebook` and `all-spark-notebook` images users
+
+- **Breaking:** Start using Spark 4.0.0 preview versions ([#2159](https://github.com/jupyter/docker-stacks/pull/2159)).
+  `sparklyr` doesn't seem to support Spark v4 yet when using Spark locally.
+
+Reason: Spark v3 is not compatible with Python 3.12, and [the voting group has decided](https://github.com/jupyter/docker-stacks/pull/2072#issuecomment-2414123851) to switch to Spark v4 preview version.
+
 ## 2024-10-09
 
 Affected: users building a custom set of images


@@ -63,7 +63,7 @@ USER ${NB_UID}
 RUN mamba install --yes \
     'grpcio-status' \
     'grpcio' \
-    'pandas=2.0.3' \
+    'pandas=2.2.2' \
     'pyarrow' && \
     mamba clean --all -f -y && \
     fix-permissions "${CONDA_DIR}" && \
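
Note that `'pandas=2.2.2'` uses conda's fuzzy match-spec: a single `=` is a prefix match (it would also accept a hypothetical `2.2.2.post0`), whereas `==` pins exactly. A quick way to confirm what the solver actually installed in a built image, sketched under the assumption that the image tag below exists and `docker` is on `PATH`:

```python
# Sketch: query the resolved pandas version inside a built image.
# IMAGE is an assumption for illustration, adjust to your tag.
import subprocess

IMAGE = "quay.io/jupyter/pyspark-notebook"  # assumed tag

version = subprocess.run(
    ["docker", "run", "--rm", IMAGE,
     "python", "-c", "import pandas; print(pandas.__version__)"],
    capture_output=True, text=True, check=True,
).stdout.strip()
assert version.startswith("2.2.2"), f"unexpected pandas version: {version}"
```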


@@ -36,7 +36,7 @@ def get_latest_spark_version() -> str:
     stable_versions = [
         ref.removeprefix("spark-").removesuffix("/")
         for ref in all_refs
-        if ref.startswith("spark-") and "incubating" not in ref and "preview" not in ref
+        if ref.startswith("spark-") and "incubating" not in ref
     ]
     # Compare versions semantically
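
With the `"preview" not in ref` guard gone, `4.0.0-preview` directory refs from the Apache archive listing survive the filter and can win the version comparison. A minimal self-contained sketch of that selection logic (not the repository's actual implementation; it assumes `packaging` is available and uses made-up refs):

```python
# Sketch: pick the newest Spark release from archive directory refs,
# now keeping "preview" tags. packaging.version handles the comparison;
# PEP 440 accepts "preview" as a pre-release spelling, so it parses cleanly.
from packaging.version import Version

def pick_latest_spark(all_refs: list[str]) -> str:
    versions = [
        ref.removeprefix("spark-").removesuffix("/")
        for ref in all_refs
        if ref.startswith("spark-") and "incubating" not in ref
    ]
    return max(versions, key=Version)

# Hypothetical refs as they might appear in an archive directory listing
refs = ["spark-3.5.3/", "spark-4.0.0-preview2/", "spark-3.4.4/"]
assert pick_latest_spark(refs) == "4.0.0-preview2"
```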


@@ -14,7 +14,7 @@ THIS_DIR = Path(__file__).parent.resolve()
 @pytest.mark.flaky(retries=3, delay=1)
 @pytest.mark.parametrize(
     "test_file",
-    ["issue_1168", "local_pyspark", "local_sparklyr", "local_sparkR"],
+    ["issue_1168", "local_pyspark", "local_sparkR"],
 )
 def test_nbconvert(container: TrackedContainer, test_file: str) -> None:
     """Check if Spark notebooks can be executed"""
@@ -31,10 +31,14 @@ def test_nbconvert(container: TrackedContainer, test_file: str) -> None:
     )
     logs = container.run_and_wait(
         timeout=60,
+        no_warnings=False,
         volumes={str(host_data_dir): {"bind": cont_data_dir, "mode": "ro"}},
         tty=True,
         command=["bash", "-c", command],
     )
+    warnings = TrackedContainer.get_warnings(logs)
+    assert len(warnings) == 1
+    assert "Using incubator modules: jdk.incubator.vector" in warnings[0]
 
     expected_file = f"{output_dir}/{test_file}.md"
     assert expected_file in logs, f"Expected file {expected_file} not generated"
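
Spark 4 preview runs on a JDK that loads the vector API incubator module, and the JVM announces this with a `WARNING: Using incubator modules: jdk.incubator.vector` line at startup; passing `no_warnings=False` stops the harness from failing on it, and the new asserts pin it down as the only expected warning. For illustration, a plausible stand-in for `TrackedContainer.get_warnings` (not its actual implementation), assuming warnings are the log lines starting with `WARNING`:

```python
# Illustration only: a hypothetical equivalent of the harness helper,
# filtering container logs down to warning lines.
def get_warnings(logs: str) -> list[str]:
    return [line for line in logs.splitlines() if line.startswith("WARNING")]

sample_logs = (
    "WARNING: Using incubator modules: jdk.incubator.vector\n"
    "[NbConvertApp] Converting notebook local_pyspark.ipynb to markdown\n"
)
warnings = get_warnings(sample_logs)
assert len(warnings) == 1
assert "Using incubator modules: jdk.incubator.vector" in warnings[0]
```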


@@ -3,12 +3,20 @@
 import logging
 
 from tests.conftest import TrackedContainer
-from tests.run_command import run_command
 
 LOGGER = logging.getLogger(__name__)
 
 
 def test_spark_shell(container: TrackedContainer) -> None:
     """Checking if Spark (spark-shell) is running properly"""
-    logs = run_command(container, 'spark-shell <<< "1+1"', timeout=60)
+    logs = container.run_and_wait(
+        timeout=60,
+        no_warnings=False,
+        tty=True,
+        command=["bash", "-c", 'spark-shell <<< "1+1"'],
+    )
+    warnings = TrackedContainer.get_warnings(logs)
+    assert len(warnings) == 1
+    assert "Using incubator modules: jdk.incubator.vector" in warnings[0]
+
     assert "res0: Int = 2" in logs, "spark-shell does not work"


@@ -2,4 +2,4 @@
 # Distributed under the terms of the Modified BSD License.
 import pandas
 
-assert pandas.__version__ == "2.0.3"
+assert pandas.__version__ == "2.2.2"