Mirror of https://github.com/jupyter/docker-stacks.git, synced 2025-10-12 04:22:58 +00:00
Start using spark4-preview versions (#2159)
* Start using spark4-preview versions
* Allow downloading preview versions
* Expect warnings in Spark
* Disable the local_sparklyr test for now
@@ -3,6 +3,15 @@
 This changelog only contains breaking and/or significant changes manually introduced to this repository (using Pull Requests).
 
 All image manifests can be found in [the wiki](https://github.com/jupyter/docker-stacks/wiki).
 
+## 2024-10-22
+
+Affected: `pyspark-notebook` and `all-spark-notebook` image users
+
+- **Breaking:** Start using Spark 4.0.0 preview versions ([#2159](https://github.com/jupyter/docker-stacks/pull/2159)).
+  `sparklyr` doesn't seem to support Spark v4 yet when using Spark locally.
+  Reason: Spark v3 is not compatible with Python 3.12, and [the voting group has decided](https://github.com/jupyter/docker-stacks/pull/2072#issuecomment-2414123851) to switch to the Spark v4 preview version.
+
 ## 2024-10-09
 
 Affected: users building a custom set of images
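
A quick way to observe the pairing the changelog describes, run inside one of the updated images (a hypothetical probe, not part of this commit; assumes `pyspark` is importable, as it is in `pyspark-notebook`):

import sys

import pyspark

# Assumption for illustration: the updated images ship Python 3.12, which
# Spark 3.x does not support -- hence the jump to a 4.0.0 preview build.
print(sys.version_info[:2])  # e.g. (3, 12)
print(pyspark.__version__)   # e.g. a 4.0.0 preview version string
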
@@ -63,7 +63,7 @@ USER ${NB_UID}
 RUN mamba install --yes \
     'grpcio-status' \
     'grpcio' \
-    'pandas=2.0.3' \
+    'pandas=2.2.2' \
     'pyarrow' && \
     mamba clean --all -f -y && \
     fix-permissions "${CONDA_DIR}" && \
@@ -36,7 +36,7 @@ def get_latest_spark_version() -> str:
     stable_versions = [
         ref.removeprefix("spark-").removesuffix("/")
         for ref in all_refs
-        if ref.startswith("spark-") and "incubating" not in ref and "preview" not in ref
+        if ref.startswith("spark-") and "incubating" not in ref
     ]
 
     # Compare versions semantically
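
The dropped `"preview" not in ref` guard is what previously kept preview builds out of the candidate list. A minimal standalone sketch of the filter after this change, using assumed sample refs (the real `all_refs` is scraped from the Apache Spark download archive):

# Assumed sample refs for illustration; the real list comes from the
# Spark archive listing.
all_refs = ["spark-3.5.3/", "spark-4.0.0-preview2/", "spark-0.9.1-incubating/"]

stable_versions = [
    ref.removeprefix("spark-").removesuffix("/")
    for ref in all_refs
    if ref.startswith("spark-") and "incubating" not in ref
]
print(stable_versions)  # ['3.5.3', '4.0.0-preview2'] -- preview builds now pass
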
@@ -14,7 +14,7 @@ THIS_DIR = Path(__file__).parent.resolve()
 @pytest.mark.flaky(retries=3, delay=1)
 @pytest.mark.parametrize(
     "test_file",
-    ["issue_1168", "local_pyspark", "local_sparklyr", "local_sparkR"],
+    ["issue_1168", "local_pyspark", "local_sparkR"],
 )
 def test_nbconvert(container: TrackedContainer, test_file: str) -> None:
     """Check if Spark notebooks can be executed"""
@@ -31,10 +31,14 @@ def test_nbconvert(container: TrackedContainer, test_file: str) -> None:
     )
     logs = container.run_and_wait(
         timeout=60,
+        no_warnings=False,
         volumes={str(host_data_dir): {"bind": cont_data_dir, "mode": "ro"}},
         tty=True,
         command=["bash", "-c", command],
     )
+    warnings = TrackedContainer.get_warnings(logs)
+    assert len(warnings) == 1
+    assert "Using incubator modules: jdk.incubator.vector" in warnings[0]
 
     expected_file = f"{output_dir}/{test_file}.md"
     assert expected_file in logs, f"Expected file {expected_file} not generated"
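
Here `no_warnings=False` stops the harness from failing on warning output, and the new assertions then pin down exactly one expected warning. `TrackedContainer.get_warnings` is defined in the repository's test conftest; a hedged stand-in that captures the idea (the real implementation may differ) is:

# Assumed stand-in for TrackedContainer.get_warnings: collect WARN-ish
# lines from the captured container logs.
def get_warnings(logs: str) -> list[str]:
    return [line for line in logs.splitlines() if "WARNING" in line]

sample = 'WARNING: Using incubator modules: jdk.incubator.vector\nres0: Int = 2'
assert len(get_warnings(sample)) == 1
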
@@ -3,12 +3,20 @@
 import logging
 
 from tests.conftest import TrackedContainer
-from tests.run_command import run_command
 
 LOGGER = logging.getLogger(__name__)
 
 
 def test_spark_shell(container: TrackedContainer) -> None:
     """Checking if Spark (spark-shell) is running properly"""
-    logs = run_command(container, 'spark-shell <<< "1+1"', timeout=60)
+    logs = container.run_and_wait(
+        timeout=60,
+        no_warnings=False,
+        tty=True,
+        command=["bash", "-c", 'spark-shell <<< "1+1"'],
+    )
+    warnings = TrackedContainer.get_warnings(logs)
+    assert len(warnings) == 1
+    assert "Using incubator modules: jdk.incubator.vector" in warnings[0]
+
     assert "res0: Int = 2" in logs, "spark-shell does not work"
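
The same smoke test can be reproduced outside pytest. A rough manual equivalent (assumptions: image tag `jupyter/pyspark-notebook`, a local Docker daemon, and a generous timeout since startup can be slow):

import subprocess

# Pipe a trivial expression into spark-shell and look for its evaluated
# result in the captured output, mirroring the assertion above.
result = subprocess.run(
    ["docker", "run", "--rm", "-t", "jupyter/pyspark-notebook",
     "bash", "-c", 'spark-shell <<< "1+1"'],
    capture_output=True, text=True, timeout=600,
)
assert "res0: Int = 2" in result.stdout, "spark-shell does not work"
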
@@ -2,4 +2,4 @@
 # Distributed under the terms of the Modified BSD License.
 import pandas
 
-assert pandas.__version__ == "2.0.3"
+assert pandas.__version__ == "2.2.2"