mirror of
https://github.com/jupyter/docker-stacks.git
synced 2025-10-12 04:22:58 +00:00
Start using spark4-preview versions (#2159)
* Start using spark4-preview versions * Allow downloading preview versions * Expect warnings in spark * Disable local_sparklyr test for now
This commit is contained in:
@@ -3,6 +3,15 @@
|
||||
This changelog only contains breaking and/or significant changes manually introduced to this repository (using Pull Requests).
|
||||
All image manifests can be found in [the wiki](https://github.com/jupyter/docker-stacks/wiki).
|
||||
|
||||
## 2024-10-22
|
||||
|
||||
Affected: users of the `pyspark-notebook` and `all-spark-notebook` images
|
||||
|
||||
- **Breaking:** Start using Spark 4.0.0 preview versions ([#2159](https://github.com/jupyter/docker-stacks/pull/2159)).
|
||||
`sparklyr` doesn't seem to support Spark v4 yet when using Spark locally.
|
||||
|
||||
Reason: Spark v3 is not compatible with Python 3.12, and [the voting group has decided](https://github.com/jupyter/docker-stacks/pull/2072#issuecomment-2414123851) to switch to Spark v4 preview version.
|
||||
|
||||
## 2024-10-09
|
||||
|
||||
Affected: users building a custom set of images
|
||||
|
@@ -63,7 +63,7 @@ USER ${NB_UID}
|
||||
RUN mamba install --yes \
|
||||
'grpcio-status' \
|
||||
'grpcio' \
|
||||
'pandas=2.0.3' \
|
||||
'pandas=2.2.2' \
|
||||
'pyarrow' && \
|
||||
mamba clean --all -f -y && \
|
||||
fix-permissions "${CONDA_DIR}" && \
|
||||
|
@@ -36,7 +36,7 @@ def get_latest_spark_version() -> str:
|
||||
stable_versions = [
|
||||
ref.removeprefix("spark-").removesuffix("/")
|
||||
for ref in all_refs
|
||||
if ref.startswith("spark-") and "incubating" not in ref and "preview" not in ref
|
||||
if ref.startswith("spark-") and "incubating" not in ref
|
||||
]
|
||||
|
||||
# Compare versions semantically
|
||||
|
@@ -14,7 +14,7 @@ THIS_DIR = Path(__file__).parent.resolve()
|
||||
@pytest.mark.flaky(retries=3, delay=1)
|
||||
@pytest.mark.parametrize(
|
||||
"test_file",
|
||||
["issue_1168", "local_pyspark", "local_sparklyr", "local_sparkR"],
|
||||
["issue_1168", "local_pyspark", "local_sparkR"],
|
||||
)
|
||||
def test_nbconvert(container: TrackedContainer, test_file: str) -> None:
|
||||
"""Check if Spark notebooks can be executed"""
|
||||
@@ -31,10 +31,14 @@ def test_nbconvert(container: TrackedContainer, test_file: str) -> None:
|
||||
)
|
||||
logs = container.run_and_wait(
|
||||
timeout=60,
|
||||
no_warnings=False,
|
||||
volumes={str(host_data_dir): {"bind": cont_data_dir, "mode": "ro"}},
|
||||
tty=True,
|
||||
command=["bash", "-c", command],
|
||||
)
|
||||
warnings = TrackedContainer.get_warnings(logs)
|
||||
assert len(warnings) == 1
|
||||
assert "Using incubator modules: jdk.incubator.vector" in warnings[0]
|
||||
|
||||
expected_file = f"{output_dir}/{test_file}.md"
|
||||
assert expected_file in logs, f"Expected file {expected_file} not generated"
|
||||
|
@@ -3,12 +3,20 @@
|
||||
import logging
|
||||
|
||||
from tests.conftest import TrackedContainer
|
||||
from tests.run_command import run_command
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def test_spark_shell(container: TrackedContainer) -> None:
|
||||
"""Checking if Spark (spark-shell) is running properly"""
|
||||
logs = run_command(container, 'spark-shell <<< "1+1"', timeout=60)
|
||||
logs = container.run_and_wait(
|
||||
timeout=60,
|
||||
no_warnings=False,
|
||||
tty=True,
|
||||
command=["bash", "-c", 'spark-shell <<< "1+1"'],
|
||||
)
|
||||
warnings = TrackedContainer.get_warnings(logs)
|
||||
assert len(warnings) == 1
|
||||
assert "Using incubator modules: jdk.incubator.vector" in warnings[0]
|
||||
|
||||
assert "res0: Int = 2" in logs, "spark-shell does not work"
|
||||
|
@@ -2,4 +2,4 @@
|
||||
# Distributed under the terms of the Modified BSD License.
|
||||
import pandas
|
||||
|
||||
assert pandas.__version__ == "2.0.3"
|
||||
assert pandas.__version__ == "2.2.2"
|
||||
|
Reference in New Issue
Block a user