Mirror of https://github.com/jupyter/docker-stacks.git (synced 2025-10-10 11:32:57 +00:00)

Spark installation improved by sourcing `spark-config.sh` in the `before-notebook.d` hook run by `start.sh`. This automatically adds the right Py4J dependency version to the `PYTHONPATH`, so the variable no longer needs to be set at build time. The documentation describing the installation of a custom Spark version has been modified to remove this step and updated to install the latest `2.x` Spark version. `test_pyspark` has been fixed (it always passed before).
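For context, here is a minimal Python sketch of what sourcing `spark-config.sh` at container start effectively achieves: resolving the Py4J archive bundled with the installed Spark at runtime and exposing it on the Python path, instead of hard-coding its version at build time. The `/usr/local/spark` default and the glob pattern are assumptions for illustration, not the contents of the actual script.

```python
# Illustrative only: roughly what the start-up hook accomplishes for PySpark.
import glob
import os
import sys

# Assumed install location; the real image may differ.
spark_home = os.environ.get("SPARK_HOME", "/usr/local/spark")

# Find whichever Py4J archive ships with this Spark release, e.g. py4j-<version>-src.zip.
py4j_archives = glob.glob(os.path.join(spark_home, "python", "lib", "py4j-*.zip"))

if py4j_archives:
    # Expose Spark's Python sources and the matching Py4J archive,
    # so `import pyspark` works without pinning the Py4J version at build time.
    sys.path[:0] = [os.path.join(spark_home, "python"), py4j_archives[0]]
```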
31 lines
883 B
Python
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
import logging

LOGGER = logging.getLogger(__name__)


def test_spark_shell(container):
    """Checking if Spark (spark-shell) is running properly"""
    c = container.run(
        tty=True,
        command=['start.sh', 'bash', '-c', 'spark-shell <<< "1+1"']
    )
    c.wait(timeout=60)
    logs = c.logs(stdout=True).decode('utf-8')
    LOGGER.debug(logs)
    assert 'res0: Int = 2' in logs, "spark-shell does not work"


def test_pyspark(container):
    """PySpark should be in the Python path"""
    c = container.run(
        tty=True,
        command=['start.sh', 'python', '-c', 'import pyspark']
    )
    rv = c.wait(timeout=30)
    assert rv == 0 or rv["StatusCode"] == 0, "pyspark not in PYTHONPATH"
    logs = c.logs(stdout=True).decode('utf-8')
    LOGGER.debug(logs)