mirror of
https://github.com/jupyter/docker-stacks.git
synced 2025-10-12 04:22:58 +00:00
Bump to spark 2.4.5 + minor improvements
This commit is contained in:
2
Makefile
2
Makefile
@@ -90,7 +90,7 @@ tx-en: ## rebuild en locale strings and push to master (req: GH_TOKEN)
|
|||||||
|
|
||||||
|
|
||||||
test/%: ## run tests against a stack
|
test/%: ## run tests against a stack
|
||||||
@TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest test
|
@TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest test $(notdir $@)/test
|
||||||
|
|
||||||
test/base-notebook: ## test supported options in the base notebook
|
test/base-notebook: ## test supported options in the base notebook
|
||||||
@TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest test base-notebook/test
|
@TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest test base-notebook/test
|
@@ -78,6 +78,20 @@ def test_chown_extra(container):
|
|||||||
assert '/opt/conda/LICENSE.txt:1010:101' in c.logs(stdout=True).decode('utf-8')
|
assert '/opt/conda/LICENSE.txt:1010:101' in c.logs(stdout=True).decode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
|
def test_chown_home(container):
|
||||||
|
"""Container should change the NB_USER home directory owner and
|
||||||
|
group to the current value of NB_UID and NB_GID."""
|
||||||
|
c = container.run(
|
||||||
|
tty=True,
|
||||||
|
user='root',
|
||||||
|
environment=['CHOWN_HOME=yes',
|
||||||
|
'CHOWN_HOME_OPTS=-R',
|
||||||
|
],
|
||||||
|
command=['start.sh', 'bash', '-c', 'chown root:root /home/jovyan && ls -alsh /home']
|
||||||
|
)
|
||||||
|
assert "Changing ownership of /home/jovyan to 1000:100 with options '-R'" in c.logs(stdout=True).decode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
def test_sudo(container):
|
def test_sudo(container):
|
||||||
"""Container should grant passwordless sudo to the default user."""
|
"""Container should grant passwordless sudo to the default user."""
|
||||||
c = container.run(
|
c = container.run(
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
# Copyright (c) Jupyter Development Team.
|
# Copyright (c) Jupyter Development Team.
|
||||||
# Distributed under the terms of the Modified BSD License.
|
# Distributed under the terms of the Modified BSD License.
|
||||||
import os
|
import os
|
||||||
|
import logging
|
||||||
|
|
||||||
import docker
|
import docker
|
||||||
import pytest
|
import pytest
|
||||||
@@ -10,6 +11,8 @@ from requests.packages.urllib3.util.retry import Retry
|
|||||||
from requests.adapters import HTTPAdapter
|
from requests.adapters import HTTPAdapter
|
||||||
|
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
@pytest.fixture(scope='session')
|
@pytest.fixture(scope='session')
|
||||||
def http_client():
|
def http_client():
|
||||||
"""Requests session with retries and backoff."""
|
"""Requests session with retries and backoff."""
|
||||||
@@ -72,6 +75,7 @@ class TrackedContainer(object):
|
|||||||
all_kwargs = {}
|
all_kwargs = {}
|
||||||
all_kwargs.update(self.kwargs)
|
all_kwargs.update(self.kwargs)
|
||||||
all_kwargs.update(kwargs)
|
all_kwargs.update(kwargs)
|
||||||
|
LOGGER.info(f"Running {self.image_name} with args {all_kwargs} ...")
|
||||||
self.container = self.docker_client.containers.run(self.image_name, **all_kwargs)
|
self.container = self.docker_client.containers.run(self.image_name, **all_kwargs)
|
||||||
return self.container
|
return self.container
|
||||||
|
|
||||||
|
@@ -8,8 +8,8 @@ LABEL maintainer="Jupyter Project <jupyter@googlegroups.com>"
|
|||||||
USER root
|
USER root
|
||||||
|
|
||||||
# Spark dependencies
|
# Spark dependencies
|
||||||
ENV APACHE_SPARK_VERSION 2.4.4
|
ENV APACHE_SPARK_VERSION=2.4.5 \
|
||||||
ENV HADOOP_VERSION 2.7
|
HADOOP_VERSION=2.7
|
||||||
|
|
||||||
RUN apt-get -y update && \
|
RUN apt-get -y update && \
|
||||||
apt-get install --no-install-recommends -y openjdk-8-jre-headless ca-certificates-java && \
|
apt-get install --no-install-recommends -y openjdk-8-jre-headless ca-certificates-java && \
|
||||||
@@ -17,7 +17,7 @@ RUN apt-get -y update && \
|
|||||||
|
|
||||||
RUN cd /tmp && \
|
RUN cd /tmp && \
|
||||||
wget -q http://mirrors.ukfast.co.uk/sites/ftp.apache.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \
|
wget -q http://mirrors.ukfast.co.uk/sites/ftp.apache.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \
|
||||||
echo "2E3A5C853B9F28C7D4525C0ADCB0D971B73AD47D5CCE138C85335B9F53A6519540D3923CB0B5CEE41E386E49AE8A409A51AB7194BA11A254E037A848D0C4A9E5 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
|
echo "2426a20c548bdfc07df288cd1d18d1da6b3189d0b78dee76fa034c52a4e02895f0ad460720c526f163ba63a17efae4764c46a1cd8f9b04c60f9937a554db85d2 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
|
||||||
tar xzf spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz -C /usr/local --owner root --group root --no-same-owner && \
|
tar xzf spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz -C /usr/local --owner root --group root --no-same-owner && \
|
||||||
rm spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
|
rm spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
|
||||||
RUN cd /usr/local && ln -s spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} spark
|
RUN cd /usr/local && ln -s spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} spark
|
||||||
@@ -36,10 +36,11 @@ RUN apt-get -y update && \
|
|||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Spark and Mesos config
|
# Spark and Mesos config
|
||||||
ENV SPARK_HOME /usr/local/spark
|
ENV SPARK_HOME=/usr/local/spark \
|
||||||
ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip
|
PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip \
|
||||||
ENV MESOS_NATIVE_LIBRARY /usr/local/lib/libmesos.so
|
MESOS_NATIVE_LIBRARY=/usr/local/lib/libmesos.so \
|
||||||
ENV SPARK_OPTS --driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info
|
SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info" \
|
||||||
|
PATH=$PATH:/usr/local/spark/bin
|
||||||
|
|
||||||
USER $NB_UID
|
USER $NB_UID
|
||||||
|
|
||||||
|
19
pyspark-notebook/test/test_spark.py
Normal file
19
pyspark-notebook/test/test_spark.py
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
# Copyright (c) Jupyter Development Team.
|
||||||
|
# Distributed under the terms of the Modified BSD License.
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
def test_spark_shell(container):
|
||||||
|
"""Checking if Spark (spark-shell) is running properly"""
|
||||||
|
c = container.run(
|
||||||
|
tty=True,
|
||||||
|
command=['start.sh', 'bash', '-c', 'spark-shell <<< "1+1"']
|
||||||
|
)
|
||||||
|
c.wait(timeout=30)
|
||||||
|
logs = c.logs(stdout=True).decode('utf-8')
|
||||||
|
LOGGER.debug(logs)
|
||||||
|
assert 'res0: Int = 2' in logs
|
5
pytest.ini
Normal file
5
pytest.ini
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
[pytest]
|
||||||
|
log_cli = 1
|
||||||
|
log_cli_level = INFO
|
||||||
|
log_cli_format = %(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)
|
||||||
|
log_cli_date_format=%Y-%m-%d %H:%M:%S
|
Reference in New Issue
Block a user