Bump to Spark 2.4.5 + minor improvements

This commit is contained in:
romainx
2020-02-11 21:30:47 +01:00
parent 3deefc7d16
commit 45d51e3b42
6 changed files with 52 additions and 9 deletions

View File

@@ -90,7 +90,7 @@ tx-en: ## rebuild en locale strings and push to master (req: GH_TOKEN)
test/%: ## run tests against a stack
@TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest test
@TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest test $(notdir $@)/test
test/base-notebook: ## test supported options in the base notebook
@TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest test base-notebook/test

View File

@@ -78,6 +78,20 @@ def test_chown_extra(container):
assert '/opt/conda/LICENSE.txt:1010:101' in c.logs(stdout=True).decode('utf-8')
def test_chown_home(container):
    """Container should change the NB_USER home directory owner and
    group to the current value of NB_UID and NB_GID.

    The container is started as root with CHOWN_HOME=yes and
    CHOWN_HOME_OPTS=-R. The test passes when the ownership-change message
    for /home/jovyan shows up in the container logs.
    """
    c = container.run(
        tty=True,
        user='root',
        environment=[
            'CHOWN_HOME=yes',
            'CHOWN_HOME_OPTS=-R',
        ],
        command=['start.sh', 'bash', '-c', 'chown root:root /home/jovyan && ls -alsh /home']
    )
    # Wait for the container to exit before reading its logs; reading them
    # right after start can race the container and miss the expected message.
    # The timeout is generous so a slow host does not fail the test spuriously.
    c.wait(timeout=120)
    logs = c.logs(stdout=True).decode('utf-8')
    assert "Changing ownership of /home/jovyan to 1000:100 with options '-R'" in logs
def test_sudo(container):
"""Container should grant passwordless sudo to the default user."""
c = container.run(

View File

@@ -1,6 +1,7 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
import os
import logging
import docker
import pytest
@@ -10,6 +11,8 @@ from requests.packages.urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
LOGGER = logging.getLogger(__name__)
@pytest.fixture(scope='session')
def http_client():
"""Requests session with retries and backoff."""
@@ -72,6 +75,7 @@ class TrackedContainer(object):
all_kwargs = {}
all_kwargs.update(self.kwargs)
all_kwargs.update(kwargs)
LOGGER.info(f"Running {self.image_name} with args {all_kwargs} ...")
self.container = self.docker_client.containers.run(self.image_name, **all_kwargs)
return self.container

View File

@@ -8,8 +8,8 @@ LABEL maintainer="Jupyter Project <jupyter@googlegroups.com>"
USER root
# Spark dependencies
ENV APACHE_SPARK_VERSION 2.4.4
ENV HADOOP_VERSION 2.7
ENV APACHE_SPARK_VERSION=2.4.5 \
HADOOP_VERSION=2.7
RUN apt-get -y update && \
apt-get install --no-install-recommends -y openjdk-8-jre-headless ca-certificates-java && \
@@ -17,7 +17,7 @@ RUN apt-get -y update && \
RUN cd /tmp && \
wget -q http://mirrors.ukfast.co.uk/sites/ftp.apache.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \
echo "2E3A5C853B9F28C7D4525C0ADCB0D971B73AD47D5CCE138C85335B9F53A6519540D3923CB0B5CEE41E386E49AE8A409A51AB7194BA11A254E037A848D0C4A9E5 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
echo "2426a20c548bdfc07df288cd1d18d1da6b3189d0b78dee76fa034c52a4e02895f0ad460720c526f163ba63a17efae4764c46a1cd8f9b04c60f9937a554db85d2 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
tar xzf spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz -C /usr/local --owner root --group root --no-same-owner && \
rm spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
RUN cd /usr/local && ln -s spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} spark
@@ -36,10 +36,11 @@ RUN apt-get -y update && \
rm -rf /var/lib/apt/lists/*
# Spark and Mesos config
ENV SPARK_HOME /usr/local/spark
ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip
ENV MESOS_NATIVE_LIBRARY /usr/local/lib/libmesos.so
ENV SPARK_OPTS --driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info
# SPARK_HOME must be set in its own ENV instruction: variable substitution
# inside a single ENV instruction uses the values that were in effect before
# that instruction, so $SPARK_HOME would expand to an empty string if it were
# defined in the same instruction as PYTHONPATH.
ENV SPARK_HOME=/usr/local/spark
ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip \
    MESOS_NATIVE_LIBRARY=/usr/local/lib/libmesos.so \
    SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info" \
    PATH=$PATH:/usr/local/spark/bin
USER $NB_UID

View File

@@ -0,0 +1,19 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
import time
import logging
import pytest
LOGGER = logging.getLogger(__name__)
def test_spark_shell(container):
    """Evaluate ``1+1`` in spark-shell and expect the REPL to answer 2."""
    # The expression is fed to spark-shell through a bash here-string.
    shell_command = ['start.sh', 'bash', '-c', 'spark-shell <<< "1+1"']
    running = container.run(tty=True, command=shell_command)
    # Give the container up to 30 seconds to finish before collecting output.
    running.wait(timeout=30)
    output = running.logs(stdout=True).decode('utf-8')
    LOGGER.debug(output)
    assert 'res0: Int = 2' in output

5
pytest.ini Normal file
View File

@@ -0,0 +1,5 @@
# pytest configuration: show log records on the terminal while tests run.
[pytest]
# Enable live logging (log records are printed as they are emitted).
log_cli = 1
# Lowest level of log records shown by live logging.
log_cli_level = INFO
# Layout of each live log line: timestamp, level, message, source location.
log_cli_format = %(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)
# strftime pattern used to render %(asctime)s.
log_cli_date_format=%Y-%m-%d %H:%M:%S