diff --git a/pyspark-notebook/Dockerfile b/pyspark-notebook/Dockerfile
index e9578b19..00bb8f01 100644
--- a/pyspark-notebook/Dockerfile
+++ b/pyspark-notebook/Dockerfile
@@ -53,6 +53,10 @@ RUN cp -p "${SPARK_HOME}/conf/spark-defaults.conf.template" "${SPARK_HOME}/conf/
     echo 'spark.driver.extraJavaOptions -Dio.netty.tryReflectionSetAccessible=true' >> "${SPARK_HOME}/conf/spark-defaults.conf" && \
     echo 'spark.executor.extraJavaOptions -Dio.netty.tryReflectionSetAccessible=true' >> "${SPARK_HOME}/conf/spark-defaults.conf"
 
+# Configure IPython system-wide
+COPY ipython_kernel_config.py "/etc/ipython/"
+RUN fix-permissions "/etc/ipython/"
+
 USER ${NB_UID}
 
 # Install pyarrow
diff --git a/pyspark-notebook/ipython_kernel_config.py b/pyspark-notebook/ipython_kernel_config.py
new file mode 100644
index 00000000..8aac8fc7
--- /dev/null
+++ b/pyspark-notebook/ipython_kernel_config.py
@@ -0,0 +1,13 @@
+# Configuration file for ipython-kernel.
+# See
+
+# With IPython >= 6.0.0, all output to stdout/stderr is captured.
+# This is the case for subprocesses and for the output of compiled libraries like Spark.
+# These logs now end up both in the notebook server logs and in notebook outputs.
+# Logs are particularly verbose with Spark, which is why we turn them off through this flag.
+#
+
+# Attempt to capture and forward low-level output, e.g. produced by Extension
+# libraries.
+# Default: True
+c.IPKernelApp.capture_fd_output = False  # noqa: F821
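
A minimal sketch of what the flag controls, assuming a notebook cell run on an image built from this Dockerfile (the strings and comments here are illustrative, not part of the change): print() writes through Python's sys.stdout, which the kernel always captures, whereas a direct write to file descriptor 1 behaves like the low-level output of Spark's JVM and is only mirrored into the notebook while capture_fd_output is left at its default of True.

    import os

    # print() goes through Python's sys.stdout, which the kernel replaces
    # and always captures, so this line appears in the notebook output
    # regardless of the capture_fd_output setting.
    print("via sys.stdout -> always shown in the notebook")

    # os.write() bypasses sys.stdout and writes straight to file
    # descriptor 1, the way compiled libraries and subprocesses
    # (e.g. Spark's JVM) do. With capture_fd_output = True (the default)
    # the kernel forwards this to the notebook output as well; with the
    # False set in this config, it lands in the notebook server's log
    # instead.
    os.write(1, b"via fd 1 -> server log when capture_fd_output is False\n")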