Do not bloat spark image with ENV variables (#2081)

* Do not bloat spark image with ENV variables

* Remove HadoopVersionTagger
Author: Ayaz Salikhov
Date: 2024-01-17 13:34:33 +04:00
Committed by: GitHub
Parent: d57bf9590d
Commit: bf33945b9e
3 changed files with 6 additions and 27 deletions

File: images/pyspark-notebook/Dockerfile

@@ -34,12 +34,6 @@ ARG scala_version
 # But it seems to be slower, that's why we use the recommended site for download
 ARG spark_download_url="https://dlcdn.apache.org/spark/"
 
-# Configure Spark
-ENV SPARK_VERSION="${spark_version}" \
-    HADOOP_VERSION="${hadoop_version}" \
-    SCALA_VERSION="${scala_version}" \
-    SPARK_DOWNLOAD_URL="${spark_download_url}"
-
 ENV SPARK_HOME=/usr/local/spark
 ENV PATH="${PATH}:${SPARK_HOME}/bin"
 ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info"
@@ -47,7 +41,11 @@ ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M
 COPY setup_spark.py /opt/setup-scripts/
 
 # Setup Spark
-RUN /opt/setup-scripts/setup_spark.py
+RUN SPARK_VERSION="${spark_version}" \
+    HADOOP_VERSION="${hadoop_version}" \
+    SCALA_VERSION="${scala_version}" \
+    SPARK_DOWNLOAD_URL="${spark_download_url}" \
+    /opt/setup-scripts/setup_spark.py
 
 # Configure IPython system-wide
 COPY ipython_kernel_config.py "/etc/ipython/"
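With this change, the version variables exist only for the duration of the setup_spark.py build step; they are no longer baked into the image config that every derived container inherits. A minimal sketch of how one could verify that, assuming a locally built image tagged "jupyter/pyspark-notebook" and the Docker CLI on PATH (both assumptions, not part of this commit):

# check_env.py - hypothetical verification script, not part of this commit.
# Reads the Env list baked into the image config via `docker inspect`.
import json
import subprocess

def image_env(image: str) -> list[str]:
    out = subprocess.check_output(
        ["docker", "inspect", "--format", "{{json .Config.Env}}", image]
    )
    return json.loads(out)

env = image_env("jupyter/pyspark-notebook")  # image tag is an assumption
assert any(e.startswith("SPARK_HOME=") for e in env)          # still set via ENV
assert not any(e.startswith("HADOOP_VERSION=") for e in env)  # no longer baked in
print("ok: HADOOP_VERSION is not part of the image environment")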

File: tagging/images_hierarchy.py

@@ -13,7 +13,6 @@ from tagging.manifests import (
 )
 from tagging.taggers import (
     DateTagger,
-    HadoopVersionTagger,
     JavaVersionTagger,
     JuliaVersionTagger,
     JupyterHubVersionTagger,
@@ -83,7 +82,7 @@ ALL_IMAGES = {
     ),
     "pyspark-notebook": ImageDescription(
         parent_image="scipy-notebook",
-        taggers=[SparkVersionTagger(), HadoopVersionTagger(), JavaVersionTagger()],
+        taggers=[SparkVersionTagger(), JavaVersionTagger()],
         manifests=[SparkInfoManifest()],
     ),
     "all-spark-notebook": ImageDescription(

File: tagging/taggers.py

@@ -12,18 +12,6 @@ def _get_program_version(container: Container, program: str) -> str:
     return DockerRunner.run_simple_command(container, cmd=f"{program} --version")
 
 
-def _get_env_variable(container: Container, variable: str) -> str:
-    env = DockerRunner.run_simple_command(
-        container,
-        cmd="env",
-        print_result=False,
-    ).split()
-    for env_entry in env:
-        if env_entry.startswith(variable):
-            return env_entry[len(variable) + 1 :]
-    raise KeyError(variable)
-
-
 def _get_pip_package_version(container: Container, package: str) -> str:
     PIP_VERSION_PREFIX = "Version: "
@@ -136,12 +124,6 @@ class SparkVersionTagger(TaggerInterface):
         return "spark-" + version_line.split(" ")[-1]
 
 
-class HadoopVersionTagger(TaggerInterface):
-    @staticmethod
-    def tag_value(container: Container) -> str:
-        return "hadoop-" + _get_env_variable(container, "HADOOP_VERSION")
-
-
 class JavaVersionTagger(TaggerInterface):
     @staticmethod
     def tag_value(container: Container) -> str:
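The removed helper and tagger relied on reading HADOOP_VERSION from the container environment, which the Dockerfile change above eliminates. The surviving taggers instead parse program output, as SparkVersionTagger's split(" ")[-1] line shows. A standalone sketch of that approach, where the banner text is an illustrative assumption rather than real spark-submit output:

# spark_tag_demo.py - derive a "spark-X.Y.Z" tag from version output
# instead of a baked-in environment variable.
def spark_tag(version_output: str) -> str:
    for line in version_output.splitlines():
        if "version" in line:
            # same split-on-space parsing as in the diff above
            return "spark-" + line.split(" ")[-1]
    raise ValueError("no version line in output")

banner = "Welcome to Spark\n      /___/ .__/ version 3.5.0"  # illustrative
print(spark_tag(banner))  # -> spark-3.5.0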