Automatically install latest spark version (#2075)

* Automatically install latest pyspark version

* Better text

* Do not use shutil to keep behaviour

* Make setup_script cwd independent

* Use _get_program_version to calculate spark version

* Update setup_spark.py reqs

* Update setup_spark.py

* Add info about HADOOP_VERSION

* Add customization back

* Better text

* Specify build args when they are actually needed

* Better text

* Better code

* Better code

* Better text

* Get rid of warning

* Improve code

* Remove information about checksum

* Better text
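
The headline change: the pyspark-notebook image now resolves the latest stable Spark release at build time instead of hard-coding one. Below is a minimal sketch of how such a lookup can work, assuming the Apache archive listing at https://archive.apache.org/dist/spark/; the function name and parsing are illustrative, not the PR's exact setup_spark.py code.

import re

import requests
from packaging.version import Version


def get_latest_spark_version() -> str:
    # The archive lists one "spark-X.Y.Z/" directory per stable release.
    listing = requests.get("https://archive.apache.org/dist/spark/").text
    versions = re.findall(r'href="spark-(\d+\.\d+\.\d+)/"', listing)
    # Pick the highest release; pre-release directories (e.g. "-preview")
    # do not match the pattern above, so they are excluded automatically.
    return max(versions, key=Version)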
Ayaz Salikhov authored on 2024-01-07 10:01:23 +04:00; committed by GitHub
parent c1229303d0 · commit c294e9e2d9
5 changed files with 155 additions and 45 deletions


@@ -128,7 +128,12 @@ class JuliaVersionTagger(TaggerInterface):
 class SparkVersionTagger(TaggerInterface):
     @staticmethod
     def tag_value(container: Container) -> str:
-        return "spark-" + _get_env_variable(container, "APACHE_SPARK_VERSION")
+        SPARK_VERSION_LINE_PREFIX = r"   /___/ .__/\_,_/_/ /_/\_\   version"
+
+        spark_version = _get_program_version(container, "spark-submit")
+        version_line = spark_version.split("\n")[4]
+        assert version_line.startswith(SPARK_VERSION_LINE_PREFIX)
+        return "spark-" + version_line.split(" ")[-1]
 
 
 class HadoopVersionTagger(TaggerInterface):
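
For context on the new SparkVersionTagger logic: `spark-submit --version` prints an ASCII-art banner, and the release number sits at the end of the banner's fifth line, which the assert pins down before parsing. Here is a standalone sketch of the same parsing run against a hard-coded sample banner; the sample text is illustrative, while in the tagger the real output comes from `_get_program_version`.

# Prefix matching the fifth line of Spark's version banner.
SPARK_VERSION_LINE_PREFIX = r"   /___/ .__/\_,_/_/ /_/\_\   version"

# Sample of what `spark-submit --version` prints (illustrative).
SAMPLE_BANNER = r"""Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 3.5.0
      /_/
"""

version_line = SAMPLE_BANNER.split("\n")[4]  # line carrying "version X.Y.Z"
assert version_line.startswith(SPARK_VERSION_LINE_PREFIX)
print("spark-" + version_line.split(" ")[-1])  # -> spark-3.5.0

The assert makes the tagger fail loudly if a future Spark release reshapes the banner, rather than silently emitting a wrong tag.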