From e84bfdf4aece9ae68889ce1ffa400e9d585a3811 Mon Sep 17 00:00:00 2001
From: Ayaz Salikhov
Date: Sun, 7 Jan 2024 15:33:30 +0400
Subject: [PATCH] Add logger to setup_julia and setup_spark

---
 .../setup-scripts/setup_julia.py       | 14 +++++++++++---
 images/pyspark-notebook/setup_spark.py | 17 ++++++++++++++---
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/images/minimal-notebook/setup-scripts/setup_julia.py b/images/minimal-notebook/setup-scripts/setup_julia.py
index 7f2478d1..114e64c0 100755
--- a/images/minimal-notebook/setup-scripts/setup_julia.py
+++ b/images/minimal-notebook/setup-scripts/setup_julia.py
@@ -6,6 +6,7 @@
 # - Run as the root user
 # - The JULIA_PKGDIR environment variable is set
 
+import logging
 import os
 import platform
 import shutil
@@ -14,6 +15,8 @@ from pathlib import Path
 
 import requests
 
+LOGGER = logging.getLogger(__name__)
+
 
 def unify_aarch64(platform: str) -> str:
     """
@@ -31,7 +34,7 @@ def get_latest_julia_url() -> tuple[str, str]:
     Get the last stable version of Julia
     Based on: https://github.com/JuliaLang/www.julialang.org/issues/878#issuecomment-749234813
     """
-
+    LOGGER.info("Downloading Julia versions information")
     versions = requests.get(
         "https://julialang-s3.julialang.org/bin/versions.json"
     ).json()
@@ -43,6 +46,7 @@ def get_latest_julia_url() -> tuple[str, str]:
     latest_version_files = stable_versions[latest_stable_version]["files"]
     triplet = unify_aarch64(platform.machine()) + "-linux-gnu"
     file_info = [vf for vf in latest_version_files if vf["triplet"] == triplet][0]
+    LOGGER.info(f"Latest version: {file_info['version']} url: {file_info['url']}")
     return file_info["url"], file_info["version"]
 
 
@@ -51,6 +55,7 @@ def download_julia(julia_url: str) -> None:
     Downloads and unpacks julia
     The resulting julia directory is "/opt/julia-VERSION/"
     """
+    LOGGER.info("Downloading and unpacking Julia")
     tmp_file = Path("/tmp/julia.tar.gz")
     subprocess.check_call(
         ["curl", "--progress-bar", "--location", "--output", tmp_file, julia_url]
@@ -59,12 +64,13 @@ def download_julia(julia_url: str) -> None:
     tmp_file.unlink()
 
 
-def prepare_julia(julia_version: str) -> None:
+def configure_julia(julia_version: str) -> None:
     """
     Creates /usr/local/bin/julia symlink
     Make Julia aware of conda libraries
     Creates a directory for Julia user libraries
     """
+    LOGGER.info("Configuring Julia")
     # Link Julia installed version to /usr/local/bin, so julia launches it
     subprocess.check_call(
         ["ln", "-fs", f"/opt/julia-{julia_version}/bin/julia", "/usr/local/bin/julia"]
@@ -84,6 +90,8 @@ def prepare_julia(julia_version: str) -> None:
 
 
 if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+
     julia_url, julia_version = get_latest_julia_url()
     download_julia(julia_url=julia_url)
-    prepare_julia(julia_version=julia_version)
+    configure_julia(julia_version=julia_version)
diff --git a/images/pyspark-notebook/setup_spark.py b/images/pyspark-notebook/setup_spark.py
index 54e59948..3481cc70 100755
--- a/images/pyspark-notebook/setup_spark.py
+++ b/images/pyspark-notebook/setup_spark.py
@@ -7,6 +7,7 @@
 # - Required env variables: SPARK_HOME, HADOOP_VERSION, SPARK_DOWNLOAD_URL
 # - Optional env variables: SPARK_VERSION, SCALA_VERSION
 
+import logging
 import os
 import subprocess
 from pathlib import Path
@@ -14,6 +15,8 @@ from pathlib import Path
 
 import requests
 from bs4 import BeautifulSoup
 
+LOGGER = logging.getLogger(__name__)
+
 
 def get_all_refs(url: str) -> list[str]:
@@ -31,6 +34,7 @@ def get_spark_version() -> str:
     """
     if (version := os.environ["SPARK_VERSION"]) != "":
         return version
+    LOGGER.info("Downloading Spark versions information")
    all_refs = get_all_refs("https://archive.apache.org/dist/spark/")
     stable_versions = [
         ref.removeprefix("spark-").removesuffix("/")
@@ -38,9 +42,11 @@ def get_spark_version() -> str:
         if ref.startswith("spark-") and "incubating" not in ref and "preview" not in ref
     ]
     # Compare versions semantically
-    return max(
+    latest_version = max(
         stable_versions, key=lambda ver: [int(sub_ver) for sub_ver in ver.split(".")]
     )
+    LOGGER.info(f"Latest version: {latest_version}")
+    return latest_version
 
 
 def download_spark(
@@ -53,9 +59,11 @@
     Downloads and unpacks spark
     The resulting spark directory name is returned
     """
+    LOGGER.info("Downloading and unpacking Spark")
     spark_dir_name = f"spark-{spark_version}-bin-hadoop{hadoop_version}"
     if scala_version:
         spark_dir_name += f"-scala{scala_version}"
+    LOGGER.info(f"Spark directory name: {spark_dir_name}")
     spark_url = spark_download_url / f"spark-{spark_version}" / f"{spark_dir_name}.tgz"
 
     tmp_file = Path("/tmp/spark.tar.gz")
@@ -80,11 +88,12 @@ def download_spark(
     return spark_dir_name
 
 
-def prepare_spark(spark_dir_name: str, spark_home: Path) -> None:
+def configure_spark(spark_dir_name: str, spark_home: Path) -> None:
     """
     Creates a ${SPARK_HOME} symlink to a versioned spark directory
     Creates a 10spark-config.sh symlink to source PYTHONPATH automatically
     """
+    LOGGER.info("Configuring Spark")
     subprocess.check_call(["ln", "-s", f"/usr/local/{spark_dir_name}", spark_home])
 
     # Add a link in the before_notebook hook in order to source PYTHONPATH automatically
@@ -95,6 +104,8 @@ def prepare_spark(spark_dir_name: str, spark_home: Path) -> None:
 
 
 if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+
     spark_version = get_spark_version()
     spark_dir_name = download_spark(
         spark_version=spark_version,
@@ -102,6 +113,6 @@ if __name__ == "__main__":
         scala_version=os.environ["SCALA_VERSION"],
         spark_download_url=Path(os.environ["SPARK_DOWNLOAD_URL"]),
     )
-    prepare_spark(
+    configure_spark(
         spark_dir_name=spark_dir_name, spark_home=Path(os.environ["SPARK_HOME"])
     )