mirror of
https://github.com/jupyter/docker-stacks.git
synced 2025-10-10 19:42:58 +00:00
Add logger to setup_julia and setup_spark
This commit is contained in:
@@ -6,6 +6,7 @@
|
||||
# - Run as the root user
|
||||
# - The JULIA_PKGDIR environment variable is set
|
||||
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import shutil
|
||||
@@ -14,6 +15,8 @@ from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def unify_aarch64(platform: str) -> str:
|
||||
"""
|
||||
@@ -31,7 +34,7 @@ def get_latest_julia_url() -> tuple[str, str]:
|
||||
Get the last stable version of Julia
|
||||
Based on: https://github.com/JuliaLang/www.julialang.org/issues/878#issuecomment-749234813
|
||||
"""
|
||||
|
||||
LOGGER.info("Downloading Julia versions information")
|
||||
versions = requests.get(
|
||||
"https://julialang-s3.julialang.org/bin/versions.json"
|
||||
).json()
|
||||
@@ -43,6 +46,7 @@ def get_latest_julia_url() -> tuple[str, str]:
|
||||
latest_version_files = stable_versions[latest_stable_version]["files"]
|
||||
triplet = unify_aarch64(platform.machine()) + "-linux-gnu"
|
||||
file_info = [vf for vf in latest_version_files if vf["triplet"] == triplet][0]
|
||||
LOGGER.info(f"Latest version: {file_info['version']} url: {file_info['url']}")
|
||||
return file_info["url"], file_info["version"]
|
||||
|
||||
|
||||
@@ -51,6 +55,7 @@ def download_julia(julia_url: str) -> None:
|
||||
Downloads and unpacks julia
|
||||
The resulting julia directory is "/opt/julia-VERSION/"
|
||||
"""
|
||||
LOGGER.info("Downloading and unpacking Julia")
|
||||
tmp_file = Path("/tmp/julia.tar.gz")
|
||||
subprocess.check_call(
|
||||
["curl", "--progress-bar", "--location", "--output", tmp_file, julia_url]
|
||||
@@ -59,12 +64,13 @@ def download_julia(julia_url: str) -> None:
|
||||
tmp_file.unlink()
|
||||
|
||||
|
||||
def prepare_julia(julia_version: str) -> None:
|
||||
def configure_julia(julia_version: str) -> None:
|
||||
"""
|
||||
Creates /usr/local/bin/julia symlink
|
||||
Make Julia aware of conda libraries
|
||||
Creates a directory for Julia user libraries
|
||||
"""
|
||||
LOGGER.info("Configuring Julia")
|
||||
# Link Julia installed version to /usr/local/bin, so julia launches it
|
||||
subprocess.check_call(
|
||||
["ln", "-fs", f"/opt/julia-{julia_version}/bin/julia", "/usr/local/bin/julia"]
|
||||
@@ -84,6 +90,8 @@ def prepare_julia(julia_version: str) -> None:
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
julia_url, julia_version = get_latest_julia_url()
|
||||
download_julia(julia_url=julia_url)
|
||||
prepare_julia(julia_version=julia_version)
|
||||
configure_julia(julia_version=julia_version)
|
||||
|
@@ -7,6 +7,7 @@
|
||||
# - Required env variables: SPARK_HOME, HADOOP_VERSION, SPARK_DOWNLOAD_URL
|
||||
# - Optional env variables: SPARK_VERSION, SCALA_VERSION
|
||||
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
@@ -14,6 +15,8 @@ from pathlib import Path
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_all_refs(url: str) -> list[str]:
|
||||
"""
|
||||
@@ -31,6 +34,7 @@ def get_spark_version() -> str:
|
||||
"""
|
||||
if (version := os.environ["SPARK_VERSION"]) != "":
|
||||
return version
|
||||
LOGGER.info("Downloading Spark versions information")
|
||||
all_refs = get_all_refs("https://archive.apache.org/dist/spark/")
|
||||
stable_versions = [
|
||||
ref.removeprefix("spark-").removesuffix("/")
|
||||
@@ -38,9 +42,11 @@ def get_spark_version() -> str:
|
||||
if ref.startswith("spark-") and "incubating" not in ref and "preview" not in ref
|
||||
]
|
||||
# Compare versions semantically
|
||||
return max(
|
||||
latest_version = max(
|
||||
stable_versions, key=lambda ver: [int(sub_ver) for sub_ver in ver.split(".")]
|
||||
)
|
||||
LOGGER.info(f"Latest version: {latest_version}")
|
||||
return latest_version
|
||||
|
||||
|
||||
def download_spark(
|
||||
@@ -53,9 +59,11 @@ def download_spark(
|
||||
Downloads and unpacks spark
|
||||
The resulting spark directory name is returned
|
||||
"""
|
||||
LOGGER.info("Downloading and unpacking Spark")
|
||||
spark_dir_name = f"spark-{spark_version}-bin-hadoop{hadoop_version}"
|
||||
if scala_version:
|
||||
spark_dir_name += f"-scala{scala_version}"
|
||||
LOGGER.info(f"Spark directory name: {spark_dir_name}")
|
||||
spark_url = spark_download_url / f"spark-{spark_version}" / f"{spark_dir_name}.tgz"
|
||||
|
||||
tmp_file = Path("/tmp/spark.tar.gz")
|
||||
@@ -80,11 +88,12 @@ def download_spark(
|
||||
return spark_dir_name
|
||||
|
||||
|
||||
def prepare_spark(spark_dir_name: str, spark_home: Path) -> None:
|
||||
def configure_spark(spark_dir_name: str, spark_home: Path) -> None:
|
||||
"""
|
||||
Creates a ${SPARK_HOME} symlink to a versioned spark directory
|
||||
Creates a 10spark-config.sh symlink to source PYTHONPATH automatically
|
||||
"""
|
||||
LOGGER.info("Configuring Spark")
|
||||
subprocess.check_call(["ln", "-s", f"/usr/local/{spark_dir_name}", spark_home])
|
||||
|
||||
# Add a link in the before_notebook hook in order to source PYTHONPATH automatically
|
||||
@@ -95,6 +104,8 @@ def prepare_spark(spark_dir_name: str, spark_home: Path) -> None:
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
spark_version = get_spark_version()
|
||||
spark_dir_name = download_spark(
|
||||
spark_version=spark_version,
|
||||
@@ -102,6 +113,6 @@ if __name__ == "__main__":
|
||||
scala_version=os.environ["SCALA_VERSION"],
|
||||
spark_download_url=Path(os.environ["SPARK_DOWNLOAD_URL"]),
|
||||
)
|
||||
prepare_spark(
|
||||
configure_spark(
|
||||
spark_dir_name=spark_dir_name, spark_home=Path(os.environ["SPARK_HOME"])
|
||||
)
|
||||
|
Reference in New Issue
Block a user