mirror of
https://github.com/jupyter/docker-stacks.git
synced 2025-10-10 03:23:00 +00:00
Add logger to setup_julia and setup_spark
This commit is contained in:
@@ -6,6 +6,7 @@
|
|||||||
# - Run as the root user
|
# - Run as the root user
|
||||||
# - The JULIA_PKGDIR environment variable is set
|
# - The JULIA_PKGDIR environment variable is set
|
||||||
|
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
import platform
|
import platform
|
||||||
import shutil
|
import shutil
|
||||||
@@ -14,6 +15,8 @@ from pathlib import Path
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def unify_aarch64(platform: str) -> str:
|
def unify_aarch64(platform: str) -> str:
|
||||||
"""
|
"""
|
||||||
@@ -31,7 +34,7 @@ def get_latest_julia_url() -> tuple[str, str]:
|
|||||||
Get the last stable version of Julia
|
Get the last stable version of Julia
|
||||||
Based on: https://github.com/JuliaLang/www.julialang.org/issues/878#issuecomment-749234813
|
Based on: https://github.com/JuliaLang/www.julialang.org/issues/878#issuecomment-749234813
|
||||||
"""
|
"""
|
||||||
|
LOGGER.info("Downloading Julia versions information")
|
||||||
versions = requests.get(
|
versions = requests.get(
|
||||||
"https://julialang-s3.julialang.org/bin/versions.json"
|
"https://julialang-s3.julialang.org/bin/versions.json"
|
||||||
).json()
|
).json()
|
||||||
@@ -43,6 +46,7 @@ def get_latest_julia_url() -> tuple[str, str]:
|
|||||||
latest_version_files = stable_versions[latest_stable_version]["files"]
|
latest_version_files = stable_versions[latest_stable_version]["files"]
|
||||||
triplet = unify_aarch64(platform.machine()) + "-linux-gnu"
|
triplet = unify_aarch64(platform.machine()) + "-linux-gnu"
|
||||||
file_info = [vf for vf in latest_version_files if vf["triplet"] == triplet][0]
|
file_info = [vf for vf in latest_version_files if vf["triplet"] == triplet][0]
|
||||||
|
LOGGER.info(f"Latest version: {file_info['version']} url: {file_info['url']}")
|
||||||
return file_info["url"], file_info["version"]
|
return file_info["url"], file_info["version"]
|
||||||
|
|
||||||
|
|
||||||
@@ -51,6 +55,7 @@ def download_julia(julia_url: str) -> None:
|
|||||||
Downloads and unpacks julia
|
Downloads and unpacks julia
|
||||||
The resulting julia directory is "/opt/julia-VERSION/"
|
The resulting julia directory is "/opt/julia-VERSION/"
|
||||||
"""
|
"""
|
||||||
|
LOGGER.info("Downloading and unpacking Julia")
|
||||||
tmp_file = Path("/tmp/julia.tar.gz")
|
tmp_file = Path("/tmp/julia.tar.gz")
|
||||||
subprocess.check_call(
|
subprocess.check_call(
|
||||||
["curl", "--progress-bar", "--location", "--output", tmp_file, julia_url]
|
["curl", "--progress-bar", "--location", "--output", tmp_file, julia_url]
|
||||||
@@ -59,12 +64,13 @@ def download_julia(julia_url: str) -> None:
|
|||||||
tmp_file.unlink()
|
tmp_file.unlink()
|
||||||
|
|
||||||
|
|
||||||
def prepare_julia(julia_version: str) -> None:
|
def configure_julia(julia_version: str) -> None:
|
||||||
"""
|
"""
|
||||||
Creates /usr/local/bin/julia symlink
|
Creates /usr/local/bin/julia symlink
|
||||||
Make Julia aware of conda libraries
|
Make Julia aware of conda libraries
|
||||||
Creates a directory for Julia user libraries
|
Creates a directory for Julia user libraries
|
||||||
"""
|
"""
|
||||||
|
LOGGER.info("Configuring Julia")
|
||||||
# Link Julia installed version to /usr/local/bin, so julia launches it
|
# Link Julia installed version to /usr/local/bin, so julia launches it
|
||||||
subprocess.check_call(
|
subprocess.check_call(
|
||||||
["ln", "-fs", f"/opt/julia-{julia_version}/bin/julia", "/usr/local/bin/julia"]
|
["ln", "-fs", f"/opt/julia-{julia_version}/bin/julia", "/usr/local/bin/julia"]
|
||||||
@@ -84,6 +90,8 @@ def prepare_julia(julia_version: str) -> None:
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
julia_url, julia_version = get_latest_julia_url()
|
julia_url, julia_version = get_latest_julia_url()
|
||||||
download_julia(julia_url=julia_url)
|
download_julia(julia_url=julia_url)
|
||||||
prepare_julia(julia_version=julia_version)
|
configure_julia(julia_version=julia_version)
|
||||||
|
@@ -7,6 +7,7 @@
|
|||||||
# - Required env variables: SPARK_HOME, HADOOP_VERSION, SPARK_DOWNLOAD_URL
|
# - Required env variables: SPARK_HOME, HADOOP_VERSION, SPARK_DOWNLOAD_URL
|
||||||
# - Optional env variables: SPARK_VERSION, SCALA_VERSION
|
# - Optional env variables: SPARK_VERSION, SCALA_VERSION
|
||||||
|
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -14,6 +15,8 @@ from pathlib import Path
|
|||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def get_all_refs(url: str) -> list[str]:
|
def get_all_refs(url: str) -> list[str]:
|
||||||
"""
|
"""
|
||||||
@@ -31,6 +34,7 @@ def get_spark_version() -> str:
|
|||||||
"""
|
"""
|
||||||
if (version := os.environ["SPARK_VERSION"]) != "":
|
if (version := os.environ["SPARK_VERSION"]) != "":
|
||||||
return version
|
return version
|
||||||
|
LOGGER.info("Downloading Spark versions information")
|
||||||
all_refs = get_all_refs("https://archive.apache.org/dist/spark/")
|
all_refs = get_all_refs("https://archive.apache.org/dist/spark/")
|
||||||
stable_versions = [
|
stable_versions = [
|
||||||
ref.removeprefix("spark-").removesuffix("/")
|
ref.removeprefix("spark-").removesuffix("/")
|
||||||
@@ -38,9 +42,11 @@ def get_spark_version() -> str:
|
|||||||
if ref.startswith("spark-") and "incubating" not in ref and "preview" not in ref
|
if ref.startswith("spark-") and "incubating" not in ref and "preview" not in ref
|
||||||
]
|
]
|
||||||
# Compare versions semantically
|
# Compare versions semantically
|
||||||
return max(
|
latest_version = max(
|
||||||
stable_versions, key=lambda ver: [int(sub_ver) for sub_ver in ver.split(".")]
|
stable_versions, key=lambda ver: [int(sub_ver) for sub_ver in ver.split(".")]
|
||||||
)
|
)
|
||||||
|
LOGGER.info(f"Latest version: {latest_version}")
|
||||||
|
return latest_version
|
||||||
|
|
||||||
|
|
||||||
def download_spark(
|
def download_spark(
|
||||||
@@ -53,9 +59,11 @@ def download_spark(
|
|||||||
Downloads and unpacks spark
|
Downloads and unpacks spark
|
||||||
The resulting spark directory name is returned
|
The resulting spark directory name is returned
|
||||||
"""
|
"""
|
||||||
|
LOGGER.info("Downloading and unpacking Spark")
|
||||||
spark_dir_name = f"spark-{spark_version}-bin-hadoop{hadoop_version}"
|
spark_dir_name = f"spark-{spark_version}-bin-hadoop{hadoop_version}"
|
||||||
if scala_version:
|
if scala_version:
|
||||||
spark_dir_name += f"-scala{scala_version}"
|
spark_dir_name += f"-scala{scala_version}"
|
||||||
|
LOGGER.info(f"Spark directory name: {spark_dir_name}")
|
||||||
spark_url = spark_download_url / f"spark-{spark_version}" / f"{spark_dir_name}.tgz"
|
spark_url = spark_download_url / f"spark-{spark_version}" / f"{spark_dir_name}.tgz"
|
||||||
|
|
||||||
tmp_file = Path("/tmp/spark.tar.gz")
|
tmp_file = Path("/tmp/spark.tar.gz")
|
||||||
@@ -80,11 +88,12 @@ def download_spark(
|
|||||||
return spark_dir_name
|
return spark_dir_name
|
||||||
|
|
||||||
|
|
||||||
def prepare_spark(spark_dir_name: str, spark_home: Path) -> None:
|
def configure_spark(spark_dir_name: str, spark_home: Path) -> None:
|
||||||
"""
|
"""
|
||||||
Creates a ${SPARK_HOME} symlink to a versioned spark directory
|
Creates a ${SPARK_HOME} symlink to a versioned spark directory
|
||||||
Creates a 10spark-config.sh symlink to source PYTHONPATH automatically
|
Creates a 10spark-config.sh symlink to source PYTHONPATH automatically
|
||||||
"""
|
"""
|
||||||
|
LOGGER.info("Configuring Spark")
|
||||||
subprocess.check_call(["ln", "-s", f"/usr/local/{spark_dir_name}", spark_home])
|
subprocess.check_call(["ln", "-s", f"/usr/local/{spark_dir_name}", spark_home])
|
||||||
|
|
||||||
# Add a link in the before_notebook hook in order to source PYTHONPATH automatically
|
# Add a link in the before_notebook hook in order to source PYTHONPATH automatically
|
||||||
@@ -95,6 +104,8 @@ def prepare_spark(spark_dir_name: str, spark_home: Path) -> None:
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
spark_version = get_spark_version()
|
spark_version = get_spark_version()
|
||||||
spark_dir_name = download_spark(
|
spark_dir_name = download_spark(
|
||||||
spark_version=spark_version,
|
spark_version=spark_version,
|
||||||
@@ -102,6 +113,6 @@ if __name__ == "__main__":
|
|||||||
scala_version=os.environ["SCALA_VERSION"],
|
scala_version=os.environ["SCALA_VERSION"],
|
||||||
spark_download_url=Path(os.environ["SPARK_DOWNLOAD_URL"]),
|
spark_download_url=Path(os.environ["SPARK_DOWNLOAD_URL"]),
|
||||||
)
|
)
|
||||||
prepare_spark(
|
configure_spark(
|
||||||
spark_dir_name=spark_dir_name, spark_home=Path(os.environ["SPARK_HOME"])
|
spark_dir_name=spark_dir_name, spark_home=Path(os.environ["SPARK_HOME"])
|
||||||
)
|
)
|
||||||
|
Reference in New Issue
Block a user