log hook messages to stderr (#2293)

2025-10-07 01:54:04 +00:00 · 2025-04-30 09:59:49 +02:00
parent 5b8d531aca
commit 1eacd4ee78
6 changed files with 136 additions and 50 deletions
--- a/.flake8
+++ b/.flake8
@@ -1,4 +1,4 @@
 [flake8]
 max-line-length = 88
 select = C, E, F, W, B, B950
-extend-ignore = E203, E501, W503
+extend-ignore = E203, E501, E704, W503
--- a/images/docker-stacks-foundation/run-hooks.sh
+++ b/images/docker-stacks-foundation/run-hooks.sh
@@ -2,45 +2,55 @@
 # Copyright (c) Jupyter Development Team.
 # Distributed under the terms of the Modified BSD License.

+# identical _log to start.sh
+# only used when _not_ sourced from start.sh (i.e. unittests)
+if ! declare -F _log > /dev/null; then
+    _log () {
+        if [[ "$*" == "ERROR:"* ]] || [[ "$*" == "WARNING:"* ]] || [[ "${JUPYTER_DOCKER_STACKS_QUIET}" == "" ]]; then
+            echo "$@" >&2
+        fi
+    }
+fi
+
 # The run-hooks.sh script looks for *.sh scripts to source
 # and executable files to run within a passed directory

 if [ "$#" -ne 1 ]; then
-    echo "Should pass exactly one directory"
+    _log "ERROR: Should pass exactly one directory"
    return 1
 fi

 if [[ ! -d "${1}" ]]; then
-    echo "Directory ${1} doesn't exist or is not a directory"
+    _log "ERROR: Directory ${1} doesn't exist or is not a directory"
    return 1
 fi

-echo "Running hooks in: ${1} as uid: $(id -u) gid: $(id -g)"
+_log "Running hooks in: ${1} as uid: $(id -u) gid: $(id -g)"
 for f in "${1}/"*; do
    # Handling a case when the directory is empty
    [ -e "${f}" ] || continue
    case "${f}" in
        *.sh)
-            echo "Sourcing shell script: ${f}"
+            _log "Sourcing shell script: ${f}"
            # shellcheck disable=SC1090
            source "${f}"
            # shellcheck disable=SC2181
            if [ $? -ne 0 ]; then
-                echo "${f} has failed, continuing execution"
+                _log "ERROR: ${f} has failed, continuing execution"
            fi
            ;;
        *)
            if [ -x "${f}" ]; then
-                echo "Running executable: ${f}"
+                _log "Running executable: ${f}"
                "${f}"
                # shellcheck disable=SC2181
                if [ $? -ne 0 ]; then
-                    echo "${f} has failed, continuing execution"
+                    _log "ERROR: ${f} has failed, continuing execution"
                fi
            else
-                echo "Ignoring non-executable: ${f}"
+                _log "Ignoring non-executable: ${f}"
            fi
            ;;
    esac
 done
-echo "Done running hooks in: ${1}"
+_log "Done running hooks in: ${1}"
--- a/images/docker-stacks-foundation/start.sh
+++ b/images/docker-stacks-foundation/start.sh
@@ -9,7 +9,7 @@ set -e
 # by setting the JUPYTER_DOCKER_STACKS_QUIET environment variable.
 _log () {
    if [[ "$*" == "ERROR:"* ]] || [[ "$*" == "WARNING:"* ]] || [[ "${JUPYTER_DOCKER_STACKS_QUIET}" == "" ]]; then
-        echo "$@"
+        echo "$@" >&2
    fi
 }
 _log "Entered start.sh with args:" "$@"
--- a/tests/by_image/docker-stacks-foundation/test_run_hooks.py
+++ b/tests/by_image/docker-stacks-foundation/test_run_hooks.py
@@ -1,5 +1,7 @@
 # Copyright (c) Jupyter Development Team.
 # Distributed under the terms of the Modified BSD License.
+from __future__ import annotations
+
 import logging
 from pathlib import Path

@@ -10,62 +12,77 @@ THIS_DIR = Path(__file__).parent.resolve()


 def test_run_hooks_zero_args(container: TrackedContainer) -> None:
-    logs = container.run_and_wait(
+    stdout, stderr = container.run_and_wait(
        timeout=10,
+        no_errors=False,
        no_failure=False,
+        split_stderr=True,
        command=["bash", "-c", "source /usr/local/bin/run-hooks.sh"],
    )
-    assert "Should pass exactly one directory" in logs
+    assert not stdout
+    assert "Should pass exactly one directory" in stderr


 def test_run_hooks_two_args(container: TrackedContainer) -> None:
-    logs = container.run_and_wait(
+    stdout, stderr = container.run_and_wait(
        timeout=10,
+        no_errors=False,
        no_failure=False,
+        split_stderr=True,
        command=[
            "bash",
            "-c",
            "source /usr/local/bin/run-hooks.sh first-arg second-arg",
        ],
    )
-    assert "Should pass exactly one directory" in logs
+    assert not stdout
+    assert "Should pass exactly one directory" in stderr


 def test_run_hooks_missing_dir(container: TrackedContainer) -> None:
-    logs = container.run_and_wait(
+    stdout, stderr = container.run_and_wait(
        timeout=10,
+        no_errors=False,
        no_failure=False,
+        split_stderr=True,
        command=[
            "bash",
            "-c",
            "source /usr/local/bin/run-hooks.sh /tmp/missing-dir/",
        ],
    )
-    assert "Directory /tmp/missing-dir/ doesn't exist or is not a directory" in logs
+    assert not stdout
+    assert "Directory /tmp/missing-dir/ doesn't exist or is not a directory" in stderr


 def test_run_hooks_dir_is_file(container: TrackedContainer) -> None:
-    logs = container.run_and_wait(
+    stdout, stderr = container.run_and_wait(
        timeout=10,
+        no_errors=False,
        no_failure=False,
+        split_stderr=True,
        command=[
            "bash",
            "-c",
            "touch /tmp/some-file && source /usr/local/bin/run-hooks.sh /tmp/some-file",
        ],
    )
-    assert "Directory /tmp/some-file doesn't exist or is not a directory" in logs
+    assert not stdout
+    assert "Directory /tmp/some-file doesn't exist or is not a directory" in stderr


 def test_run_hooks_empty_dir(container: TrackedContainer) -> None:
-    container.run_and_wait(
+    stdout, stderr = container.run_and_wait(
        timeout=10,
+        split_stderr=True,
        command=[
            "bash",
            "-c",
            "mkdir /tmp/empty-dir && source /usr/local/bin/run-hooks.sh /tmp/empty-dir/",
        ],
    )
+    assert not stdout
+    assert "Running hooks in: /tmp/empty-dir/" in stderr


 def run_source_in_dir(
@@ -73,8 +90,9 @@ def run_source_in_dir(
    *,
    subdir: str,
    command_suffix: str = "",
+    no_errors: bool = True,
    no_failure: bool = True,
-) -> str:
+) -> tuple[str, str]:
    host_data_dir = THIS_DIR / subdir
    cont_data_dir = "/home/jovyan/data"
    # https://forums.docker.com/t/all-files-appear-as-executable-in-file-paths-using-bind-mount/99921
@@ -87,65 +105,70 @@ def run_source_in_dir(
    return container.run_and_wait(
        timeout=10,
        volumes={host_data_dir: {"bind": cont_data_dir, "mode": "ro"}},
+        no_errors=no_errors,
        no_failure=no_failure,
+        split_stderr=True,
        command=["bash", "-c", command],
    )


 def test_run_hooks_change(container: TrackedContainer) -> None:
-    logs = run_source_in_dir(container, subdir="data/run-hooks/change")
+    stdout, logs = run_source_in_dir(container, subdir="data/run-hooks/change")

-    assert "Inside a.sh MY_VAR variable has 123 value" in logs
-    assert "Inside b.sh MY_VAR variable has 123 value" in logs
-    assert "Changing value of MY_VAR" in logs
-    assert "After change inside b.sh MY_VAR variable has 456 value" in logs
-    assert "Inside c.sh MY_VAR variable has 456 value" in logs
+    assert "Inside a.sh MY_VAR variable has 123 value" in stdout
+    assert "Inside b.sh MY_VAR variable has 123 value" in stdout
+    assert "Changing value of MY_VAR" in stdout
+    assert "After change inside b.sh MY_VAR variable has 456 value" in stdout
+    assert "Inside c.sh MY_VAR variable has 456 value" in stdout


 def test_run_hooks_executables(container: TrackedContainer) -> None:
-    logs = run_source_in_dir(
+    stdout, logs = run_source_in_dir(
        container,
        subdir="data/run-hooks/executables",
        command_suffix="&& echo SOME_VAR is ${SOME_VAR}",
    )

-    assert "Executable python file was successfully run" in logs
+    assert "Executable python file was successfully run" in stdout
    assert "Ignoring non-executable: /home/jovyan/data-copy//non_executable.py" in logs
-    assert "SOME_VAR is 123" in logs
+    assert "SOME_VAR is 123" in stdout


 def test_run_hooks_failures(container: TrackedContainer) -> None:
-    logs = run_source_in_dir(
-        container, subdir="data/run-hooks/failures", no_failure=False
+    stdout, logs = run_source_in_dir(
+        container,
+        subdir="data/run-hooks/failures",
+        no_errors=False,
+        no_failure=False,
    )

    for file in ["a.sh", "b.py", "c.sh", "d.sh"]:
-        assert f"Started: {file}" in logs
+        assert f"Started: {file}" in stdout

    for file in ["a.sh"]:
-        assert f"Finished: {file}" in logs
+        assert f"Finished: {file}" in stdout
    for file in ["b.py", "c.sh", "d.sh"]:
-        assert f"Finished: {file}" not in logs
+        assert f"Finished: {file}" not in stdout

    for file in ["b.py", "c.sh"]:
        assert (
            f"/home/jovyan/data-copy//{file} has failed, continuing execution" in logs
        )

-    assert "OTHER_VAR=456" in logs
+    assert "OTHER_VAR=456" in stdout


 def test_run_hooks_sh_files(container: TrackedContainer) -> None:
-    logs = run_source_in_dir(container, subdir="data/run-hooks/sh-files")
+    stdout, _ = run_source_in_dir(container, subdir="data/run-hooks/sh-files")

-    assert "Inside executable.sh MY_VAR variable has 0 value" in logs
-    assert "Inside non-executable.sh MY_VAR variable has 1 value" in logs
+    assert "Inside executable.sh MY_VAR variable has 0 value" in stdout
+    assert "Inside non-executable.sh MY_VAR variable has 1 value" in stdout


 def test_run_hooks_unset(container: TrackedContainer) -> None:
-    logs = run_source_in_dir(container, subdir="data/run-hooks/unset")
+    stdout, _ = run_source_in_dir(container, subdir="data/run-hooks/unset")

-    assert "Inside a.sh MY_VAR variable has 123 value" in logs
-    assert "Inside b.sh MY_VAR variable has 123 value" in logs
-    assert "Unsetting MY_VAR" in logs
-    assert "Inside c.sh MY_VAR variable has  value" in logs
+    assert "Inside a.sh MY_VAR variable has 123 value" in stdout
+    assert "Inside b.sh MY_VAR variable has 123 value" in stdout
+    assert "Unsetting MY_VAR" in stdout
+    assert "Inside c.sh MY_VAR variable has  value" in stdout
--- a/tests/by_image/docker-stacks-foundation/test_user_options.py
+++ b/tests/by_image/docker-stacks-foundation/test_user_options.py
@@ -327,3 +327,19 @@ def test_rootless_triplet_sudo(container: TrackedContainer) -> None:
        command=["env"],
    )
    assert "SUDO" not in logs
+
+
+def test_log_stderr(container: TrackedContainer) -> None:
+    """Logs should go to stderr, not stdout"""
+    stdout, stderr = container.run_and_wait(
+        timeout=10,
+        user="root",
+        environment=["NB_USER=root", "NB_UID=0", "NB_GID=0"],
+        command=["echo", "stdout"],
+        split_stderr=True,
+    )
+    # no logs should be on stdout
+    assert stdout.strip() == "stdout"
+    # check that logs were captured
+    assert "Entered start.sh" in stderr
+    assert "Running as root" in stderr
--- a/tests/utils/tracked_container.py
+++ b/tests/utils/tracked_container.py
@@ -1,7 +1,7 @@
 # Copyright (c) Jupyter Development Team.
 # Distributed under the terms of the Modified BSD License.
 import logging
-from typing import Any, LiteralString
+from typing import Any, Literal, LiteralString, overload

 import docker
 from docker.models.containers import Container
@@ -54,9 +54,9 @@ class TrackedContainer:
        )
        LOGGER.info(f"Container {self.container.name} created")

-    def get_logs(self) -> str:
+    def get_logs(self, *, stdout: bool = True, stderr: bool = True) -> str:
        assert self.container is not None
-        logs = self.container.logs().decode()
+        logs = self.container.logs(stdout=stdout, stderr=stderr).decode()
        assert isinstance(logs, str)
        return logs

@@ -82,6 +82,7 @@ class TrackedContainer:
            LOGGER.debug(f"Command output:\n{output}")
        return output

+    @overload
    def run_and_wait(
        self,
        timeout: int,
@@ -89,11 +90,44 @@ class TrackedContainer:
        no_warnings: bool = True,
        no_errors: bool = True,
        no_failure: bool = True,
+        split_stderr: Literal[True],
        **kwargs: Any,
-    ) -> str:
+    ) -> tuple[str, str]: ...
+
+    @overload
+    def run_and_wait(
+        self,
+        timeout: int,
+        *,
+        no_warnings: bool = True,
+        no_errors: bool = True,
+        no_failure: bool = True,
+        split_stderr: Literal[False] = False,
+        **kwargs: Any,
+    ) -> str: ...
+
+    def run_and_wait(
+        self,
+        timeout: int,
+        *,
+        no_warnings: bool = True,
+        no_errors: bool = True,
+        no_failure: bool = True,
+        split_stderr: bool = False,
+        **kwargs: Any,
+    ) -> str | tuple[str, str]:
+        if split_stderr:
+            kwargs.setdefault("tty", False)
+            assert kwargs["tty"] is False, "split_stderr only works with tty=False"
        self.run_detached(**kwargs)
        assert self.container is not None
        rv = self.container.wait(timeout=timeout)
+        stdout: str
+        stderr: str
+        if split_stderr:
+            stdout = self.get_logs(stdout=True, stderr=False)
+            stderr = logs = self.get_logs(stdout=False, stderr=True)
+        else:
            logs = self.get_logs()
        rc_success = rv["StatusCode"] == 0
        should_report = not (
@@ -113,6 +147,9 @@ class TrackedContainer:
        assert no_warnings == (not self.get_warnings(logs))
        assert no_errors == (not self.get_errors(logs))

+        if split_stderr:
+            return (stdout, stderr)
+        else:
            return logs

    @staticmethod