diff --git a/jupyterhub/app.py b/jupyterhub/app.py
index ba7dd174..880b97f7 100644
--- a/jupyterhub/app.py
+++ b/jupyterhub/app.py
@@ -57,6 +57,7 @@ from .traitlets import URLPrefix, Command
 from .utils import (
     url_path_join,
     ISO8601_ms, ISO8601_s,
+    print_stacks, print_ps_info,
 )
 # classes for config
 from .auth import Authenticator, PAMAuthenticator
@@ -1675,6 +1676,14 @@ class JupyterHub(Application):
 
     def init_signal(self):
         signal.signal(signal.SIGTERM, self.sigterm)
+        if hasattr(signal, 'SIGINFO'):
+            signal.signal(signal.SIGINFO, self.log_status)
+
+    def log_status(self, signum, frame):
+        """Log current status, triggered by SIGINFO (^T in many terminals)"""
+        self.log.debug("Received signal %s[%s]", signum, signal.getsignal(signum))
+        print_ps_info()
+        print_stacks()
 
     def sigterm(self, signum, frame):
         self.log.critical("Received SIGTERM, shutting down")
diff --git a/jupyterhub/log.py b/jupyterhub/log.py
index 3520d26d..d975e9d6 100644
--- a/jupyterhub/log.py
+++ b/jupyterhub/log.py
@@ -10,6 +10,25 @@ from tornado.web import StaticFileHandler, HTTPError
 
 from .metrics import prometheus_log_method
 
 
+def coroutine_frames(all_frames):
+    """Extract coroutine boilerplate frames from a frame list
+
+    for better stack/traceback printing of coroutines
+    """
+    useful_frames = []
+    for frame in all_frames:
+        if frame[0] == '<string>' and frame[2] == 'raise_exc_info':
+            continue
+        # start out conservative with filename + function matching
+        # maybe just filename matching would be sufficient
+        elif frame[0].endswith('tornado/gen.py') and frame[2] in {'run', 'wrapper', '__init__'}:
+            continue
+        elif frame[0].endswith('tornado/concurrent.py') and frame[2] == 'result':
+            continue
+        useful_frames.append(frame)
+    return useful_frames
+
+
 def coroutine_traceback(typ, value, tb):
     """Scrub coroutine frames from a traceback
 
@@ -20,17 +39,8 @@ def coroutine_traceback(typ, value, tb):
     Returns a list of strings (like traceback.format_tb)
     """
     all_frames = traceback.extract_tb(tb)
-    useful_frames = []
-    for frame in all_frames:
-        if frame[0] == '<string>' and frame[2] == 'raise_exc_info':
-            continue
-        # start out conservative with filename + function matching
-        # maybe just filename matching would be sufficient
-        elif frame[0].endswith('tornado/gen.py') and frame[2] in {'run', 'wrapper'}:
-            continue
-        elif frame[0].endswith('tornado/concurrent.py') and frame[2] == 'result':
-            continue
-        useful_frames.append(frame)
+    useful_frames = coroutine_frames(all_frames)
+
     tb_list = ['Traceback (most recent call last):\n']
     tb_list.extend(traceback.format_list(useful_frames))
     tb_list.extend(traceback.format_exception_only(typ, value))
diff --git a/jupyterhub/utils.py b/jupyterhub/utils.py
index 5b6e181f..bff45269 100644
--- a/jupyterhub/utils.py
+++ b/jupyterhub/utils.py
@@ -10,6 +10,8 @@ import hashlib
 from hmac import compare_digest
 import os
 import socket
+import sys
+import threading
 from threading import Thread
 import uuid
 import warnings
@@ -298,3 +300,116 @@ def url_path_join(*pieces):
 
     return result
 
+
+def print_ps_info(file=sys.stderr):
+    """Print process summary info from psutil
+
+    warns if psutil is unavailable
+    """
+    try:
+        import psutil
+    except ImportError:
+        # nothing to print
+        warnings.warn(
+            "psutil unavailable. Install psutil to get CPU and memory stats",
+            stacklevel=2
+        )
+        return
+    p = psutil.Process()
+    # format CPU percentage
+    cpu = p.cpu_percent(0.1)
+    if cpu >= 10:
+        cpu_s = "%i" % cpu
+    else:
+        cpu_s = "%.1f" % cpu
+
+    # format memory (only resident set)
+    rss = p.memory_info().rss
+    if rss >= 1e9:
+        mem_s = '%.1fG' % (rss/1e9)
+    elif rss >= 1e7:
+        mem_s = '%.0fM' % (rss/1e6)
+    elif rss >= 1e6:
+        mem_s = '%.1fM' % (rss/1e6)
+    else:
+        mem_s = '%.0fk' % (rss/1e3)
+
+    # left-justify and shrink-to-fit columns
+    cpulen = max(len(cpu_s), 4)
+    memlen = max(len(mem_s), 3)
+    fd_s = str(p.num_fds())
+    fdlen = max(len(fd_s), 3)
+    threadlen = len('threads')
+
+    print("%s %s %s %s" % (
+        '%CPU'.ljust(cpulen),
+        'MEM'.ljust(memlen),
+        'FDs'.ljust(fdlen),
+        'threads',
+    ), file=file)
+
+    print("%s %s %s %s" % (
+        cpu_s.ljust(cpulen),
+        mem_s.ljust(memlen),
+        fd_s.ljust(fdlen),
+        str(p.num_threads()).ljust(7),
+    ), file=file)
+
+    # trailing blank line
+    print('', file=file)
+
+
+def print_stacks(file=sys.stderr):
+    """Print current status of the process
+
+    For debugging purposes.
+    Used as part of SIGINFO handler.
+
+    - Shows active thread count
+    - Shows current stack for all threads
+
+    Parameters:
+
+    file: file to write output to (default: stderr)
+
+    """
+    # local imports because these will not be used often,
+    # no need to add them to startup
+    import asyncio
+    import resource
+    import traceback
+    from .log import coroutine_frames
+
+    print("Active threads: %i" % threading.active_count(), file=file)
+    for thread in threading.enumerate():
+        print("Thread %s:" % thread.name, end='', file=file)
+        frame = sys._current_frames()[thread.ident]
+        stack = traceback.extract_stack(frame)
+        if thread is threading.current_thread():
+            # truncate last two frames of the current thread
+            # which are this function and its caller
+            stack = stack[:-2]
+        stack = coroutine_frames(stack)
+        if stack:
+            last_frame = stack[-1]
+            if (
+                last_frame[0].endswith('threading.py')
+                and last_frame[-1] == 'waiter.acquire()'
+            ):
+                # thread is waiting on a condition
+                # call it idle rather than showing the uninteresting stack
+                # most threadpools will be in this state
+                print(' idle', file=file)
+                continue
+
+        print(''.join(['\n'] + traceback.format_list(stack)), file=file)
+
+    # also show asyncio tasks, if any
+    # this will increase over time as we transition from tornado
+    # coroutines to native `async def`
+    tasks = asyncio.Task.all_tasks()
+    if tasks:
+        print("AsyncIO tasks: %i" % len(tasks), file=file)
+        for task in tasks:
+            task.print_stack(file=file)
+