test-driver: Implement debugging breakpoint hooks

Co-authored-by: Maximilian Bosch <maximilian@mbosch.me>
This commit is contained in:
Jacek Galowicz
2025-07-03 09:46:59 +00:00
committed by Maximilian Bosch
parent a86f342ae3
commit d6b326d659
8 changed files with 167 additions and 7 deletions

View File

@@ -14,6 +14,7 @@
extraPythonPackages ? (_: [ ]),
nixosTests,
}:
python3Packages.buildPythonApplication {
pname = "nixos-test-driver";
version = "1.1";
@@ -32,6 +33,7 @@ python3Packages.buildPythonApplication {
junit-xml
ptpython
ipython
remote-pdb
]
++ extraPythonPackages python3Packages;

View File

@@ -5,6 +5,7 @@ from pathlib import Path
import ptpython.ipython
from test_driver.debug import Debug, DebugAbstract, DebugNop
from test_driver.driver import Driver
from test_driver.logger import (
CompositeLogger,
@@ -65,6 +66,10 @@ def main() -> None:
help="drop into a python repl and run the tests interactively",
action=argparse.BooleanOptionalAction,
)
arg_parser.add_argument(
"--debug-hook-attach",
help="Enable interactive debugging breakpoints for sandboxed runs",
)
arg_parser.add_argument(
"--start-scripts",
metavar="START-SCRIPT",
@@ -129,6 +134,10 @@ def main() -> None:
if not args.keep_vm_state:
logger.info("Machine state will be reset. To keep it, pass --keep-vm-state")
debugger: DebugAbstract = DebugNop()
if args.debug_hook_attach is not None:
debugger = Debug(logger, args.debug_hook_attach)
with Driver(
args.start_scripts,
args.vlans,
@@ -137,6 +146,7 @@ def main() -> None:
logger,
args.keep_vm_state,
args.global_timeout,
debug=debugger,
) as driver:
if args.interactive:
history_dir = os.getcwd()

View File

@@ -0,0 +1,53 @@
import logging
import os
import random
import shutil
import subprocess
import sys
from abc import ABC, abstractmethod
from remote_pdb import RemotePdb # type:ignore
from test_driver.logger import AbstractLogger
class DebugAbstract(ABC):
    """Interface of the debug hook that is handed to the test driver."""

    @abstractmethod
    def breakpoint(self, host: str = "127.0.0.1", port: int = 4444) -> None:
        """Stop at the call site; what that means is up to the subclass.

        Args:
            host: Address an implementation may bind a debugger to.
            port: TCP port an implementation may bind a debugger to.
        """
        return None
class DebugNop(DebugAbstract):
    """Debugger stand-in whose breakpoint does nothing."""

    def __init__(self) -> None:
        """Nothing to set up; this class keeps no state."""
        return None

    def breakpoint(self, host: str = "127.0.0.1", port: int = 4444) -> None:
        """Accept the usual breakpoint arguments and ignore them."""
        return None
class Debug(DebugAbstract):
    """Debugger that halts the test driver and serves a remote pdb session."""

    def __init__(self, logger: AbstractLogger, attach_command: str) -> None:
        """Remember where to log and which attach command to advertise.

        Args:
            logger: Receives the "Breakpoint reached" message.
            attach_command: Command printed in that message; the user is told
                to run it with `sudo` and the generated numeric pattern.
        """
        self.breakpoint_on_failure = False
        self.logger = logger
        self.attach = attach_command

    def breakpoint(self, host: str = "127.0.0.1", port: int = 4444) -> None:
        """
        Call this function to stop execution and put the process on sleep while
        at the same time have the test driver provide a debug shell on TCP port
        `port`. This is meant to be used for sandboxed tests that have the test
        driver feature `enableDebugHook` enabled.

        Args:
            host: Address the remote pdb session listens on.
            port: TCP port the remote pdb session listens on.
        """
        # The random number doubles as the sleep duration and as the marker
        # the user passes to the attach command.
        pattern = str(random.randrange(999999, 9999999))
        self.logger.log_test_error(
            f"Breakpoint reached, run 'sudo {self.attach} {pattern}'"
        )

        # Export bash for the attach command (presumably it reads
        # `bashInteractive` from the sleeping process -- confirm against the
        # attach script). `os.environ` only accepts strings, so a missing bash
        # must not be assigned: that would raise TypeError and hide the test
        # failure that brought us here.
        bash = shutil.which("bash")
        if bash is None:
            self.logger.log_test_error(
                "bash not found in PATH, 'bashInteractive' is not set; "
                "attaching a shell may not work"
            )
        else:
            os.environ["bashInteractive"] = bash

        if os.fork() == 0:
            # Child: exists only to host the `sleep <pattern>` process. It
            # must never return into the caller, otherwise a second copy of
            # the test driver would keep running once sleep ends or fails.
            try:
                subprocess.run(["sleep", pattern])
            finally:
                os._exit(0)

        # Parent only from here on.
        # RemotePdb writes log messages to both stderr AND the logger,
        # which is the same here. Hence, disabling the remote_pdb logger
        # to avoid duplicate messages in the build log.
        logging.getLogger("remote_pdb").disabled = True
        # Trace the caller's frame so the session opens where breakpoint()
        # was invoked rather than inside this method.
        RemotePdb(host=host, port=port).set_trace(sys._getframe().f_back)

View File

@@ -13,6 +13,7 @@ from unittest import TestCase
from colorama import Style
from test_driver.debug import DebugAbstract, DebugNop
from test_driver.errors import MachineError, RequestedAssertionFailed
from test_driver.logger import AbstractLogger
from test_driver.machine import Machine, NixStartScript, retry
@@ -67,6 +68,7 @@ class Driver:
global_timeout: int
race_timer: threading.Timer
logger: AbstractLogger
debug: DebugAbstract
def __init__(
self,
@@ -77,12 +79,14 @@ class Driver:
logger: AbstractLogger,
keep_vm_state: bool = False,
global_timeout: int = 24 * 60 * 60 * 7,
debug: DebugAbstract = DebugNop(),
):
self.tests = tests
self.out_dir = out_dir
self.global_timeout = global_timeout
self.race_timer = threading.Timer(global_timeout, self.terminate_test)
self.logger = logger
self.debug = debug
tmp_dir = get_tmp_dir()
@@ -159,6 +163,7 @@ class Driver:
polling_condition=self.polling_condition,
Machine=Machine, # for typing
t=AssertionTester(),
debug=self.debug,
)
machine_symbols = {pythonize_name(m.name): m for m in self.machines}
# If there's exactly one machine, make it available under the name
@@ -224,8 +229,14 @@ class Driver:
for line in f"{exc_prefix}: {exc}".splitlines():
self.logger.log_test_error(line)
self.debug.breakpoint()
sys.exit(1)
except Exception:
self.debug.breakpoint()
raise
def run_tests(self) -> None:
"""Run the test script (for non-interactive test runs)"""
self.logger.info(

View File

@@ -1,6 +1,7 @@
# This file contains type hints that can be prepended to Nix test scripts so they can be type
# checked.
from test_driver.debug import DebugAbstract
from test_driver.driver import Driver
from test_driver.vlan import VLan
from test_driver.machine import Machine
@@ -52,4 +53,5 @@ join_all: Callable[[], None]
serial_stdout_off: Callable[[], None]
serial_stdout_on: Callable[[], None]
polling_condition: PollingConditionProtocol
debug: DebugAbstract
t: TestCase

View File

@@ -7,6 +7,7 @@
}:
let
inherit (lib) types mkOption;
inherit (hostPkgs.stdenv.hostPlatform) isDarwin isLinux;
# TODO (lib): Also use lib equivalent in nodes.nix
/**
@@ -26,7 +27,6 @@ let
*/
f:
lib.mkOverride (opt.highestPrio - 1) (f opt.value);
in
{
options = {
@@ -42,6 +42,15 @@ in
'';
};
# Opt-in switch for the test driver's debugging hook.
# `mkEnableOption` is given an empty name because the `//` update below
# supplies the full description itself.
enableDebugHook = lib.mkEnableOption "" // {
description = ''
Halt test execution after any test fail and provide the possibility to
hook into the sandbox to connect with either the test driver via
`telnet localhost 4444` or with the VMs via SSH and vsocks (see also
`sshBackdoor.enable`).
'';
};
rawTestDerivation = mkOption {
type = types.package;
description = ''
@@ -74,15 +83,23 @@ in
rawTestDerivation = hostPkgs.stdenv.mkDerivation config.rawTestDerivationArg;
rawTestDerivationArg =
finalAttrs:
assert lib.assertMsg (!config.sshBackdoor.enable)
"The SSH backdoor is currently not supported for non-interactive testing! Please make sure to only set `interactive.sshBackdoor.enable = true;`!";
assert lib.assertMsg (
config.sshBackdoor.enable -> isLinux
) "The SSH backdoor is not supported for macOS host systems!";
assert lib.assertMsg (
config.enableDebugHook -> isLinux
) "The debugging hook is not supported for macOS host systems!";
{
name = "vm-test-run-${config.name}";
requiredSystemFeatures =
[ "nixos-test" ]
++ lib.optionals hostPkgs.stdenv.hostPlatform.isLinux [ "kvm" ]
++ lib.optionals hostPkgs.stdenv.hostPlatform.isDarwin [ "apple-virt" ];
[ "nixos-test" ] ++ lib.optional isLinux "kvm" ++ lib.optional isDarwin "apple-virt";
nativeBuildInputs = lib.optionals config.enableDebugHook [
hostPkgs.openssh
hostPkgs.inetutils
];
buildCommand = ''
mkdir -p $out
@@ -90,7 +107,15 @@ in
# effectively mute the XMLLogger
export LOGFILE=/dev/null
${config.driver}/bin/nixos-test-driver -o $out
${lib.optionalString config.enableDebugHook ''
ln -sf \
${hostPkgs.systemd}/lib/systemd/ssh_config.d/20-systemd-ssh-proxy.conf \
ssh_config
''}
${config.driver}/bin/nixos-test-driver \
-o $out \
${lib.optionalString config.enableDebugHook "--debug-hook=${hostPkgs.breakpointHook.attach}"}
'';
passthru = config.passthru;