test-driver: Implement debugging breakpoint hooks

Co-authored-by: Maximilian Bosch <maximilian@mbosch.me>
This commit is contained in:
Jacek Galowicz
2025-07-03 09:46:59 +00:00
committed by Maximilian Bosch
parent a86f342ae3
commit d6b326d659
8 changed files with 167 additions and 7 deletions

View File

@@ -340,3 +340,54 @@ id-prefix: test-opt-
list-id: test-options-list
source: @NIXOS_TEST_OPTIONS_JSON@
```
## Accessing VMs in the sandbox with SSH {#sec-test-sandbox-breakpoint}
As explained in [](#sec-nixos-test-ssh-access), it's possible to configure an
SSH backdoor based on AF_VSOCK. This can be used to SSH into a VM of a running
build in a sandbox.
This can be done when something in the test fails, e.g.
```nix
{
nodes.machine = {};
sshBackdoor.enable = true;
enableDebugHook = true;
testScript = ''
start_all()
machine.succeed("false") # this will fail
'';
}
```
For the AF_VSOCK feature to work, `/dev/vhost-vsock` must be available in the
sandbox, which can be achieved with e.g.
```
nix-build -A nixosTests.foo --option sandbox-paths /dev/vhost-vsock
```
This will halt the test execution on a test failure and print instructions
on how to enter the sandbox shell of the VM test. Inside, one can log into
e.g. `machine` with
```
ssh -F ./ssh_config vsock/3
```
As described in [](#sec-nixos-test-ssh-access), the numbers for vsock start at
`3` instead of `1`. So the first VM in the network (sorted alphabetically) can
be accessed with `vsock/3`.
Alternatively, it's possible to explicitly set a breakpoint with
`debug.breakpoint()`. This also has the benefit that one can step through
`testScript` with `pdb` like this:
```
$ sudo /nix/store/eeeee-attach <id>
bash# telnet 127.0.0.1 4444
pdb$ …
```

View File

@@ -1902,6 +1902,9 @@
"test-opt-sshBackdoor.vsockOffset": [
"index.html#test-opt-sshBackdoor.vsockOffset"
],
"test-opt-enableDebugHook": [
"index.html#test-opt-enableDebugHook"
],
"test-opt-defaults": [
"index.html#test-opt-defaults"
],
@@ -2010,6 +2013,9 @@
"sec-nixos-test-testing-hardware-features": [
"index.html#sec-nixos-test-testing-hardware-features"
],
"sec-test-sandbox-breakpoint": [
"index.html#sec-test-sandbox-breakpoint"
],
"chap-developing-the-test-driver": [
"index.html#chap-developing-the-test-driver"
],

View File

@@ -14,6 +14,7 @@
extraPythonPackages ? (_: [ ]),
nixosTests,
}:
python3Packages.buildPythonApplication {
pname = "nixos-test-driver";
version = "1.1";
@@ -32,6 +33,7 @@ python3Packages.buildPythonApplication {
junit-xml
ptpython
ipython
remote-pdb
]
++ extraPythonPackages python3Packages;

View File

@@ -5,6 +5,7 @@ from pathlib import Path
import ptpython.ipython
from test_driver.debug import Debug, DebugAbstract, DebugNop
from test_driver.driver import Driver
from test_driver.logger import (
CompositeLogger,
@@ -65,6 +66,10 @@ def main() -> None:
help="drop into a python repl and run the tests interactively",
action=argparse.BooleanOptionalAction,
)
arg_parser.add_argument(
"--debug-hook-attach",
help="Enable interactive debugging breakpoints for sandboxed runs",
)
arg_parser.add_argument(
"--start-scripts",
metavar="START-SCRIPT",
@@ -129,6 +134,10 @@ def main() -> None:
if not args.keep_vm_state:
logger.info("Machine state will be reset. To keep it, pass --keep-vm-state")
debugger: DebugAbstract = DebugNop()
if args.debug_hook_attach is not None:
debugger = Debug(logger, args.debug_hook_attach)
with Driver(
args.start_scripts,
args.vlans,
@@ -137,6 +146,7 @@ def main() -> None:
logger,
args.keep_vm_state,
args.global_timeout,
debug=debugger,
) as driver:
if args.interactive:
history_dir = os.getcwd()

View File

@@ -0,0 +1,53 @@
import logging
import os
import random
import shutil
import subprocess
import sys
from abc import ABC, abstractmethod
from remote_pdb import RemotePdb # type:ignore
from test_driver.logger import AbstractLogger
class DebugAbstract(ABC):
    """Common interface for the test driver's debugging hook.

    Concrete subclasses decide what actually happens when a breakpoint is
    requested; this base class only fixes the call signature.
    """

    @abstractmethod
    def breakpoint(self, host: str = "127.0.0.1", port: int = 4444) -> None:
        """Request a debugging breakpoint.

        :param host: address a debugging session may be bound to.
        :param port: TCP port a debugging session may be bound to.

        The base implementation intentionally does nothing.
        """
        return None
class DebugNop(DebugAbstract):
    """Debug hook that ignores every breakpoint request.

    Serves as the default hook so callers can invoke `breakpoint()`
    unconditionally without checking whether debugging is enabled.
    """

    def __init__(self) -> None:
        """Create the no-op hook; there is no state to set up."""
        return None

    def breakpoint(self, host: str = "127.0.0.1", port: int = 4444) -> None:
        """Accept the same arguments as a real breakpoint and do nothing."""
        return None
class Debug(DebugAbstract):
    """Debug hook that halts the test driver and serves a remote `pdb` session."""

    def __init__(self, logger: AbstractLogger, attach_command: str) -> None:
        """
        :param logger: logger used to announce that a breakpoint was reached.
        :param attach_command: command that is printed (prefixed with `sudo`
            and followed by the breakpoint ID) when a breakpoint is reached.
        """
        self.breakpoint_on_failure = False
        self.logger = logger
        self.attach = attach_command

    def breakpoint(self, host: str = "127.0.0.1", port: int = 4444) -> None:
        """
        Call this function to stop execution and put the process on sleep while
        at the same time have the test driver provide a debug shell on TCP port
        `port`. This is meant to be used for sandboxed tests that have the test
        driver feature `enableDebugHook` enabled.

        :param host: address the remote `pdb` session listens on.
        :param port: TCP port the remote `pdb` session listens on.
        """
        # The random number doubles as the breakpoint ID shown to the user and
        # as the argument of the `sleep` marker process spawned below.
        pattern = str(random.randrange(999999, 9999999))
        self.logger.log_test_error(
            f"Breakpoint reached, run 'sudo {self.attach} {pattern}'"
        )

        # NOTE(review): `bashInteractive` is presumably read by the attach
        # command -- confirm. `shutil.which` returns None when bash is not on
        # PATH; assigning None to os.environ raises a TypeError that would mask
        # the original test failure, so report the problem instead.
        bash = shutil.which("bash")
        if bash is None:
            self.logger.log_test_error(
                "bash not found in PATH, 'bashInteractive' is not set; "
                "attaching to the breakpoint may not work"
            )
        else:
            os.environ["bashInteractive"] = bash

        if os.fork() == 0:
            # Child: only exists to keep the `sleep <pattern>` marker process
            # around. It must never return into the test driver, otherwise a
            # second copy of the driver would keep running once `sleep` ends
            # or fails to start.
            try:
                subprocess.run(["sleep", pattern])
            finally:
                os._exit(0)

        # RemotePdb writes log messages to both stderr AND the logger,
        # which is the same here. Hence, disabling the remote_pdb logger
        # to avoid duplicate messages in the build log.
        logging.getLogger("remote_pdb").disabled = True
        # Trace the caller's frame so the session starts at the call site of
        # `breakpoint()` rather than inside this method.
        RemotePdb(host=host, port=port).set_trace(sys._getframe().f_back)

View File

@@ -13,6 +13,7 @@ from unittest import TestCase
from colorama import Style
from test_driver.debug import DebugAbstract, DebugNop
from test_driver.errors import MachineError, RequestedAssertionFailed
from test_driver.logger import AbstractLogger
from test_driver.machine import Machine, NixStartScript, retry
@@ -67,6 +68,7 @@ class Driver:
global_timeout: int
race_timer: threading.Timer
logger: AbstractLogger
debug: DebugAbstract
def __init__(
self,
@@ -77,12 +79,14 @@ class Driver:
logger: AbstractLogger,
keep_vm_state: bool = False,
global_timeout: int = 24 * 60 * 60 * 7,
debug: DebugAbstract = DebugNop(),
):
self.tests = tests
self.out_dir = out_dir
self.global_timeout = global_timeout
self.race_timer = threading.Timer(global_timeout, self.terminate_test)
self.logger = logger
self.debug = debug
tmp_dir = get_tmp_dir()
@@ -159,6 +163,7 @@ class Driver:
polling_condition=self.polling_condition,
Machine=Machine, # for typing
t=AssertionTester(),
debug=self.debug,
)
machine_symbols = {pythonize_name(m.name): m for m in self.machines}
# If there's exactly one machine, make it available under the name
@@ -224,8 +229,14 @@ class Driver:
for line in f"{exc_prefix}: {exc}".splitlines():
self.logger.log_test_error(line)
self.debug.breakpoint()
sys.exit(1)
except Exception:
self.debug.breakpoint()
raise
def run_tests(self) -> None:
"""Run the test script (for non-interactive test runs)"""
self.logger.info(

View File

@@ -1,6 +1,7 @@
# This file contains type hints that can be prepended to Nix test scripts so they can be type
# checked.
from test_driver.debug import DebugAbstract
from test_driver.driver import Driver
from test_driver.vlan import VLan
from test_driver.machine import Machine
@@ -52,4 +53,5 @@ join_all: Callable[[], None]
serial_stdout_off: Callable[[], None]
serial_stdout_on: Callable[[], None]
polling_condition: PollingConditionProtocol
debug: DebugAbstract
t: TestCase

View File

@@ -7,6 +7,7 @@
}:
let
inherit (lib) types mkOption;
inherit (hostPkgs.stdenv.hostPlatform) isDarwin isLinux;
# TODO (lib): Also use lib equivalent in nodes.nix
/**
@@ -26,7 +27,6 @@ let
*/
f:
lib.mkOverride (opt.highestPrio - 1) (f opt.value);
in
{
options = {
@@ -42,6 +42,15 @@ in
'';
};
enableDebugHook = lib.mkEnableOption "" // {
description = ''
Halt test execution after any test fail and provide the possibility to
hook into the sandbox to connect with either the test driver via
`telnet localhost 4444` or with the VMs via SSH and vsocks (see also
`sshBackdoor.enable`).
'';
};
rawTestDerivation = mkOption {
type = types.package;
description = ''
@@ -74,15 +83,23 @@ in
rawTestDerivation = hostPkgs.stdenv.mkDerivation config.rawTestDerivationArg;
rawTestDerivationArg =
finalAttrs:
assert lib.assertMsg (!config.sshBackdoor.enable)
"The SSH backdoor is currently not supported for non-interactive testing! Please make sure to only set `interactive.sshBackdoor.enable = true;`!";
assert lib.assertMsg (
config.sshBackdoor.enable -> isLinux
) "The SSH backdoor is not supported for macOS host systems!";
assert lib.assertMsg (
config.enableDebugHook -> isLinux
) "The debugging hook is not supported for macOS host systems!";
{
name = "vm-test-run-${config.name}";
requiredSystemFeatures =
[ "nixos-test" ]
++ lib.optionals hostPkgs.stdenv.hostPlatform.isLinux [ "kvm" ]
++ lib.optionals hostPkgs.stdenv.hostPlatform.isDarwin [ "apple-virt" ];
[ "nixos-test" ] ++ lib.optional isLinux "kvm" ++ lib.optional isDarwin "apple-virt";
nativeBuildInputs = lib.optionals config.enableDebugHook [
hostPkgs.openssh
hostPkgs.inetutils
];
buildCommand = ''
mkdir -p $out
@@ -90,7 +107,15 @@ in
# effectively mute the XMLLogger
export LOGFILE=/dev/null
${config.driver}/bin/nixos-test-driver -o $out
${lib.optionalString config.enableDebugHook ''
ln -sf \
${hostPkgs.systemd}/lib/systemd/ssh_config.d/20-systemd-ssh-proxy.conf \
ssh_config
''}
${config.driver}/bin/nixos-test-driver \
-o $out \
${lib.optionalString config.enableDebugHook "--debug-hook=${hostPkgs.breakpointHook.attach}"}
'';
passthru = config.passthru;