test-driver: Implement debugging breakpoint hooks
Co-authored-by: Maximilian Bosch <maximilian@mbosch.me>
This commit is contained in:
committed by
Maximilian Bosch
parent
a86f342ae3
commit
d6b326d659
@@ -340,3 +340,54 @@ id-prefix: test-opt-
|
|||||||
list-id: test-options-list
|
list-id: test-options-list
|
||||||
source: @NIXOS_TEST_OPTIONS_JSON@
|
source: @NIXOS_TEST_OPTIONS_JSON@
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Accessing VMs in the sandbox with SSH {#sec-test-sandbox-breakpoint}
|
||||||
|
|
||||||
|
As explained in [](#sec-nixos-test-ssh-access), it's possible to configure an
|
||||||
|
SSH backdoor based on AF_VSOCK. This can be used to SSH into a VM of a running
|
||||||
|
build in a sandbox.
|
||||||
|
|
||||||
|
This can be done when something in the test fails, e.g.
|
||||||
|
|
||||||
|
```nix
|
||||||
|
{
|
||||||
|
nodes.machine = {};
|
||||||
|
|
||||||
|
sshBackdoor.enable = true;
|
||||||
|
enableDebugHook = true;
|
||||||
|
|
||||||
|
testScript = ''
|
||||||
|
start_all()
|
||||||
|
machine.succeed("false") # this will fail
|
||||||
|
'';
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
For the AF_VSOCK feature to work, `/dev/vhost-vsock` is needed in the sandbox
|
||||||
|
which can be done with e.g.
|
||||||
|
|
||||||
|
```
|
||||||
|
nix-build -A nixosTests.foo --option sandbox-paths /dev/vhost-vsock
|
||||||
|
```
|
||||||
|
|
||||||
|
Enabling the debug hook will halt the test execution on a test failure and print instructions
|
||||||
|
on how to enter the sandbox shell of the VM test. Inside, one can log into
|
||||||
|
e.g. `machine` with
|
||||||
|
|
||||||
|
```
|
||||||
|
ssh -F ./ssh_config vsock/3
|
||||||
|
```
|
||||||
|
|
||||||
|
As described in [](#sec-nixos-test-ssh-access), the numbers for vsock start at
|
||||||
|
`3` instead of `1`. So the first VM in the network (sorted alphabetically) can
|
||||||
|
be accessed with `vsock/3`.
|
||||||
|
|
||||||
|
Alternatively, it's possible to explicitly set a breakpoint with
|
||||||
|
`debug.breakpoint()`. This also has the benefit that one can step through
|
||||||
|
`testScript` with `pdb` like this:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo /nix/store/eeeee-attach <id>
|
||||||
|
bash# telnet 127.0.0.1 4444
|
||||||
|
pdb$ …
|
||||||
|
```
|
||||||
|
|||||||
@@ -1902,6 +1902,9 @@
|
|||||||
"test-opt-sshBackdoor.vsockOffset": [
|
"test-opt-sshBackdoor.vsockOffset": [
|
||||||
"index.html#test-opt-sshBackdoor.vsockOffset"
|
"index.html#test-opt-sshBackdoor.vsockOffset"
|
||||||
],
|
],
|
||||||
|
"test-opt-enableDebugHook": [
|
||||||
|
"index.html#test-opt-enableDebugHook"
|
||||||
|
],
|
||||||
"test-opt-defaults": [
|
"test-opt-defaults": [
|
||||||
"index.html#test-opt-defaults"
|
"index.html#test-opt-defaults"
|
||||||
],
|
],
|
||||||
@@ -2010,6 +2013,9 @@
|
|||||||
"sec-nixos-test-testing-hardware-features": [
|
"sec-nixos-test-testing-hardware-features": [
|
||||||
"index.html#sec-nixos-test-testing-hardware-features"
|
"index.html#sec-nixos-test-testing-hardware-features"
|
||||||
],
|
],
|
||||||
|
"sec-test-sandbox-breakpoint": [
|
||||||
|
"index.html#sec-test-sandbox-breakpoint"
|
||||||
|
],
|
||||||
"chap-developing-the-test-driver": [
|
"chap-developing-the-test-driver": [
|
||||||
"index.html#chap-developing-the-test-driver"
|
"index.html#chap-developing-the-test-driver"
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -14,6 +14,7 @@
|
|||||||
extraPythonPackages ? (_: [ ]),
|
extraPythonPackages ? (_: [ ]),
|
||||||
nixosTests,
|
nixosTests,
|
||||||
}:
|
}:
|
||||||
|
|
||||||
python3Packages.buildPythonApplication {
|
python3Packages.buildPythonApplication {
|
||||||
pname = "nixos-test-driver";
|
pname = "nixos-test-driver";
|
||||||
version = "1.1";
|
version = "1.1";
|
||||||
@@ -32,6 +33,7 @@ python3Packages.buildPythonApplication {
|
|||||||
junit-xml
|
junit-xml
|
||||||
ptpython
|
ptpython
|
||||||
ipython
|
ipython
|
||||||
|
remote-pdb
|
||||||
]
|
]
|
||||||
++ extraPythonPackages python3Packages;
|
++ extraPythonPackages python3Packages;
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
import ptpython.ipython
|
import ptpython.ipython
|
||||||
|
|
||||||
|
from test_driver.debug import Debug, DebugAbstract, DebugNop
|
||||||
from test_driver.driver import Driver
|
from test_driver.driver import Driver
|
||||||
from test_driver.logger import (
|
from test_driver.logger import (
|
||||||
CompositeLogger,
|
CompositeLogger,
|
||||||
@@ -65,6 +66,10 @@ def main() -> None:
|
|||||||
help="drop into a python repl and run the tests interactively",
|
help="drop into a python repl and run the tests interactively",
|
||||||
action=argparse.BooleanOptionalAction,
|
action=argparse.BooleanOptionalAction,
|
||||||
)
|
)
|
||||||
|
arg_parser.add_argument(
|
||||||
|
"--debug-hook-attach",
|
||||||
|
help="Enable interactive debugging breakpoints for sandboxed runs",
|
||||||
|
)
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
"--start-scripts",
|
"--start-scripts",
|
||||||
metavar="START-SCRIPT",
|
metavar="START-SCRIPT",
|
||||||
@@ -129,6 +134,10 @@ def main() -> None:
|
|||||||
if not args.keep_vm_state:
|
if not args.keep_vm_state:
|
||||||
logger.info("Machine state will be reset. To keep it, pass --keep-vm-state")
|
logger.info("Machine state will be reset. To keep it, pass --keep-vm-state")
|
||||||
|
|
||||||
|
debugger: DebugAbstract = DebugNop()
|
||||||
|
if args.debug_hook_attach is not None:
|
||||||
|
debugger = Debug(logger, args.debug_hook_attach)
|
||||||
|
|
||||||
with Driver(
|
with Driver(
|
||||||
args.start_scripts,
|
args.start_scripts,
|
||||||
args.vlans,
|
args.vlans,
|
||||||
@@ -137,6 +146,7 @@ def main() -> None:
|
|||||||
logger,
|
logger,
|
||||||
args.keep_vm_state,
|
args.keep_vm_state,
|
||||||
args.global_timeout,
|
args.global_timeout,
|
||||||
|
debug=debugger,
|
||||||
) as driver:
|
) as driver:
|
||||||
if args.interactive:
|
if args.interactive:
|
||||||
history_dir = os.getcwd()
|
history_dir = os.getcwd()
|
||||||
|
|||||||
53
nixos/lib/test-driver/src/test_driver/debug.py
Normal file
53
nixos/lib/test-driver/src/test_driver/debug.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
from remote_pdb import RemotePdb # type:ignore
|
||||||
|
|
||||||
|
from test_driver.logger import AbstractLogger
|
||||||
|
|
||||||
|
|
||||||
|
class DebugAbstract(ABC):
    """Interface of the debugging hook object handed to the test driver."""

    @abstractmethod
    def breakpoint(self, host: str = "127.0.0.1", port: int = 4444) -> None:
        """Request a debugging stop; `host`/`port` name the debugger endpoint.

        Concrete subclasses decide what (if anything) happens here.
        """
|
||||||
|
|
||||||
|
|
||||||
|
class DebugNop(DebugAbstract):
    """Debugger implementation whose hooks do nothing at all."""

    def __init__(self) -> None:
        # Nothing to set up: this object carries no state.
        return None

    def breakpoint(self, host: str = "127.0.0.1", port: int = 4444) -> None:
        """Accept the same arguments as the real hook and return immediately."""
        return None
|
||||||
|
|
||||||
|
|
||||||
|
class Debug(DebugAbstract):
    """Debugging hook that halts the test driver and exposes a remote pdb.

    `attach_command` is the command that is printed to the user so they can
    attach to the halted run; `logger` receives the instructions.
    """

    def __init__(self, logger: AbstractLogger, attach_command: str) -> None:
        self.breakpoint_on_failure = False
        self.logger = logger
        self.attach = attach_command

    def breakpoint(self, host: str = "127.0.0.1", port: int = 4444) -> None:
        """
        Call this function to stop execution and put the process to sleep
        while at the same time having the test driver provide a debug shell
        on TCP port `port` (bound to `host`). This is meant to be used for
        sandboxed tests that have the test driver feature `enableDebugHook`
        enabled.
        """
        # The random number doubles as the `sleep` duration and as the
        # identifier that is passed to the attach command.
        pattern = str(random.randrange(999999, 9999999))
        self.logger.log_test_error(
            f"Breakpoint reached, run 'sudo {self.attach} {pattern}'"
        )

        bash = shutil.which("bash")
        if bash is None:
            # Assigning None to os.environ raises an opaque TypeError; report
            # the real problem and keep the pdb session available instead.
            # NOTE(review): presumably the attach command needs this variable
            # to spawn its shell -- confirm against breakpointHook.
            self.logger.log_test_error(
                "bash not found in PATH, not exporting bashInteractive"
            )
        else:
            os.environ["bashInteractive"] = bash

        if os.fork() == 0:
            # Child: only exists to keep the marker `sleep` process around.
            # It must never return into the caller, otherwise a second copy
            # of the driver would continue the test run (and its cleanup)
            # once `sleep` exits. os._exit skips atexit/finally handlers
            # inherited from the parent.
            status = 1
            try:
                subprocess.run(["sleep", pattern])
                status = 0
            finally:
                os._exit(status)
        else:
            # RemotePdb writes log messages to both stderr AND the logger,
            # which is the same here. Hence, disabling the remote_pdb logger
            # to avoid duplicate messages in the build log.
            logging.getLogger("remote_pdb").disabled = True
            # Trace the caller's frame so the session starts at the
            # breakpoint call site rather than inside this method.
            RemotePdb(host=host, port=port).set_trace(sys._getframe().f_back)
|
||||||
@@ -13,6 +13,7 @@ from unittest import TestCase
|
|||||||
|
|
||||||
from colorama import Style
|
from colorama import Style
|
||||||
|
|
||||||
|
from test_driver.debug import DebugAbstract, DebugNop
|
||||||
from test_driver.errors import MachineError, RequestedAssertionFailed
|
from test_driver.errors import MachineError, RequestedAssertionFailed
|
||||||
from test_driver.logger import AbstractLogger
|
from test_driver.logger import AbstractLogger
|
||||||
from test_driver.machine import Machine, NixStartScript, retry
|
from test_driver.machine import Machine, NixStartScript, retry
|
||||||
@@ -67,6 +68,7 @@ class Driver:
|
|||||||
global_timeout: int
|
global_timeout: int
|
||||||
race_timer: threading.Timer
|
race_timer: threading.Timer
|
||||||
logger: AbstractLogger
|
logger: AbstractLogger
|
||||||
|
debug: DebugAbstract
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@@ -77,12 +79,14 @@ class Driver:
|
|||||||
logger: AbstractLogger,
|
logger: AbstractLogger,
|
||||||
keep_vm_state: bool = False,
|
keep_vm_state: bool = False,
|
||||||
global_timeout: int = 24 * 60 * 60 * 7,
|
global_timeout: int = 24 * 60 * 60 * 7,
|
||||||
|
debug: DebugAbstract = DebugNop(),
|
||||||
):
|
):
|
||||||
self.tests = tests
|
self.tests = tests
|
||||||
self.out_dir = out_dir
|
self.out_dir = out_dir
|
||||||
self.global_timeout = global_timeout
|
self.global_timeout = global_timeout
|
||||||
self.race_timer = threading.Timer(global_timeout, self.terminate_test)
|
self.race_timer = threading.Timer(global_timeout, self.terminate_test)
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
|
self.debug = debug
|
||||||
|
|
||||||
tmp_dir = get_tmp_dir()
|
tmp_dir = get_tmp_dir()
|
||||||
|
|
||||||
@@ -159,6 +163,7 @@ class Driver:
|
|||||||
polling_condition=self.polling_condition,
|
polling_condition=self.polling_condition,
|
||||||
Machine=Machine, # for typing
|
Machine=Machine, # for typing
|
||||||
t=AssertionTester(),
|
t=AssertionTester(),
|
||||||
|
debug=self.debug,
|
||||||
)
|
)
|
||||||
machine_symbols = {pythonize_name(m.name): m for m in self.machines}
|
machine_symbols = {pythonize_name(m.name): m for m in self.machines}
|
||||||
# If there's exactly one machine, make it available under the name
|
# If there's exactly one machine, make it available under the name
|
||||||
@@ -224,8 +229,14 @@ class Driver:
|
|||||||
for line in f"{exc_prefix}: {exc}".splitlines():
|
for line in f"{exc_prefix}: {exc}".splitlines():
|
||||||
self.logger.log_test_error(line)
|
self.logger.log_test_error(line)
|
||||||
|
|
||||||
|
self.debug.breakpoint()
|
||||||
|
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
self.debug.breakpoint()
|
||||||
|
raise
|
||||||
|
|
||||||
def run_tests(self) -> None:
|
def run_tests(self) -> None:
|
||||||
"""Run the test script (for non-interactive test runs)"""
|
"""Run the test script (for non-interactive test runs)"""
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
# This file contains type hints that can be prepended to Nix test scripts so they can be type
|
# This file contains type hints that can be prepended to Nix test scripts so they can be type
|
||||||
# checked.
|
# checked.
|
||||||
|
|
||||||
|
from test_driver.debug import DebugAbstract
|
||||||
from test_driver.driver import Driver
|
from test_driver.driver import Driver
|
||||||
from test_driver.vlan import VLan
|
from test_driver.vlan import VLan
|
||||||
from test_driver.machine import Machine
|
from test_driver.machine import Machine
|
||||||
@@ -52,4 +53,5 @@ join_all: Callable[[], None]
|
|||||||
serial_stdout_off: Callable[[], None]
|
serial_stdout_off: Callable[[], None]
|
||||||
serial_stdout_on: Callable[[], None]
|
serial_stdout_on: Callable[[], None]
|
||||||
polling_condition: PollingConditionProtocol
|
polling_condition: PollingConditionProtocol
|
||||||
|
debug: DebugAbstract
|
||||||
t: TestCase
|
t: TestCase
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
}:
|
}:
|
||||||
let
|
let
|
||||||
inherit (lib) types mkOption;
|
inherit (lib) types mkOption;
|
||||||
|
inherit (hostPkgs.stdenv.hostPlatform) isDarwin isLinux;
|
||||||
|
|
||||||
# TODO (lib): Also use lib equivalent in nodes.nix
|
# TODO (lib): Also use lib equivalent in nodes.nix
|
||||||
/**
|
/**
|
||||||
@@ -26,7 +27,6 @@ let
|
|||||||
*/
|
*/
|
||||||
f:
|
f:
|
||||||
lib.mkOverride (opt.highestPrio - 1) (f opt.value);
|
lib.mkOverride (opt.highestPrio - 1) (f opt.value);
|
||||||
|
|
||||||
in
|
in
|
||||||
{
|
{
|
||||||
options = {
|
options = {
|
||||||
@@ -42,6 +42,15 @@ in
|
|||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enableDebugHook = lib.mkEnableOption "" // {
|
||||||
|
description = ''
|
||||||
|
Halt test execution after any test failure and provide the possibility to
|
||||||
|
hook into the sandbox to connect with either the test driver via
|
||||||
|
`telnet localhost 4444` or with the VMs via SSH and vsocks (see also
|
||||||
|
`sshBackdoor.enable`).
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
rawTestDerivation = mkOption {
|
rawTestDerivation = mkOption {
|
||||||
type = types.package;
|
type = types.package;
|
||||||
description = ''
|
description = ''
|
||||||
@@ -74,15 +83,23 @@ in
|
|||||||
rawTestDerivation = hostPkgs.stdenv.mkDerivation config.rawTestDerivationArg;
|
rawTestDerivation = hostPkgs.stdenv.mkDerivation config.rawTestDerivationArg;
|
||||||
rawTestDerivationArg =
|
rawTestDerivationArg =
|
||||||
finalAttrs:
|
finalAttrs:
|
||||||
assert lib.assertMsg (!config.sshBackdoor.enable)
|
assert lib.assertMsg (
|
||||||
"The SSH backdoor is currently not supported for non-interactive testing! Please make sure to only set `interactive.sshBackdoor.enable = true;`!";
|
config.sshBackdoor.enable -> isLinux
|
||||||
|
) "The SSH backdoor is not supported for macOS host systems!";
|
||||||
|
|
||||||
|
assert lib.assertMsg (
|
||||||
|
config.enableDebugHook -> isLinux
|
||||||
|
) "The debugging hook is not supported for macOS host systems!";
|
||||||
{
|
{
|
||||||
name = "vm-test-run-${config.name}";
|
name = "vm-test-run-${config.name}";
|
||||||
|
|
||||||
requiredSystemFeatures =
|
requiredSystemFeatures =
|
||||||
[ "nixos-test" ]
|
[ "nixos-test" ] ++ lib.optional isLinux "kvm" ++ lib.optional isDarwin "apple-virt";
|
||||||
++ lib.optionals hostPkgs.stdenv.hostPlatform.isLinux [ "kvm" ]
|
|
||||||
++ lib.optionals hostPkgs.stdenv.hostPlatform.isDarwin [ "apple-virt" ];
|
nativeBuildInputs = lib.optionals config.enableDebugHook [
|
||||||
|
hostPkgs.openssh
|
||||||
|
hostPkgs.inetutils
|
||||||
|
];
|
||||||
|
|
||||||
buildCommand = ''
|
buildCommand = ''
|
||||||
mkdir -p $out
|
mkdir -p $out
|
||||||
@@ -90,7 +107,15 @@ in
|
|||||||
# effectively mute the XMLLogger
|
# effectively mute the XMLLogger
|
||||||
export LOGFILE=/dev/null
|
export LOGFILE=/dev/null
|
||||||
|
|
||||||
${config.driver}/bin/nixos-test-driver -o $out
|
${lib.optionalString config.enableDebugHook ''
|
||||||
|
ln -sf \
|
||||||
|
${hostPkgs.systemd}/lib/systemd/ssh_config.d/20-systemd-ssh-proxy.conf \
|
||||||
|
ssh_config
|
||||||
|
''}
|
||||||
|
|
||||||
|
${config.driver}/bin/nixos-test-driver \
|
||||||
|
-o $out \
|
||||||
|
${lib.optionalString config.enableDebugHook "--debug-hook=${hostPkgs.breakpointHook.attach}"}
|
||||||
'';
|
'';
|
||||||
|
|
||||||
passthru = config.passthru;
|
passthru = config.passthru;
|
||||||
|
|||||||
Reference in New Issue
Block a user