nixos/modules/virtualisation: additional configuration options (#349537)

oci-containers: additional configuration options
This commit is contained in:
Yethal
2024-12-05 18:48:41 +01:00
committed by GitHub
parent 51e98f6c79
commit 04bf3d8774
2 changed files with 297 additions and 154 deletions

View File

@@ -1,4 +1,10 @@
{ config, options, lib, pkgs, ... }: {
config,
options,
lib,
pkgs,
...
}:
with lib; with lib;
let let
@@ -8,7 +14,8 @@ let
defaultBackend = options.virtualisation.oci-containers.backend.default; defaultBackend = options.virtualisation.oci-containers.backend.default;
containerOptions = containerOptions =
{ ... }: { { ... }:
{
options = { options = {
@@ -77,8 +84,8 @@ let
}; };
cmd = mkOption { cmd = mkOption {
type = with types; listOf str; type = with types; listOf str;
default = []; default = [ ];
description = "Commandline arguments to pass to the image's entrypoint."; description = "Commandline arguments to pass to the image's entrypoint.";
example = literalExpression '' example = literalExpression ''
["--port=9000"] ["--port=9000"]
@@ -87,7 +94,7 @@ let
labels = mkOption { labels = mkOption {
type = with types; attrsOf str; type = with types; attrsOf str;
default = {}; default = { };
description = "Labels to attach to the container at runtime."; description = "Labels to attach to the container at runtime.";
example = literalExpression '' example = literalExpression ''
{ {
@@ -105,26 +112,26 @@ let
environment = mkOption { environment = mkOption {
type = with types; attrsOf str; type = with types; attrsOf str;
default = {}; default = { };
description = "Environment variables to set for this container."; description = "Environment variables to set for this container.";
example = literalExpression '' example = literalExpression ''
{ {
DATABASE_HOST = "db.example.com"; DATABASE_HOST = "db.example.com";
DATABASE_PORT = "3306"; DATABASE_PORT = "3306";
} }
''; '';
}; };
environmentFiles = mkOption { environmentFiles = mkOption {
type = with types; listOf path; type = with types; listOf path;
default = []; default = [ ];
description = "Environment files for this container."; description = "Environment files for this container.";
example = literalExpression '' example = literalExpression ''
[ [
/path/to/.env /path/to/.env
/path/to/.env.secret /path/to/.env.secret
] ]
''; '';
}; };
log-driver = mkOption { log-driver = mkOption {
@@ -147,7 +154,7 @@ let
ports = mkOption { ports = mkOption {
type = with types; listOf str; type = with types; listOf str;
default = []; default = [ ];
description = '' description = ''
Network ports to publish from the container to the outer host. Network ports to publish from the container to the outer host.
@@ -194,7 +201,7 @@ let
volumes = mkOption { volumes = mkOption {
type = with types; listOf str; type = with types; listOf str;
default = []; default = [ ];
description = '' description = ''
List of volumes to attach to this container. List of volumes to attach to this container.
@@ -222,7 +229,7 @@ let
dependsOn = mkOption { dependsOn = mkOption {
type = with types; listOf str; type = with types; listOf str;
default = []; default = [ ];
description = '' description = ''
Define which other containers this one depends on. They will be added to both After and Requires for the unit. Define which other containers this one depends on. They will be added to both After and Requires for the unit.
@@ -247,14 +254,17 @@ let
preRunExtraOptions = mkOption { preRunExtraOptions = mkOption {
type = with types; listOf str; type = with types; listOf str;
default = []; default = [ ];
description = "Extra options for {command}`${defaultBackend}` that go before the `run` argument."; description = "Extra options for {command}`${defaultBackend}` that go before the `run` argument.";
example = [ "--runtime" "runsc" ]; example = [
"--runtime"
"runsc"
];
}; };
extraOptions = mkOption { extraOptions = mkOption {
type = with types; listOf str; type = with types; listOf str;
default = []; default = [ ];
description = "Extra options for {command}`${defaultBackend} run`."; description = "Extra options for {command}`${defaultBackend} run`.";
example = literalExpression '' example = literalExpression ''
["--network=host"] ["--network=host"]
@@ -262,177 +272,293 @@ let
}; };
autoStart = mkOption { autoStart = mkOption {
type = types.bool; type = with types; bool;
default = true; default = true;
description = '' description = ''
When enabled, the container is automatically started on boot. When enabled, the container is automatically started on boot.
If this option is set to false, the container has to be started on-demand via its service. If this option is set to false, the container has to be started on-demand via its service.
''; '';
}; };
# Image pull policy; the selected value is forwarded to `<backend> run --pull`.
pull = mkOption {
  type =
    with types;
    enum [
      "always"
      "missing"
      "never"
      "newer"
    ];
  default = "missing";
  description = ''
    Image pull policy for the container. Must be one of: always, missing, never, newer.

    Note that `newer` is only understood by the `podman` backend;
    `docker run --pull` accepts `always`, `missing` and `never` only.
  '';
};
# Capabilities to grant to the container, keyed by capability name.
# Only attributes whose value is exactly `true` are turned into a
# `--cap-add=<name>` flag; `false` and `null` entries are ignored, which lets
# other modules switch an entry off again.
capAdd = mkOption {
  type = with types; lazyAttrsOf (nullOr bool);
  default = { };
  description = ''
    Capabilities to add to container
  '';
  # The example must be a well-formed attribute set (closing brace, not a second opening one).
  example = literalExpression ''
    {
      SYS_ADMIN = true;
    }
  '';
};
# Capabilities to remove from the container, keyed by capability name.
# Only attributes whose value is exactly `true` are turned into a
# `--cap-drop=<name>` flag; `false` and `null` entries are ignored.
capDrop = mkOption {
  type = with types; lazyAttrsOf (nullOr bool);
  default = { };
  description = ''
    Capabilities to drop from container
  '';
  # The example must be a well-formed attribute set (closing brace, not a second opening one).
  example = literalExpression ''
    {
      SYS_ADMIN = true;
    }
  '';
};
# Device mappings for the container; each entry is emitted as its own
# `--device=<entry>` argument on the `run` command line.
devices = mkOption {
  type = types.listOf types.str;
  default = [ ];
  example = literalExpression ''
    [
      "/dev/dri:/dev/dri"
    ]
  '';
  description = ''
    List of devices to attach to this container.
  '';
};
# Toggles the `--privileged` flag on the `run` command line; off by default.
privileged = mkOption {
  default = false;
  type = types.bool;
  description = ''
    Give extended privileges to the container
  '';
};
# Network names for the container; each entry is emitted as its own
# `--network=<entry>` argument on the `run` command line.
networks = mkOption {
  default = [ ];
  type = types.listOf types.str;
  description = ''
    Networks to attach the container to
  '';
};
}; };
}; };
isValidLogin = login: login.username != null && login.passwordFile != null && login.registry != null; isValidLogin =
login: login.username != null && login.passwordFile != null && login.registry != null;
mkService = name: container: let mkService =
dependsOn = map (x: "${cfg.backend}-${x}.service") container.dependsOn; name: container:
escapedName = escapeShellArg name; let
preStartScript = pkgs.writeShellApplication { dependsOn = map (x: "${cfg.backend}-${x}.service") container.dependsOn;
name = "pre-start"; escapedName = escapeShellArg name;
runtimeInputs = [ ]; preStartScript = pkgs.writeShellApplication {
text = '' name = "pre-start";
${cfg.backend} rm -f ${name} || true runtimeInputs = [ ];
${optionalString (isValidLogin container.login) '' text = ''
# try logging in, if it fails, check if image exists locally ${cfg.backend} rm -f ${name} || true
${cfg.backend} login \ ${optionalString (isValidLogin container.login) ''
${container.login.registry} \ # try logging in, if it fails, check if image exists locally
--username ${container.login.username} \ ${cfg.backend} login \
--password-stdin < ${container.login.passwordFile} \ ${container.login.registry} \
|| ${cfg.backend} image inspect ${container.image} >/dev/null \ --username ${container.login.username} \
|| { echo "image doesn't exist locally and login failed" >&2 ; exit 1; } --password-stdin < ${container.login.passwordFile} \
''} || ${cfg.backend} image inspect ${container.image} >/dev/null \
${optionalString (container.imageFile != null) '' || { echo "image doesn't exist locally and login failed" >&2 ; exit 1; }
${cfg.backend} load -i ${container.imageFile} ''}
''} ${optionalString (container.imageFile != null) ''
${optionalString (container.imageStream != null) '' ${cfg.backend} load -i ${container.imageFile}
${container.imageStream} | ${cfg.backend} load ''}
''} ${optionalString (container.imageStream != null) ''
${optionalString (cfg.backend == "podman") '' ${container.imageStream} | ${cfg.backend} load
rm -f /run/podman-${escapedName}.ctr-id ''}
''} ${optionalString (cfg.backend == "podman") ''
''; rm -f /run/podman-${escapedName}.ctr-id
''}
'';
};
in
{
# Unit wiring: boot activation, network ordering and inter-container dependencies.
# Only auto-started containers are pulled in by multi-user.target.
wantedBy = optional container.autoStart "multi-user.target";
# With neither imageFile nor imageStream set, the image has to be downloaded
# from the registry, so the unit wants (and is ordered after) the network.
wants = optional (
  container.imageFile == null && container.imageStream == null
) "network-online.target";
after =
  lib.optionals (cfg.backend == "docker") [
    "docker.service"
    "docker.socket"
  ]
  ++ optional (
    container.imageFile == null && container.imageStream == null
  ) "network-online.target"
  ++ dependsOn;
# Containers listed in dependsOn are hard requirements of this unit.
requires = dependsOn;
environment = proxy_env;
# Put the selected backend's package on the unit's PATH. The lookup table is
# lazy, so only the package of the active backend is evaluated; any other
# backend value aborts evaluation.
path =
  {
    docker = [ config.virtualisation.docker.package ];
    podman = [ config.virtualisation.podman.package ];
  }.${cfg.backend} or (throw "Unhandled backend: ${cfg.backend}");
# Assemble the `<backend> run ...` invocation. Each list element is one
# argument group; the groups are joined with a backslash-newline so the
# generated script has one group per continued line.
script = concatStringsSep " \\\n " (
[
"exec ${cfg.backend} "
]
# Options that have to appear before the `run` subcommand.
++ map escapeShellArg container.preRunExtraOptions
++ [
"run"
"--rm"
"--name=${escapedName}"
"--log-driver=${container.log-driver}"
]
# Optional settings are only emitted when the user set them (non-null).
++ optional (container.entrypoint != null) "--entrypoint=${escapeShellArg container.entrypoint}"
++ optional (container.hostname != null) "--hostname=${escapeShellArg container.hostname}"
# Podman-only flags; the cidfile path is the same one the stop hooks reference.
++ lib.optionals (cfg.backend == "podman") [
"--cidfile=/run/podman-${escapedName}.ctr-id"
"--cgroups=no-conmon"
"--sdnotify=conmon"
"-d"
"--replace"
]
++ (mapAttrsToList (k: v: "-e ${escapeShellArg k}=${escapeShellArg v}") container.environment)
++ map (f: "--env-file ${escapeShellArg f}") container.environmentFiles
++ map (p: "-p ${escapeShellArg p}") container.ports
++ optional (container.user != null) "-u ${escapeShellArg container.user}"
++ map (v: "-v ${escapeShellArg v}") container.volumes
++ (mapAttrsToList (k: v: "-l ${escapeShellArg k}=${escapeShellArg v}") container.labels)
++ optional (container.workdir != null) "-w ${escapeShellArg container.workdir}"
++ optional (container.privileged) "--privileged"
# Only capabilities whose value is exactly `true` produce a flag;
# `false` and `null` entries are filtered out.
++ mapAttrsToList (k: _: "--cap-add=${escapeShellArg k}") (
filterAttrs (_: v: v == true) container.capAdd
)
++ mapAttrsToList (k: _: "--cap-drop=${escapeShellArg k}") (
filterAttrs (_: v: v == true) container.capDrop
)
++ map (d: "--device=${escapeShellArg d}") container.devices
++ map (n: "--network=${escapeShellArg n}") container.networks
++ [ "--pull ${escapeShellArg container.pull}" ]
# User-supplied extra options come last among the flags, directly before the image.
++ map escapeShellArg container.extraOptions
# NOTE(review): the image reference is the only element not passed through escapeShellArg.
++ [ container.image ]
# Everything after the image is handed to the container as its command arguments.
++ map escapeShellArg container.cmd
);
# Stop and cleanup hooks.
# Podman addresses the container through the cidfile written by `run`;
# other backends address it by name. The name is interpolated into a shell
# command, so the shell-escaped form is used, matching `--name=` in the run
# command line.
preStop =
  if cfg.backend == "podman" then
    "podman stop --ignore --cidfile=/run/podman-${escapedName}.ctr-id"
  else
    "${cfg.backend} stop ${escapedName} || true";
# `|| true` keeps the hook from failing when the container is already gone.
postStop =
  if cfg.backend == "podman" then
    "podman rm -f --ignore --cidfile=/run/podman-${escapedName}.ctr-id"
  else
    "${cfg.backend} rm -f ${escapedName} || true";
# systemd [Service] settings shared by all backends, with podman-specific
# settings merged on top when podman is the active backend.
serviceConfig =
{
### There is no generalized way of supporting `reload` for docker
### containers. Some containers may respond well to SIGHUP sent to their
### init process, but it is not guaranteed; some apps have other reload
### mechanisms, some don't have a reload signal at all, and some docker
### images just have broken signal handling. The best compromise in this
### case is probably to leave ExecReload undefined, so `systemctl reload`
### will at least result in an error instead of potentially undefined
### behaviour.
###
### Advanced users can still override this part of the unit to implement
### a custom reload handler, since the result of all this is a normal
### systemd service from the perspective of the NixOS module system.
###
# ExecReload = ...;
###
# Runs the generated pre-start script before the main `run` command.
ExecStartPre = [ "${preStartScript}/bin/pre-start" ];
# NOTE(review): 0 presumably disables the start timeout so slow image pulls are not killed; confirm.
TimeoutStartSec = 0;
TimeoutStopSec = 120;
Restart = "always";
}
# NOTE(review): Type=notify presumably pairs with the `--sdnotify=conmon` flag passed to podman; confirm.
// optionalAttrs (cfg.backend == "podman") {
Environment = "PODMAN_SYSTEMD_UNIT=podman-${name}.service";
Type = "notify";
NotifyAccess = "all";
};
}; };
in {
wantedBy = [] ++ optional (container.autoStart) "multi-user.target";
wants = lib.optional (container.imageFile == null && container.imageStream == null) "network-online.target";
after = lib.optionals (cfg.backend == "docker") [ "docker.service" "docker.socket" ]
# if imageFile or imageStream is not set, the service needs the network to download the image from the registry
++ lib.optionals (container.imageFile == null && container.imageStream == null) [ "network-online.target" ]
++ dependsOn;
requires = dependsOn;
environment = proxy_env;
path = in
if cfg.backend == "docker" then [ config.virtualisation.docker.package ] {
else if cfg.backend == "podman" then [ config.virtualisation.podman.package ]
else throw "Unhandled backend: ${cfg.backend}";
script = concatStringsSep " \\\n " ([
"exec ${cfg.backend} "
] ++ map escapeShellArg container.preRunExtraOptions ++ [
"run"
"--rm"
"--name=${escapedName}"
"--log-driver=${container.log-driver}"
] ++ optional (container.entrypoint != null)
"--entrypoint=${escapeShellArg container.entrypoint}"
++ optional (container.hostname != null)
"--hostname=${escapeShellArg container.hostname}"
++ lib.optionals (cfg.backend == "podman") [
"--cidfile=/run/podman-${escapedName}.ctr-id"
"--cgroups=no-conmon"
"--sdnotify=conmon"
"-d"
"--replace"
] ++ (mapAttrsToList (k: v: "-e ${escapeShellArg k}=${escapeShellArg v}") container.environment)
++ map (f: "--env-file ${escapeShellArg f}") container.environmentFiles
++ map (p: "-p ${escapeShellArg p}") container.ports
++ optional (container.user != null) "-u ${escapeShellArg container.user}"
++ map (v: "-v ${escapeShellArg v}") container.volumes
++ (mapAttrsToList (k: v: "-l ${escapeShellArg k}=${escapeShellArg v}") container.labels)
++ optional (container.workdir != null) "-w ${escapeShellArg container.workdir}"
++ map escapeShellArg container.extraOptions
++ [container.image]
++ map escapeShellArg container.cmd
);
preStop = if cfg.backend == "podman"
then "podman stop --ignore --cidfile=/run/podman-${escapedName}.ctr-id"
else "${cfg.backend} stop ${name} || true";
postStop = if cfg.backend == "podman"
then "podman rm -f --ignore --cidfile=/run/podman-${escapedName}.ctr-id"
else "${cfg.backend} rm -f ${name} || true";
serviceConfig = {
### There is no generalized way of supporting `reload` for docker
### containers. Some containers may respond well to SIGHUP sent to their
### init process, but it is not guaranteed; some apps have other reload
### mechanisms, some don't have a reload signal at all, and some docker
### images just have broken signal handling. The best compromise in this
### case is probably to leave ExecReload undefined, so `systemctl reload`
### will at least result in an error instead of potentially undefined
### behaviour.
###
### Advanced users can still override this part of the unit to implement
### a custom reload handler, since the result of all this is a normal
### systemd service from the perspective of the NixOS module system.
###
# ExecReload = ...;
###
ExecStartPre = [ "${preStartScript}/bin/pre-start" ];
TimeoutStartSec = 0;
TimeoutStopSec = 120;
Restart = "always";
} // optionalAttrs (cfg.backend == "podman") {
Environment="PODMAN_SYSTEMD_UNIT=podman-${name}.service";
Type="notify";
NotifyAccess="all";
};
};
in {
imports = [ imports = [
( (lib.mkChangedOptionModule [ "docker-containers" ] [ "virtualisation" "oci-containers" ] (oldcfg: {
lib.mkChangedOptionModule backend = "docker";
[ "docker-containers" ] containers = lib.mapAttrs (
[ "virtualisation" "oci-containers" ] n: v:
(oldcfg: { builtins.removeAttrs (
backend = "docker"; v
containers = lib.mapAttrs (n: v: builtins.removeAttrs (v // { // {
extraOptions = v.extraDockerOptions or []; extraOptions = v.extraDockerOptions or [ ];
}) [ "extraDockerOptions" ]) oldcfg.docker-containers; }
}) ) [ "extraDockerOptions" ]
) ) oldcfg.docker-containers;
}))
]; ];
options.virtualisation.oci-containers = { options.virtualisation.oci-containers = {
backend = mkOption { backend = mkOption {
type = types.enum [ "podman" "docker" ]; type = types.enum [
"podman"
"docker"
];
default = if versionAtLeast config.system.stateVersion "22.05" then "podman" else "docker"; default = if versionAtLeast config.system.stateVersion "22.05" then "podman" else "docker";
description = "The underlying Docker implementation to use."; description = "The underlying Docker implementation to use.";
}; };
containers = mkOption { containers = mkOption {
default = {}; default = { };
type = types.attrsOf (types.submodule containerOptions); type = types.attrsOf (types.submodule containerOptions);
description = "OCI (Docker) containers to run as systemd services."; description = "OCI (Docker) containers to run as systemd services.";
}; };
}; };
config = lib.mkIf (cfg.containers != {}) (lib.mkMerge [ config = lib.mkIf (cfg.containers != { }) (
{ lib.mkMerge [
systemd.services = mapAttrs' (n: v: nameValuePair "${cfg.backend}-${n}" (mkService n v)) cfg.containers; {
systemd.services = mapAttrs' (
n: v: nameValuePair "${cfg.backend}-${n}" (mkService n v)
) cfg.containers;
assertions = assertions =
let let
toAssertion = _: { imageFile, imageStream, ... }: toAssertion =
{ assertion = imageFile == null || imageStream == null; _:
{ imageFile, imageStream, ... }:
{
assertion = imageFile == null || imageStream == null;
message = "You can only define one of imageFile and imageStream"; message = "You can only define one of imageFile and imageStream";
}; };
in in
lib.mapAttrsToList toAssertion cfg.containers; lib.mapAttrsToList toAssertion cfg.containers;
} }
(lib.mkIf (cfg.backend == "podman") { (lib.mkIf (cfg.backend == "podman") {
virtualisation.podman.enable = true; virtualisation.podman.enable = true;
}) })
(lib.mkIf (cfg.backend == "docker") { (lib.mkIf (cfg.backend == "docker") {
virtualisation.docker.enable = true; virtualisation.docker.enable = true;
}) })
]); ]
);
} }

View File

@@ -22,6 +22,16 @@ let
image = "nginx-container"; image = "nginx-container";
imageStream = pkgs.dockerTools.examples.nginxStream; imageStream = pkgs.dockerTools.examples.nginxStream;
ports = ["8181:80"]; ports = ["8181:80"];
capAdd = {
CAP_AUDIT_READ = true;
};
capDrop = {
CAP_AUDIT_WRITE = true;
};
privileged = false;
devices = [
"/dev/random:/dev/random"
];
}; };
}; };
@@ -32,11 +42,18 @@ let
}; };
testScript = '' testScript = ''
import json
start_all() start_all()
${backend}.wait_for_unit("${backend}-nginx.service") ${backend}.wait_for_unit("${backend}-nginx.service")
${backend}.wait_for_open_port(8181) ${backend}.wait_for_open_port(8181)
${backend}.wait_until_succeeds("curl -f http://localhost:8181 | grep Hello") ${backend}.wait_until_succeeds("curl -f http://localhost:8181 | grep Hello")
output = json.loads(${backend}.succeed("${backend} inspect nginx --format json").strip())[0]
${backend}.succeed("systemctl stop ${backend}-nginx.service", timeout=10) ${backend}.succeed("systemctl stop ${backend}-nginx.service", timeout=10)
assert output['HostConfig']['CapAdd'] == ["CAP_AUDIT_READ"]
assert output['HostConfig']['CapDrop'] == ${if backend == "docker" then "[\"CAP_AUDIT_WRITE\"]" else "[]"} # Rootless podman runs with no capabilities so it cannot drop them
assert output['HostConfig']['Privileged'] == False
assert output['HostConfig']['Devices'] == [{'PathOnHost': '/dev/random', 'PathInContainer': '/dev/random', 'CgroupPermissions': '${if backend == "docker" then "rwm" else ""}'}]
''; '';
}; };