llama-swap: init module

Co-authored-by: podium868909 <89096245@proton.me>
This commit is contained in:
06kellyjac
2025-06-16 19:57:05 +08:00
committed by Pascal Bach
parent 615db5eecf
commit 110edff547
3 changed files with 127 additions and 0 deletions

View File

@@ -50,6 +50,8 @@
- [go-httpbin](https://github.com/mccutchen/go-httpbin), a reasonably complete and well-tested golang port of httpbin, with zero dependencies outside the go stdlib. Available as [services.go-httpbin](#opt-services.go-httpbin.enable).
- [llama-swap](https://github.com/mostlygeek/llama-swap), a lightweight transparent proxy server that provides automatic model swapping to llama.cpp's server (or any server with an OpenAI compatible endpoint). Available as [services.llama-swap](#opt-services.llama-swap.enable).
- [tuwunel](https://matrix-construct.github.io/tuwunel/), a federated chat server implementing the Matrix protocol, forked from Conduwuit. Available as [services.matrix-tuwunel](#opt-services.matrix-tuwunel.enable).
- [Broadcast Box](https://github.com/Glimesh/broadcast-box), a WebRTC broadcast server. Available as [services.broadcast-box](options.html#opt-services.broadcast-box.enable).

View File

@@ -1217,6 +1217,7 @@
./services/networking/libreswan.nix
./services/networking/livekit-ingress.nix
./services/networking/livekit.nix
./services/networking/llama-swap.nix
./services/networking/lldpd.nix
./services/networking/logmein-hamachi.nix
./services/networking/lokinet.nix

View File

@@ -0,0 +1,124 @@
# NixOS module for the llama-swap service.
{
config,
lib,
pkgs,
...
}:
let
# Shorthand for this module's own option values.
cfg = config.services.llama-swap;
# YAML format helper: supplies both the freeform type used by `settings`
# and the generator used for the config file below.
settingsFormat = pkgs.formats.yaml { };
# `cfg.settings` rendered to a file named "config.yaml"; handed to the
# daemon through its `--config` flag in ExecStart.
configFile = settingsFormat.generate "config.yaml" cfg.settings;
in
{
# User-facing options for the llama-swap service.
options.services.llama-swap = {
  # mkEnableOption already prepends "Whether to enable ", so only the noun
  # phrase is passed here (avoids "Whether to enable enable ...").
  enable = lib.mkEnableOption "the llama-swap service";

  package = lib.mkPackageOption pkgs "llama-swap" { };

  # TCP port passed to the daemon's `--listen` flag.
  port = lib.mkOption {
    type = lib.types.port;
    default = 8080;
    example = 11343;
    description = ''
      Port that llama-swap listens on.
    '';
  };

  openFirewall = lib.mkOption {
    type = lib.types.bool;
    default = false;
    description = ''
      Whether to open the firewall for llama-swap.
      This adds {option}`services.llama-swap.port` to [](#opt-networking.firewall.allowedTCPPorts).
    '';
  };

  # Freeform settings, serialised to YAML and passed via `--config`.
  settings = lib.mkOption {
    type = lib.types.submodule { freeformType = settingsFormat.type; };
    description = ''
      llama-swap configuration. Refer to the [llama-swap example configuration](https://github.com/mostlygeek/llama-swap/blob/main/config.example.yaml)
      for details on supported values.
    '';
    # Escaping used inside the indented string below:
    #   ''${name}   renders as  ${name}   (a Nix interpolation in the shown example)
    #   \''${NAME}  renders as  \${NAME}  (a literal ${NAME} in the resulting string,
    #                                      not a Nix interpolation)
    example = lib.literalExpression ''
      let
        llama-cpp = pkgs.llama-cpp.override { rocmSupport = true; };
        llama-server = lib.getExe' llama-cpp "llama-server";
      in
      {
        healthCheckTimeout = 60;
        models = {
          "some-model" = {
            cmd = "''${llama-server} --port \''${PORT} -m /var/lib/llama-cpp/models/some-model.gguf -ngl 0 --no-webui";
            aliases = [
              "the-best"
            ];
          };
          "other-model" = {
            proxy = "http://127.0.0.1:5555";
            cmd = "''${llama-server} --port 5555 -m /var/lib/llama-cpp/models/other-model.gguf -ngl 0 -c 4096 -np 4 --no-webui";
            concurrencyLimit = 4;
          };
        };
      };
    '';
  };
};
# Everything below only takes effect when `services.llama-swap.enable` is set.
config = lib.mkIf cfg.enable {
  systemd.services.llama-swap = {
    # llama-swap targets OpenAI-compatible servers (not "OpenAPI").
    description = "Model swapping for LLaMA C++ Server (or any local OpenAI compatible server)";
    after = [ "network.target" ];
    wantedBy = [ "multi-user.target" ];

    serviceConfig = {
      Type = "exec";
      # Listens on every interface at the configured port and reads the
      # YAML file generated from `settings`.
      ExecStart = "${lib.getExe cfg.package} --listen :${toString cfg.port} --config ${configFile}";
      Restart = "on-failure";
      RestartSec = 3;

      # Device nodes are deliberately left visible: needed for GPU acceleration.
      PrivateDevices = false;

      # Hardening: identity and privileges.
      DynamicUser = true;
      CapabilityBoundingSet = "";
      NoNewPrivileges = true;
      PrivateUsers = true;
      RestrictSUIDSGID = true;
      LockPersonality = true;
      RemoveIPC = true;

      # Hardening: network access limited to IP and Unix sockets.
      RestrictAddressFamilies = [
        "AF_INET"
        "AF_INET6"
        "AF_UNIX"
      ];

      # Hardening: filesystem and kernel surface.
      PrivateMounts = true;
      PrivateTmp = true;
      ProtectClock = true;
      ProtectControlGroups = true;
      ProtectHome = true;
      ProtectHostname = true;
      ProtectKernelLogs = true;
      ProtectKernelModules = true;
      ProtectKernelTunables = true;
      ProtectProc = "invisible";
      ProcSubset = "pid";
      ProtectSystem = "strict";

      # Hardening: execution restrictions.
      MemoryDenyWriteExecute = true;
      RestrictNamespaces = true;
      RestrictRealtime = true;

      # Hardening: system call filtering; denied calls fail with EPERM.
      SystemCallArchitectures = "native";
      SystemCallFilter = [
        "@system-service"
        "~@privileged"
      ];
      SystemCallErrorNumber = "EPERM";
    };
  };

  # Open the listening port only when explicitly requested.
  networking.firewall.allowedTCPPorts = lib.mkIf cfg.openFirewall [ cfg.port ];
};
# Maintainers responsible for this module.
meta.maintainers = [
  lib.maintainers.jk
  lib.maintainers.podium868909
];
}