llama-swap: init module
Co-authored-by: podium868909 <89096245@proton.me>
This commit is contained in:
@@ -50,6 +50,8 @@
|
|||||||
|
|
||||||
- [go-httpbin](https://github.com/mccutchen/go-httpbin), a reasonably complete and well-tested golang port of httpbin, with zero dependencies outside the go stdlib. Available as [services.go-httpbin](#opt-services.go-httpbin.enable).
|
- [go-httpbin](https://github.com/mccutchen/go-httpbin), a reasonably complete and well-tested golang port of httpbin, with zero dependencies outside the go stdlib. Available as [services.go-httpbin](#opt-services.go-httpbin.enable).
|
||||||
|
|
||||||
|
- [llama-swap](https://github.com/mostlygeek/llama-swap), a lightweight transparent proxy server that provides automatic model swapping to llama.cpp's server (or any server with an OpenAI-compatible endpoint). Available as [](#opt-services.llama-swap.enable).
|
||||||
|
|
||||||
- [tuwunel](https://matrix-construct.github.io/tuwunel/), a federated chat server implementing the Matrix protocol, forked from Conduwuit. Available as [services.matrix-tuwunel](#opt-services.matrix-tuwunel.enable).
|
- [tuwunel](https://matrix-construct.github.io/tuwunel/), a federated chat server implementing the Matrix protocol, forked from Conduwuit. Available as [services.matrix-tuwunel](#opt-services.matrix-tuwunel.enable).
|
||||||
|
|
||||||
- [Broadcast Box](https://github.com/Glimesh/broadcast-box), a WebRTC broadcast server. Available as [services.broadcast-box](options.html#opt-services.broadcast-box.enable).
|
- [Broadcast Box](https://github.com/Glimesh/broadcast-box), a WebRTC broadcast server. Available as [services.broadcast-box](options.html#opt-services.broadcast-box.enable).
|
||||||
|
|||||||
@@ -1217,6 +1217,7 @@
|
|||||||
./services/networking/libreswan.nix
|
./services/networking/libreswan.nix
|
||||||
./services/networking/livekit-ingress.nix
|
./services/networking/livekit-ingress.nix
|
||||||
./services/networking/livekit.nix
|
./services/networking/livekit.nix
|
||||||
|
./services/networking/llama-swap.nix
|
||||||
./services/networking/lldpd.nix
|
./services/networking/lldpd.nix
|
||||||
./services/networking/logmein-hamachi.nix
|
./services/networking/logmein-hamachi.nix
|
||||||
./services/networking/lokinet.nix
|
./services/networking/lokinet.nix
|
||||||
|
|||||||
124
nixos/modules/services/networking/llama-swap.nix
Normal file
124
nixos/modules/services/networking/llama-swap.nix
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
{
|
||||||
|
config,
|
||||||
|
lib,
|
||||||
|
pkgs,
|
||||||
|
...
|
||||||
|
}:
|
||||||
|
let
|
||||||
|
cfg = config.services.llama-swap;
|
||||||
|
settingsFormat = pkgs.formats.yaml { };
|
||||||
|
configFile = settingsFormat.generate "config.yaml" cfg.settings;
|
||||||
|
in
|
||||||
|
{
|
||||||
|
options.services.llama-swap = {
|
||||||
|
# mkEnableOption already prepends "Whether to enable ", so pass only the noun phrase.
enable = lib.mkEnableOption "the llama-swap service";
|
||||||
|
|
||||||
|
package = lib.mkPackageOption pkgs "llama-swap" { };
|
||||||
|
|
||||||
|
port = lib.mkOption {
|
||||||
|
default = 8080;
|
||||||
|
example = 11343;
|
||||||
|
type = lib.types.port;
|
||||||
|
description = ''
|
||||||
|
Port that llama-swap listens on.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
|
openFirewall = lib.mkOption {
|
||||||
|
type = lib.types.bool;
|
||||||
|
default = false;
|
||||||
|
description = ''
|
||||||
|
Whether to open the firewall for llama-swap.
|
||||||
|
This adds {option}`port` to [](#opt-networking.firewall.allowedTCPPorts).
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
|
settings = lib.mkOption {
|
||||||
|
type = lib.types.submodule { freeformType = settingsFormat.type; };
|
||||||
|
description = ''
|
||||||
|
llama-swap configuration. Refer to the [llama-swap example configuration](https://github.com/mostlygeek/llama-swap/blob/main/config.example.yaml)
|
||||||
|
for details on supported values.
|
||||||
|
'';
|
||||||
|
example = lib.literalExpression ''
|
||||||
|
let
|
||||||
|
llama-cpp = pkgs.llama-cpp.override { rocmSupport = true; };
|
||||||
|
llama-server = lib.getExe' llama-cpp "llama-server";
|
||||||
|
in
|
||||||
|
{
|
||||||
|
healthCheckTimeout = 60;
|
||||||
|
models = {
|
||||||
|
"some-model" = {
|
||||||
|
cmd = "''${llama-server} --port \''${PORT} -m /var/lib/llama-cpp/models/some-model.gguf -ngl 0 --no-webui";
|
||||||
|
aliases = [
|
||||||
|
"the-best"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
"other-model" = {
|
||||||
|
proxy = "http://127.0.0.1:5555";
|
||||||
|
cmd = "''${llama-server} --port 5555 -m /var/lib/llama-cpp/models/other-model.gguf -ngl 0 -c 4096 -np 4 --no-webui";
|
||||||
|
concurrencyLimit = 4;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
};
|
||||||
|
config = lib.mkIf cfg.enable {
|
||||||
|
systemd.services.llama-swap = {
|
||||||
|
# Unit description; "OpenAI compatible" matches the wording used in the release notes.
description = "Model swapping for LLaMA C++ Server (or any local OpenAI compatible server)";
|
||||||
|
after = [ "network.target" ];
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "exec";
|
||||||
|
ExecStart = "${lib.getExe cfg.package} --listen :${toString cfg.port} --config ${configFile}";
|
||||||
|
Restart = "on-failure";
|
||||||
|
RestartSec = 3;
|
||||||
|
|
||||||
|
# for GPU acceleration
|
||||||
|
PrivateDevices = false;
|
||||||
|
|
||||||
|
# hardening
|
||||||
|
DynamicUser = true;
|
||||||
|
CapabilityBoundingSet = "";
|
||||||
|
RestrictAddressFamilies = [
|
||||||
|
"AF_INET"
|
||||||
|
"AF_INET6"
|
||||||
|
"AF_UNIX"
|
||||||
|
];
|
||||||
|
NoNewPrivileges = true;
|
||||||
|
PrivateMounts = true;
|
||||||
|
PrivateTmp = true;
|
||||||
|
PrivateUsers = true;
|
||||||
|
ProtectClock = true;
|
||||||
|
ProtectControlGroups = true;
|
||||||
|
ProtectHome = true;
|
||||||
|
ProtectKernelLogs = true;
|
||||||
|
ProtectKernelModules = true;
|
||||||
|
ProtectKernelTunables = true;
|
||||||
|
ProtectSystem = "strict";
|
||||||
|
MemoryDenyWriteExecute = true;
|
||||||
|
LockPersonality = true;
|
||||||
|
RemoveIPC = true;
|
||||||
|
RestrictNamespaces = true;
|
||||||
|
RestrictRealtime = true;
|
||||||
|
RestrictSUIDSGID = true;
|
||||||
|
SystemCallArchitectures = "native";
|
||||||
|
SystemCallFilter = [
|
||||||
|
"@system-service"
|
||||||
|
"~@privileged"
|
||||||
|
];
|
||||||
|
SystemCallErrorNumber = "EPERM";
|
||||||
|
ProtectProc = "invisible";
|
||||||
|
ProtectHostname = true;
|
||||||
|
ProcSubset = "pid";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
# Add the listening port to the firewall's allowed TCP ports only when openFirewall is set.
networking.firewall.allowedTCPPorts = lib.mkIf cfg.openFirewall [ cfg.port ];
|
||||||
|
};
|
||||||
|
|
||||||
|
meta.maintainers = with lib.maintainers; [
|
||||||
|
jk
|
||||||
|
podium868909
|
||||||
|
];
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user