wyoming-faster-whisper: 2.4.0 -> 2.5.0 (#417413)

2025-06-25 05:18:48 +02:00
parent a53e6ff1ed f495eb597f
commit 0afcbf81f8
2 changed files with 246 additions and 203 deletions
--- a/nixos/modules/services/home-automation/wyoming/faster-whisper.nix
+++ b/nixos/modules/services/home-automation/wyoming/faster-whisper.nix
@@ -10,9 +10,11 @@ let
  cfg = config.services.wyoming.faster-whisper;

  inherit (lib)
+    mapAttrsToList
    mkOption
    mkEnableOption
    mkPackageOption
+    optionals
    types
    ;

@@ -24,6 +26,13 @@ let
    escapeSystemdExecArgs
    ;

+  finalPackage = cfg.package.overridePythonAttrs (oldAttrs: {
+    dependencies =
+      oldAttrs.dependencies
+      # `useTransformers` is a per-server option (not on `cfg`): add the extras if any server enables it
+      ++ optionals (lib.any (server: server.useTransformers) (lib.attrValues cfg.servers)) oldAttrs.optional-dependencies.transformers;
+  });
+
 in

 {
@@ -35,215 +44,230 @@ in
      description = ''
        Attribute set of wyoming-faster-whisper instances to spawn.
      '';
-      type = types.attrsOf (
-        types.submodule (
-          { ... }:
-          {
-            options = {
-              enable = mkEnableOption "Wyoming faster-whisper server";
+      type = attrsOf (submodule {
+        options = {
+          enable = mkEnableOption "Wyoming faster-whisper server";

-              model = mkOption {
-                type = str;
-                default = "tiny-int8";
-                example = "Systran/faster-distil-whisper-small.en";
-                description = ''
-                  Name of the voice model to use. Can also be a HuggingFace model ID or a path to
-                  a custom model directory.
+          model = mkOption {
+            type = str;
+            default = "tiny-int8";
+            example = "Systran/faster-distil-whisper-small.en";
+            # https://github.com/home-assistant/addons/blob/master/whisper/DOCS.md#option-model
+            description = ''
+              Name of the voice model to use. Can also be a HuggingFace model ID or a path to
+              a custom model directory.

-                  Compressed models (`int8`) are slightly less accurate, but smaller and faster.
+              With {option}`useTransformers` enabled, the ID of a HuggingFace transformers Whisper
+              model, such as `openai/whisper-tiny.en`, must be used.

-                  Available models:
-                  - `tiny-int8` (compressed)
-                  - `tiny`
-                  - `tiny.en` (English only)
-                  - `base-int8` (compressed)
-                  - `base`
-                  - `base.en` (English only)
-                  - `small-int8` (compressed)
-                  - `distil-small.en` (distilled, English only)
-                  - `small`
-                  - `small.en` (English only)
-                  - `medium-int8` (compressed)
-                  - `distil-medium.en` (distilled, English only)
-                  - `medium`
-                  - `medium.en` (English only)
-                  - `large`
-                  - `large-v1`
-                  - `distil-large-v2` (distilled, English only)
-                  - `large-v2`
-                  - `distil-large-v3` (distilled, English only)
-                  - `large-v3`
-                  - `turbo` (faster than large-v3)
-                '';
-              };
+              Compressed models (`int8`) are slightly less accurate, but smaller and faster.
+              Distilled models are uncompressed and faster and smaller than non-distilled models.

-              uri = mkOption {
-                type = strMatching "^(tcp|unix)://.*$";
-                example = "tcp://0.0.0.0:10300";
-                description = ''
-                  URI to bind the wyoming server to.
-                '';
-              };
+              Available models:
+              - `tiny-int8` (compressed)
+              - `tiny`
+              - `tiny.en` (English only)
+              - `base-int8` (compressed)
+              - `base`
+              - `base.en` (English only)
+              - `small-int8` (compressed)
+              - `distil-small.en` (distilled, English only)
+              - `small`
+              - `small.en` (English only)
+              - `medium-int8` (compressed)
+              - `distil-medium.en` (distilled, English only)
+              - `medium`
+              - `medium.en` (English only)
+              - `large`
+              - `large-v1`
+              - `distil-large-v2` (distilled, English only)
+              - `large-v2`
+              - `distil-large-v3` (distilled, English only)
+              - `large-v3`
+              - `turbo` (faster than large-v3)
+            '';
+          };

-              device = mkOption {
-                # https://opennmt.net/CTranslate2/python/ctranslate2.models.Whisper.html#
-                type = types.enum [
-                  "cpu"
-                  "cuda"
-                  "auto"
-                ];
-                default = "cpu";
-                description = ''
-                  Determines the platform faster-whisper is run on. CPU works everywhere, CUDA requires a compatible NVIDIA GPU.
-                '';
-              };
+          useTransformers = mkOption {
+            type = bool;
+            default = false;
+            description = ''
+              Whether to provide the dependencies to allow using transformer models.
+            '';
+          };

-              language = mkOption {
-                type = enum [
-                  # https://github.com/home-assistant/addons/blob/master/whisper/config.yaml#L20
-                  "auto"
-                  "af"
-                  "am"
-                  "ar"
-                  "as"
-                  "az"
-                  "ba"
-                  "be"
-                  "bg"
-                  "bn"
-                  "bo"
-                  "br"
-                  "bs"
-                  "ca"
-                  "cs"
-                  "cy"
-                  "da"
-                  "de"
-                  "el"
-                  "en"
-                  "es"
-                  "et"
-                  "eu"
-                  "fa"
-                  "fi"
-                  "fo"
-                  "fr"
-                  "gl"
-                  "gu"
-                  "ha"
-                  "haw"
-                  "he"
-                  "hi"
-                  "hr"
-                  "ht"
-                  "hu"
-                  "hy"
-                  "id"
-                  "is"
-                  "it"
-                  "ja"
-                  "jw"
-                  "ka"
-                  "kk"
-                  "km"
-                  "kn"
-                  "ko"
-                  "la"
-                  "lb"
-                  "ln"
-                  "lo"
-                  "lt"
-                  "lv"
-                  "mg"
-                  "mi"
-                  "mk"
-                  "ml"
-                  "mn"
-                  "mr"
-                  "ms"
-                  "mt"
-                  "my"
-                  "ne"
-                  "nl"
-                  "nn"
-                  "no"
-                  "oc"
-                  "pa"
-                  "pl"
-                  "ps"
-                  "pt"
-                  "ro"
-                  "ru"
-                  "sa"
-                  "sd"
-                  "si"
-                  "sk"
-                  "sl"
-                  "sn"
-                  "so"
-                  "sq"
-                  "sr"
-                  "su"
-                  "sv"
-                  "sw"
-                  "ta"
-                  "te"
-                  "tg"
-                  "th"
-                  "tk"
-                  "tl"
-                  "tr"
-                  "tt"
-                  "uk"
-                  "ur"
-                  "uz"
-                  "vi"
-                  "yi"
-                  "yue"
-                  "yo"
-                  "zh"
-                ];
-                example = "en";
-                description = ''
-                  The language used to to parse words and sentences.
-                '';
-              };
+          uri = mkOption {
+            type = strMatching "^(tcp|unix)://.*$";
+            example = "tcp://0.0.0.0:10300";
+            description = ''
+              URI to bind the wyoming server to.
+            '';
+          };

-              initialPrompt = mkOption {
-                type = nullOr str;
-                default = null;
-                example = ''
-                  The following conversation takes place in the universe of Wizard of Oz. Key terms include 'Yellow Brick Road' (the path to follow), 'Emerald City' (the ultimate goal), and 'Ruby Slippers' (the magical tools to succeed). Keep these in mind as they guide the journey.
-                '';
-                description = ''
-                  Optional text to provide as a prompt for the first window. This can be used to provide, or
-                  "prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns
-                  to make it more likely to predict those word correctly.
-                '';
-              };
+          device = mkOption {
+            # https://opennmt.net/CTranslate2/python/ctranslate2.models.Whisper.html#
+            type = enum [
+              "cpu"
+              "cuda"
+              "auto"
+            ];
+            default = "cpu";
+            description = ''
+              Determines the platform faster-whisper is run on. CPU works everywhere, CUDA requires a compatible NVIDIA GPU.
+            '';
+          };

-              beamSize = mkOption {
-                type = ints.unsigned;
-                default = 0;
-                example = 5;
-                description = ''
-                  The number of beams to use in beam search.
-                  Use `0` to automatically select a value based on the CPU.
-                '';
-                apply = toString;
-              };
+          language = mkOption {
+            type = enum [
+              # https://github.com/home-assistant/addons/blob/master/whisper/config.yaml#L20
+              "auto"
+              "af"
+              "am"
+              "ar"
+              "as"
+              "az"
+              "ba"
+              "be"
+              "bg"
+              "bn"
+              "bo"
+              "br"
+              "bs"
+              "ca"
+              "cs"
+              "cy"
+              "da"
+              "de"
+              "el"
+              "en"
+              "es"
+              "et"
+              "eu"
+              "fa"
+              "fi"
+              "fo"
+              "fr"
+              "gl"
+              "gu"
+              "ha"
+              "haw"
+              "he"
+              "hi"
+              "hr"
+              "ht"
+              "hu"
+              "hy"
+              "id"
+              "is"
+              "it"
+              "ja"
+              "jw"
+              "ka"
+              "kk"
+              "km"
+              "kn"
+              "ko"
+              "la"
+              "lb"
+              "ln"
+              "lo"
+              "lt"
+              "lv"
+              "mg"
+              "mi"
+              "mk"
+              "ml"
+              "mn"
+              "mr"
+              "ms"
+              "mt"
+              "my"
+              "ne"
+              "nl"
+              "nn"
+              "no"
+              "oc"
+              "pa"
+              "pl"
+              "ps"
+              "pt"
+              "ro"
+              "ru"
+              "sa"
+              "sd"
+              "si"
+              "sk"
+              "sl"
+              "sn"
+              "so"
+              "sq"
+              "sr"
+              "su"
+              "sv"
+              "sw"
+              "ta"
+              "te"
+              "tg"
+              "th"
+              "tk"
+              "tl"
+              "tr"
+              "tt"
+              "uk"
+              "ur"
+              "uz"
+              "vi"
+              "yi"
+              "yue"
+              "yo"
+              "zh"
+            ];
+            example = "en";
+            description = ''
+              The language used to parse words and sentences.
+            '';
+          };

-              extraArgs = mkOption {
-                type = listOf str;
-                default = [ ];
-                description = ''
-                  Extra arguments to pass to the server commandline.
-                '';
-              };
-            };
-          }
-        )
-      );
+          initialPrompt = mkOption {
+            type = nullOr str;
+            default = null;
+            # https://github.com/home-assistant/addons/blob/master/whisper/DOCS.md#option-custom_model_type
+            example = ''
+              The following conversation takes place in the universe of
+              Wizard of Oz. Key terms include 'Yellow Brick Road' (the path
+              to follow), 'Emerald City' (the ultimate goal), and 'Ruby
+              Slippers' (the magical tools to succeed). Keep these in mind as
+              they guide the journey.
+            '';
+            description = ''
+              Optional text to provide as a prompt for the first window. This can be used to provide, or
+              "prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns
+              to make it more likely to predict those words correctly.
+
+              Not supported when {option}`useTransformers` is enabled.
+            '';
+          };
+
+          beamSize = mkOption {
+            type = ints.unsigned;
+            default = 0;
+            example = 5;
+            description = ''
+              The number of beams to use in beam search.
+              Use `0` to automatically select a value based on the CPU.
+            '';
+            apply = toString;
+          };
+
+          extraArgs = mkOption {
+            type = listOf str;
+            default = [ ];
+            description = ''
+              Extra arguments to pass to the server commandline.
+            '';
+          };
+        };
+      });
    };
  };

@@ -256,6 +280,13 @@ in
        ;
    in
    mkIf (cfg.servers != { }) {
+      assertions = mapAttrsToList (
+        server: options: {
+          assertion = options.useTransformers -> options.initialPrompt == null;
+          message = "wyoming-faster-whisper/${server}: Transformer models (`useTransformers`) do not currently support an `initialPrompt`.";
+        }
+      ) cfg.servers; # one assertion per configured server instance
+
      systemd.services = mapAttrs' (
        server: options:
        nameValuePair "wyoming-faster-whisper-${server}" {
@@ -279,7 +310,7 @@ in
            # https://github.com/home-assistant/addons/blob/master/whisper/rootfs/etc/s6-overlay/s6-rc.d/whisper/run
            ExecStart = escapeSystemdExecArgs (
              [
-                (lib.getExe cfg.package)
+                (lib.getExe finalPackage)
                "--data-dir"
                "/var/lib/wyoming/faster-whisper"
                "--uri"
@@ -293,6 +324,9 @@ in
                "--beam-size"
                options.beamSize
              ]
+              ++ lib.optionals options.useTransformers [
+                "--use-transformers"
+              ]
              ++ lib.optionals (options.initialPrompt != null) [
                "--initial-prompt"
                options.initialPrompt
--- a/pkgs/by-name/wy/wyoming-faster-whisper/package.nix
+++ b/pkgs/by-name/wy/wyoming-faster-whisper/package.nix
@@ -6,14 +6,14 @@

 python3Packages.buildPythonApplication rec {
  pname = "wyoming-faster-whisper";
-  version = "2.4.0";
+  version = "2.5.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "rhasspy";
    repo = "wyoming-faster-whisper";
    rev = "refs/tags/v${version}";
-    hash = "sha256-Ai28i+2/oWI2Y61x7U5an5MBHfuBaGy6qZZwZydS308=";
+    hash = "sha256-MKB6gZdGdAYoNK8SRiDHG8xtMZ5mXdaSn+bH4T6o/K4=";
  };

  build-system = with python3Packages; [
@@ -30,6 +30,15 @@ python3Packages.buildPythonApplication rec {
    wyoming
  ];

+  optional-dependencies = {
+    transformers =
+      with python3Packages;
+      [
+        transformers
+      ]
+      ++ transformers.optional-dependencies.torch;
+  };
+
  pythonImportsCheck = [
    "wyoming_faster_whisper"
  ];