From 8eb37d0bf5b60d876cc242acde0ebb091550d1a8 Mon Sep 17 00:00:00 2001 From: Benjamin Sparks Date: Wed, 21 May 2025 18:28:26 +0200 Subject: [PATCH 1/8] nltk-data: add bengsparks to maintainers --- pkgs/tools/text/nltk-data/default.nix | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkgs/tools/text/nltk-data/default.nix b/pkgs/tools/text/nltk-data/default.nix index d9613b86637a..bdb1ad9af6de 100644 --- a/pkgs/tools/text/nltk-data/default.nix +++ b/pkgs/tools/text/nltk-data/default.nix @@ -15,7 +15,10 @@ let homepage = "https://github.com/nltk/nltk_data"; license = licenses.asl20; platforms = platforms.all; - maintainers = with maintainers; [ happysalada ]; + maintainers = with maintainers; [ + bengsparks + happysalada + ]; }; }; makeNltkDataPackage = From 03639c657abaf39b7288dba212cd2ec8eb062626 Mon Sep 17 00:00:00 2001 From: Benjamin Sparks Date: Wed, 21 May 2025 15:50:35 +0200 Subject: [PATCH 2/8] nltk-data: skip fixupPhase to avoid needless patching attempts There are no scripts in the `package` folder, only datasets, models, etc. --- pkgs/tools/text/nltk-data/default.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/pkgs/tools/text/nltk-data/default.nix b/pkgs/tools/text/nltk-data/default.nix index bdb1ad9af6de..b3e66f7fe4a7 100644 --- a/pkgs/tools/text/nltk-data/default.nix +++ b/pkgs/tools/text/nltk-data/default.nix @@ -10,6 +10,7 @@ let version = "0-unstable-2024-07-29"; nativeBuildInputs = [ unzip ]; dontBuild = true; + dontFixup = true; meta = with lib; { description = "NLTK Data"; homepage = "https://github.com/nltk/nltk_data"; From 1657909e8e7b4bd84a967d9183c086efbd225372 Mon Sep 17 00:00:00 2001 From: Benjamin Sparks Date: Wed, 21 May 2025 18:36:40 +0200 Subject: [PATCH 3/8] nltk-data: fix searchability nltk-data is an attribute set, which leads to nixos search omitting it. Wrapping it in `recurseIntoAttrs` remedies this. 
--- pkgs/top-level/all-packages.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index 12e622afedf0..efdd3ff8ec1b 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -2344,7 +2344,7 @@ with pkgs; mpd-sima = python3Packages.callPackage ../tools/audio/mpd-sima { }; - nltk-data = callPackage ../tools/text/nltk-data { }; + nltk-data = lib.recurseIntoAttrs (callPackage ../tools/text/nltk-data { }); seabios-coreboot = seabios.override { ___build-type = "coreboot"; }; seabios-csm = seabios.override { ___build-type = "csm"; }; From 4b08542fbef3ccd72ce5db2315e7510b7e603184 Mon Sep 17 00:00:00 2001 From: Benjamin Sparks Date: Wed, 21 May 2025 23:18:54 +0200 Subject: [PATCH 4/8] treewide: nltk-data.punkt_tab -> nltk-data.punkt-tab The old attribute is not kept as an alias: pkgs/top-level/aliases.nix can only alias top-level attribute names, so an entry of the form `nltk-data.punkt_tab = ...` there would instead define a top-level `nltk-data` alias that clashes with the package set itself. --- pkgs/by-name/pa/paperless-ngx/package.nix | 2 +- pkgs/development/python-modules/aider-chat/default.nix | 2 +- pkgs/development/python-modules/type-infer/default.nix | 2 +- pkgs/tools/text/nltk-data/default.nix | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pkgs/by-name/pa/paperless-ngx/package.nix b/pkgs/by-name/pa/paperless-ngx/package.nix index 41445394d5b8..d05b49ca16e2 100644 --- a/pkgs/by-name/pa/paperless-ngx/package.nix +++ b/pkgs/by-name/pa/paperless-ngx/package.nix @@ -313,7 +313,7 @@ python.pkgs.buildPythonApplication rec { tesseract5 ; nltkData = with nltk-data; [ - punkt_tab + punkt-tab snowball_data stopwords ]; diff --git a/pkgs/development/python-modules/aider-chat/default.nix b/pkgs/development/python-modules/aider-chat/default.nix index 4aa6ae731311..bc4534b8da61 100644 --- a/pkgs/development/python-modules/aider-chat/default.nix +++ b/pkgs/development/python-modules/aider-chat/default.nix @@ -125,7 +125,7 @@ let aider-nltk-data = symlinkJoin { name = "aider-nltk-data"; paths = [ - nltk-data.punkt_tab + nltk-data.punkt-tab nltk-data.stopwords ]; }; diff
--git a/pkgs/development/python-modules/type-infer/default.nix b/pkgs/development/python-modules/type-infer/default.nix index 4ce88d3b049f..4b8ef9e99d5e 100644 --- a/pkgs/development/python-modules/type-infer/default.nix +++ b/pkgs/development/python-modules/type-infer/default.nix @@ -24,7 +24,7 @@ let name = "nltk-test-data"; paths = [ nltk-data.punkt - nltk-data.punkt_tab + nltk-data.punkt-tab nltk-data.stopwords ]; }; diff --git a/pkgs/tools/text/nltk-data/default.nix b/pkgs/tools/text/nltk-data/default.nix index b3e66f7fe4a7..e2256c75d087 100644 --- a/pkgs/tools/text/nltk-data/default.nix +++ b/pkgs/tools/text/nltk-data/default.nix @@ -61,7 +61,7 @@ lib.makeScope newScope (self: { location = "tokenizers"; hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg="; }; - punkt_tab = makeNltkDataPackage { + punkt-tab = makeNltkDataPackage { pname = "punkt_tab"; location = "tokenizers"; hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg=";
From 869486464c9717d18a4cb68a4daee123c35dbdf4 Mon Sep 17 00:00:00 2001 From: Benjamin Sparks Date: Wed, 21 May 2025 23:19:56 +0200 Subject: [PATCH 5/8] treewide: nltk-data.averaged_perceptron_tagger -> nltk-data.averaged-perceptron-tagger The old attribute is not kept as an alias: pkgs/top-level/aliases.nix can only alias top-level attribute names, so an entry of the form `nltk-data.averaged_perceptron_tagger = ...` there would instead define a top-level `nltk-data` alias that clashes with the package set itself. --- pkgs/by-name/un/unstructured-api/package.nix | 2 +- pkgs/tools/text/nltk-data/default.nix | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkgs/by-name/un/unstructured-api/package.nix b/pkgs/by-name/un/unstructured-api/package.nix index 96a5a15584d8..adf9fa83dd96 100644 --- a/pkgs/by-name/un/unstructured-api/package.nix +++ b/pkgs/by-name/un/unstructured-api/package.nix @@ -152,7 +152,7 @@ let paths = [ nltk-data.punkt - nltk-data.averaged_perceptron_tagger + nltk-data.averaged-perceptron-tagger ]; }; in diff --git a/pkgs/tools/text/nltk-data/default.nix b/pkgs/tools/text/nltk-data/default.nix index e2256c75d087..c4a99c0456c3 100644 --- a/pkgs/tools/text/nltk-data/default.nix +++ b/pkgs/tools/text/nltk-data/default.nix @@ -66,7 +66,7 @@ lib.makeScope newScope (self: { location = "tokenizers"; hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg="; }; - averaged_perceptron_tagger = makeNltkDataPackage { + averaged-perceptron-tagger = makeNltkDataPackage { pname = "averaged_perceptron_tagger"; location = "taggers"; hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M=";
From 60d135d45a865d8a64e57a7acabdab4a86c5cb5e Mon Sep 17 00:00:00 2001 From: Benjamin Sparks Date: Wed, 21 May 2025 23:20:49 +0200 Subject: [PATCH 6/8] treewide: nltk-data.averaged_perceptron_tagger_eng -> nltk-data.averaged-perceptron-tagger-eng The old attribute is not kept as an alias: pkgs/top-level/aliases.nix can only alias top-level attribute names, so an entry of the form `nltk-data.averaged_perceptron_tagger_eng = ...` there would instead define a top-level `nltk-data` alias that clashes with the package set itself. --- nixos/modules/services/web-apps/mealie.nix | 2 +- pkgs/by-name/me/mealie/package.nix | 2 +- .../python-modules/ingredient-parser-nlp/default.nix | 2 +- pkgs/tools/text/nltk-data/default.nix | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nixos/modules/services/web-apps/mealie.nix b/nixos/modules/services/web-apps/mealie.nix index 46571fea4617..b8f65b1fb98e 100644 --- a/nixos/modules/services/web-apps/mealie.nix +++ b/nixos/modules/services/web-apps/mealie.nix @@ -76,7 +76,7 @@ in API_PORT = toString cfg.port; BASE_URL = "http://localhost:${toString cfg.port}"; DATA_DIR = "/var/lib/mealie"; - NLTK_DATA = pkgs.nltk-data.averaged_perceptron_tagger_eng; + NLTK_DATA = pkgs.nltk-data.averaged-perceptron-tagger-eng; } // (builtins.mapAttrs (_: val: toString val) cfg.settings); serviceConfig = { diff --git a/pkgs/by-name/me/mealie/package.nix b/pkgs/by-name/me/mealie/package.nix index 0092eab6b5f3..6a7897f9a79f 100644 --- a/pkgs/by-name/me/mealie/package.nix +++ b/pkgs/by-name/me/mealie/package.nix @@ -109,7 +109,7 @@ pythonpkgs.buildPythonApplication rec { # Needed for tests preCheck = '' - export
NLTK_DATA=${nltk-data.averaged_perceptron_tagger_eng} + export NLTK_DATA=${nltk-data.averaged-perceptron-tagger-eng} ''; disabledTestPaths = [ diff --git a/pkgs/development/python-modules/ingredient-parser-nlp/default.nix b/pkgs/development/python-modules/ingredient-parser-nlp/default.nix index 88d03d362361..ee07bd34acd3 100644 --- a/pkgs/development/python-modules/ingredient-parser-nlp/default.nix +++ b/pkgs/development/python-modules/ingredient-parser-nlp/default.nix @@ -44,7 +44,7 @@ buildPythonPackage rec { # Needed for tests preCheck = '' - export NLTK_DATA=${nltk-data.averaged_perceptron_tagger_eng} + export NLTK_DATA=${nltk-data.averaged-perceptron-tagger-eng} ''; meta = { diff --git a/pkgs/tools/text/nltk-data/default.nix b/pkgs/tools/text/nltk-data/default.nix index c4a99c0456c3..96125bd351f2 100644 --- a/pkgs/tools/text/nltk-data/default.nix +++ b/pkgs/tools/text/nltk-data/default.nix @@ -71,7 +71,7 @@ lib.makeScope newScope (self: { location = "taggers"; hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M="; }; - averaged_perceptron_tagger_eng = makeNltkDataPackage { + averaged-perceptron-tagger-eng = makeNltkDataPackage { pname = "averaged_perceptron_tagger_eng"; location = "taggers"; hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M=";
From 4375d986d6bd048ad9ccfe3af2cbb644a966c729 Mon Sep 17 00:00:00 2001 From: Benjamin Sparks Date: Wed, 21 May 2025 23:21:19 +0200 Subject: [PATCH 7/8] treewide: nltk-data.snowball_data -> nltk-data.snowball-data The old attribute is not kept as an alias: pkgs/top-level/aliases.nix can only alias top-level attribute names, so an entry of the form `nltk-data.snowball_data = ...` there would instead define a top-level `nltk-data` alias that clashes with the package set itself. --- pkgs/by-name/pa/paperless-ngx/package.nix | 2 +- pkgs/tools/text/nltk-data/default.nix | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkgs/by-name/pa/paperless-ngx/package.nix b/pkgs/by-name/pa/paperless-ngx/package.nix index d05b49ca16e2..3a1fa7f93294 100644 --- a/pkgs/by-name/pa/paperless-ngx/package.nix +++ b/pkgs/by-name/pa/paperless-ngx/package.nix @@ -314,7 +314,7 @@ python.pkgs.buildPythonApplication rec { ; nltkData = with nltk-data; [ punkt-tab - snowball_data + snowball-data stopwords ]; tests = { inherit (nixosTests) paperless; }; diff --git a/pkgs/tools/text/nltk-data/default.nix b/pkgs/tools/text/nltk-data/default.nix index 96125bd351f2..1ee377e4757b 100644 --- a/pkgs/tools/text/nltk-data/default.nix +++ b/pkgs/tools/text/nltk-data/default.nix @@ -76,7 +76,7 @@ lib.makeScope newScope (self: { location = "taggers"; hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M="; }; - snowball_data = makeNltkDataPackage { + snowball-data = makeNltkDataPackage { pname = "snowball_data"; location = "stemmers"; hash = "sha256-mNefwOPVJGz9kXV3LV4DuV7FJpNir/Nwg4ujd0CogEk=";
From a7d3ac57c5c40c8a22ef892f4ff833d3bbac612a Mon Sep 17 00:00:00 2001 From: Benjamin Sparks Date: Wed, 21 May 2025 16:46:18 +0200 Subject: [PATCH 8/8] nltk-data: add all downloadables --- pkgs/tools/text/nltk-data/default.nix | 241 ++++++++++++++++++++++---- 1 file changed, 206 insertions(+), 35 deletions(-) diff --git a/pkgs/tools/text/nltk-data/default.nix b/pkgs/tools/text/nltk-data/default.nix index 1ee377e4757b..e4aa7f06c355 100644 --- a/pkgs/tools/text/nltk-data/default.nix +++ b/pkgs/tools/text/nltk-data/default.nix @@ -54,41 +54,212 @@ let ''; } ); + + makeChunker = + pname: + makeNltkDataPackage { + inherit pname; + location = "chunkers"; + hash = "sha256-kemjqaCM9hlKAdMw8oVJnp62EAC9rMQ50dKg7wlAwEc="; + }; + + makeCorpus = + pname: + makeNltkDataPackage { + inherit pname; + location = "corpora"; + hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk="; + }; + + makeGrammar = + pname: + makeNltkDataPackage { + inherit pname; + location = "grammars"; + hash = "sha256-pyLEcX3Azv8j1kCGvVYonuiNgVJxtWt7veU0S/yNbIM="; + }; + + makeHelp = + pname: + makeNltkDataPackage { + inherit pname; + location = "help"; + hash = "sha256-97mYLNES5WujLF5gD8Ul4cJ6LqSzz+jDzclUsdBeHNE="; + }; + + makeMisc = + pname: + makeNltkDataPackage { + inherit pname; + location = "misc"; + hash = "sha256-XtizfEsc8TYWqvvC/eSFdha2ClC5/ZiJM8nue0vXLb4="; + }; + + makeModel = + pname: + makeNltkDataPackage { + inherit pname; + location = "models"; + hash = "sha256-iq3weEgCci6rgLW2j28F2eRLprJtInGXKe/awJPSVG4="; + }; + + makeTagger = + pname: + makeNltkDataPackage { + inherit pname; + location = "taggers"; + hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M="; + }; + + makeTokenizer = + pname: + makeNltkDataPackage { + inherit
pname; + location = "tokenizers"; + hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg="; + }; + + makeStemmer = + pname: + makeNltkDataPackage { + inherit pname; + location = "stemmers"; + hash = "sha256-mNefwOPVJGz9kXV3LV4DuV7FJpNir/Nwg4ujd0CogEk="; + }; in lib.makeScope newScope (self: { - punkt = makeNltkDataPackage { - pname = "punkt"; - location = "tokenizers"; - hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg="; - }; - punkt-tab = makeNltkDataPackage { - pname = "punkt_tab"; - location = "tokenizers"; - hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg="; - }; - averaged-perceptron-tagger = makeNltkDataPackage { - pname = "averaged_perceptron_tagger"; - location = "taggers"; - hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M="; - }; - averaged-perceptron-tagger-eng = makeNltkDataPackage { - pname = "averaged_perceptron_tagger_eng"; - location = "taggers"; - hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M="; - }; - snowball-data = makeNltkDataPackage { - pname = "snowball_data"; - location = "stemmers"; - hash = "sha256-mNefwOPVJGz9kXV3LV4DuV7FJpNir/Nwg4ujd0CogEk="; - }; - stopwords = makeNltkDataPackage { - pname = "stopwords"; - location = "corpora"; - hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk="; - }; - wordnet = makeNltkDataPackage { - pname = "wordnet"; - location = "corpora"; - hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk="; - }; + ## Chunkers + maxent-ne-chunker = makeChunker "maxent_ne_chunker"; + maxent-ne-chunker-tab = makeChunker "maxent_ne_chunker_tab"; + + ## Corpora + abc = makeCorpus "abc"; + alpino = makeCorpus "alpino"; + bcp47 = makeCorpus "bcp47"; + biocreative-ppi = makeCorpus "biocreative_ppi"; + brown = makeCorpus "brown"; + brown-tei = makeCorpus "brown_tei"; + cess-cat = makeCorpus "cess_cat"; + cess-esp = makeCorpus "cess_esp"; + chat80 = makeCorpus "chat80"; + city-database = makeCorpus "city_database"; + cmudict = makeCorpus "cmudict"; + 
comparative-sentences = makeCorpus "comparative_sentences"; + comtrans = makeCorpus "comtrans"; + conll2000 = makeCorpus "conll2000"; + conll2002 = makeCorpus "conll2002"; + conll2007 = makeCorpus "conll2007"; + crubadan = makeCorpus "crubadan"; + dependency-treebank = makeCorpus "dependency_treebank"; + dolch = makeCorpus "dolch"; + europarl-raw = makeCorpus "europarl_raw"; + extended-omw = makeCorpus "extended_omw"; + floresta = makeCorpus "floresta"; + framenet-v15 = makeCorpus "framenet_v15"; + framenet-v17 = makeCorpus "framenet_v17"; + gazetteers = makeCorpus "gazetteers"; + genesis = makeCorpus "genesis"; + gutenberg = makeCorpus "gutenberg"; + ieer = makeCorpus "ieer"; + inaugural = makeCorpus "inaugural"; + indian = makeCorpus "indian"; + jeita = makeCorpus "jeita"; + kimmo = makeCorpus "kimmo"; + knbc = makeCorpus "knbc"; + lin-thesaurus = makeCorpus "lin_thesaurus"; + mac-morpho = makeCorpus "mac_morpho"; + machado = makeCorpus "machado"; + masc-tagged = makeCorpus "masc_tagged"; + movie-reviews = makeCorpus "movie_reviews"; + mte-teip5 = makeCorpus "mte_teip5"; + names = makeCorpus "names"; + nombank-1-0 = makeCorpus "nombank.1.0"; + nonbreaking-prefixes = makeCorpus "nonbreaking_prefixes"; + nps-chat = makeCorpus "nps_chat"; + omw = makeCorpus "omw"; + omw-1-4 = makeCorpus "omw-1.4"; + opinion-lexicon = makeCorpus "opinion_lexicon"; + panlex-swadesh = makeCorpus "panlex_swadesh"; + paradigms = makeCorpus "paradigms"; + pe08 = makeCorpus "pe08"; + pil = makeCorpus "pil"; + pl196x = makeCorpus "pl196x"; + ppattach = makeCorpus "ppattach"; + problem-reports = makeCorpus "problem_reports"; + product-reviews-1 = makeCorpus "product_reviews_1"; + product-reviews-2 = makeCorpus "product_reviews_2"; + propbank = makeCorpus "propbank"; + pros-cons = makeCorpus "pros_cons"; + ptb = makeCorpus "ptb"; + qc = makeCorpus "qc"; + reuters = makeCorpus "reuters"; + rte = makeCorpus "rte"; + semcor = makeCorpus "semcor"; + senseval = makeCorpus "senseval"; + 
sentence-polarity = makeCorpus "sentence_polarity"; + sentiwordnet = makeCorpus "sentiwordnet"; + shakespeare = makeCorpus "shakespeare"; + sinica-treebank = makeCorpus "sinica_treebank"; + smultron = makeCorpus "smultron"; + state-union = makeCorpus "state_union"; + stopwords = makeCorpus "stopwords"; + subjectivity = makeCorpus "subjectivity"; + swadesh = makeCorpus "swadesh"; + switchboard = makeCorpus "switchboard"; + timit = makeCorpus "timit"; + toolbox = makeCorpus "toolbox"; + treebank = makeCorpus "treebank"; + twitter-samples = makeCorpus "twitter_samples"; + udhr = makeCorpus "udhr"; + udhr2 = makeCorpus "udhr2"; + unicode-samples = makeCorpus "unicode_samples"; + universal-treebanks-v20 = makeCorpus "universal_treebanks_v20"; + verbnet = makeCorpus "verbnet"; + verbnet3 = makeCorpus "verbnet3"; + webtext = makeCorpus "webtext"; + wordnet = makeCorpus "wordnet"; + wordnet-ic = makeCorpus "wordnet_ic"; + wordnet2021 = makeCorpus "wordnet2021"; + wordnet2022 = makeCorpus "wordnet2022"; + wordnet31 = makeCorpus "wordnet31"; + words = makeCorpus "words"; + ycoe = makeCorpus "ycoe"; + + ## Grammars + basque-grammars = makeGrammar "basque_grammars"; + book-grammars = makeGrammar "book_grammars"; + large-grammars = makeGrammar "large_grammars"; + sample-grammars = makeGrammar "sample_grammars"; + spanish-grammars = makeGrammar "spanish_grammars"; + + ## Help + tagsets-json = makeHelp "tagsets_json"; + + ## Misc + mwa-ppdb = makeMisc "mwa_ppdb"; + perluniprops = makeMisc "perluniprops"; + + ## Models + bllip-wsj-no-aux = makeModel "bllip_wsj_no_aux"; + moses-sample = makeModel "moses_sample"; + wmt15-eval = makeModel "wmt15_eval"; + word2vec-sample = makeModel "word2vec_sample"; + + ## Taggers + averaged-perceptron-tagger = makeTagger "averaged_perceptron_tagger"; + averaged-perceptron-tagger-eng = makeTagger "averaged_perceptron_tagger_eng"; + averaged-perceptron-tagger-ru = makeTagger "averaged_perceptron_tagger_ru"; + averaged-perceptron-tagger-rus = 
makeTagger "averaged_perceptron_tagger_rus"; + maxent-treebank-pos-tagger = makeTagger "maxent_treebank_pos_tagger"; + maxent-treebank-pos-tagger-tab = makeTagger "maxent_treebank_pos_tagger_tab"; + universal-tagset = makeTagger "universal_tagset"; + + ## Tokenizers + punkt = makeTokenizer "punkt"; + punkt-tab = makeTokenizer "punkt_tab"; + + ## Stemmers + porter-test = makeStemmer "porter_test"; + rslp = makeStemmer "rslp"; + snowball-data = makeStemmer "snowball_data"; })