Diff: https://github.com/mlcommons/croissant/compare/v1.0.21...v1.0.22
Changelog: https://github.com/mlcommons/croissant/releases/tag/v1.0.22
101 lines
1.9 KiB
Nix
101 lines
1.9 KiB
Nix
# Package expression for `mlcroissant`, built with `buildPythonPackage` from
# the `mlcommons/croissant` GitHub repository at the tag matching `version`.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  absl-py,
  etils,
  jsonpath-rw,
  networkx,
  pandas,
  pandas-stubs,
  python-dateutil,
  rdflib,
  requests,
  scipy,
  tqdm,

  # tests
  apache-beam,
  gitpython,
  librosa,
  pillow,
  pytestCheckHook,
  pyyaml,
  writableTmpDirAsHomeHook,
}:

buildPythonPackage rec {
  pname = "mlcroissant";
  version = "1.0.22";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "mlcommons";
    repo = "croissant";
    tag = "v${version}";
    hash = "sha256-uJOxKNrK3eN2wyPFEQr2J4+vZeSK1KPyFDag2jcyWZw=";
  };

  # The Python project is built from a subdirectory of the fetched source.
  sourceRoot = "${src.name}/python/mlcroissant";

  build-system = [
    setuptools
  ];

  dependencies = [
    absl-py
    etils
    jsonpath-rw
    networkx
    pandas
    pandas-stubs
    python-dateutil
    rdflib
    requests
    scipy
    tqdm
  ]
  # Also include the dependencies of etils' `epath` extra.
  ++ etils.optional-dependencies.epath;

  pythonImportsCheck = [ "mlcroissant" ];

  nativeCheckInputs = [
    apache-beam
    gitpython
    librosa
    pillow
    pytestCheckHook
    pyyaml
    writableTmpDirAsHomeHook
  ];

  disabledTests = [
    # Requires internet access
    "test_hermetic_loading_1_1"
    "test_load_from_huggingface"
    "test_nonhermetic_loading"
    "test_nonhermetic_loading_1_0"

    # AssertionError: assert {'records/aud...t32), 22050)'} == {'records/aud...t32), 22050)'}
    "test_hermetic_loading"

    # AttributeError: 'MaybeReshuffle' object has no attribute 'side_inputs'
    "test_beam_hermetic_loading"
  ];

  meta = {
    description = "High-level format for machine learning datasets that brings together four rich layers";
    homepage = "https://github.com/mlcommons/croissant";
    changelog = "https://github.com/mlcommons/croissant/releases/tag/${src.tag}";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ GaetanLepage ];
    platforms = lib.platforms.all;
    mainProgram = "mlcroissant";
  };
}