108 lines
2.1 KiB
Nix
108 lines
2.1 KiB
Nix
{
|
|
fetchFromGitHub,
|
|
lib,
|
|
cudaPackages,
|
|
buildPythonPackage,
|
|
nix-update-script,
|
|
|
|
setuptools,
|
|
|
|
pybind11,
|
|
|
|
which,
|
|
|
|
fastparquet,
|
|
flash-attn,
|
|
ninja,
|
|
numpy,
|
|
pandas,
|
|
pillow,
|
|
pygments,
|
|
regex,
|
|
rich,
|
|
safetensors,
|
|
tokenizers,
|
|
torch,
|
|
websockets,
|
|
}:
|
|
buildPythonPackage (finalAttrs: {
|
|
pname = "exllamav2";
|
|
version = "0.3.2";
|
|
pyproject = true;
|
|
|
|
src = fetchFromGitHub {
|
|
owner = "turboderp-org";
|
|
repo = "exllamav2";
|
|
tag = "v${finalAttrs.version}";
|
|
hash = "sha256-WbpbANenOuy6F0qAKVKAmolHjgRKfPxSVud8FZG1TXw=";
|
|
};
|
|
|
|
build-system = [
|
|
setuptools
|
|
];
|
|
|
|
buildInputs = [
|
|
pybind11
|
|
]
|
|
++ lib.optionals torch.cudaSupport [
|
|
cudaPackages.libcusparse # cusparse.h
|
|
cudaPackages.libcublas # cublas_v2.h
|
|
cudaPackages.libcusolver # cusolverDn.h
|
|
cudaPackages.libcurand # curand_kernel.h
|
|
cudaPackages.cuda_cudart # cuda_runtime.h
|
|
];
|
|
|
|
env = lib.optionalAttrs torch.cudaSupport {
|
|
CUDA_HOME = lib.getDev cudaPackages.cuda_nvcc;
|
|
TORCH_CUDA_ARCH_LIST = lib.concatStringsSep ";" torch.cudaCapabilities;
|
|
};
|
|
|
|
nativeBuildInputs = [
|
|
ninja
|
|
which
|
|
];
|
|
|
|
pythonRelaxDeps = [
|
|
"numpy" # Wants numpy 1.26.4
|
|
];
|
|
|
|
dependencies = [
|
|
fastparquet
|
|
flash-attn
|
|
ninja
|
|
numpy
|
|
pandas
|
|
pillow
|
|
pygments
|
|
regex
|
|
rich
|
|
safetensors
|
|
tokenizers
|
|
torch
|
|
websockets
|
|
];
|
|
|
|
pythonImportsCheck = [ "exllamav2" ];
|
|
|
|
# Tests require GPU hardware and external model files
|
|
doCheck = false;
|
|
|
|
passthru.updateScript = nix-update-script { };
|
|
|
|
meta = {
|
|
homepage = "https://github.com/turboderp-org/exllamav2";
|
|
description = "Inference library for running LLMs locally on modern consumer-class GPUs";
|
|
changelog = "https://github.com/turboderp-org/exllamav2/releases/tag/${finalAttrs.src.tag}";
|
|
license = lib.licenses.mit;
|
|
platforms = [
|
|
"x86_64-windows"
|
|
"x86_64-linux"
|
|
];
|
|
|
|
# Package requires CUDA or ROCm for functionality
|
|
# ROCm support is partially implemented but untested
|
|
broken = !torch.cudaSupport;
|
|
maintainers = with lib.maintainers; [ BatteredBunny ];
|
|
};
|
|
})
|