diff --git a/pkgs/by-name/tr/triton-llvm/package.nix b/pkgs/by-name/tr/triton-llvm/package.nix index bbe124f2c371..833609e70713 100644 --- a/pkgs/by-name/tr/triton-llvm/package.nix +++ b/pkgs/by-name/tr/triton-llvm/package.nix @@ -25,6 +25,10 @@ buildTests ? true, llvmTargetsToBuild ? [ "NATIVE" ], # "NATIVE" resolves into x86 or aarch64 depending on stdenv llvmProjectsToBuild ? [ + # Required for building triton>=3.5.0 + # https://github.com/triton-lang/triton/blob/c3c476f357f1e9768ea4e45aa5c17528449ab9ef/third_party/amd/CMakeLists.txt#L6 + "lld" + "llvm" "mlir" ], @@ -64,7 +68,7 @@ let in stdenv.mkDerivation (finalAttrs: { pname = "triton-llvm"; - version = "21.0.0-unstable-2025-06-10"; # See https://github.com/llvm/llvm-project/blob/main/cmake/Modules/LLVMVersion.cmake + version = "22.0.0-unstable-2025-07-15"; # See https://github.com/llvm/llvm-project/blob/main/cmake/Modules/LLVMVersion.cmake outputs = [ "out" @@ -80,8 +84,8 @@ stdenv.mkDerivation (finalAttrs: { src = fetchFromGitHub { owner = "llvm"; repo = "llvm-project"; - rev = "8957e64a20fc7f4277565c6cfe3e555c119783ce"; - hash = "sha256-ljdwHPLGZv72RBPBg5rs7pZczsB+WJhdCeHJxoi4gJQ="; + rev = "7d5de3033187c8a3bb4d2e322f5462cdaf49808f"; + hash = "sha256-ayW6sOZGvP3SBjfmpXvYQJrPOAElY0MEHPFvj2fq+bM="; }; nativeBuildInputs = [ diff --git a/pkgs/development/python-modules/triton/0001-_build-allow-extra-cc-flags.patch b/pkgs/development/python-modules/triton/0001-_build-allow-extra-cc-flags.patch index 7b97c7f87b0d..79952183f556 100644 --- a/pkgs/development/python-modules/triton/0001-_build-allow-extra-cc-flags.patch +++ b/pkgs/development/python-modules/triton/0001-_build-allow-extra-cc-flags.patch @@ -1,8 +1,8 @@ diff --git a/python/triton/runtime/build.py b/python/triton/runtime/build.py -index 1b76548d4..2756dccdb 100644 +index 7614fe2ae..203db996b 100644 --- a/python/triton/runtime/build.py +++ b/python/triton/runtime/build.py -@@ -33,5 +33,13 @@ def _build(name, src, srcdir, library_dirs, include_dirs, libraries): +@@ -47,6 +47,14 @@ def _build(name: str, src: str, srcdir: str, library_dirs: list[str], include_di cc_cmd += [f'-l{lib}' for lib in libraries] cc_cmd += [f"-L{dir}" for dir in library_dirs] cc_cmd += [f"-I{dir}" for dir in include_dirs if dir is not None] @@ -14,5 +14,6 @@ index 1b76548d4..2756dccdb 100644 + import shlex + cc_cmd.extend(shlex.split(cc_cmd_extra_flags)) + + cc_cmd.extend(ccflags) subprocess.check_call(cc_cmd, stdout=subprocess.DEVNULL) return so diff --git a/pkgs/development/python-modules/triton/0002-nvidia-driver-short-circuit-before-ldconfig.patch b/pkgs/development/python-modules/triton/0002-nvidia-driver-short-circuit-before-ldconfig.patch index 13077545d7ef..c8d08eeff93d 100644 --- a/pkgs/development/python-modules/triton/0002-nvidia-driver-short-circuit-before-ldconfig.patch +++ b/pkgs/development/python-modules/triton/0002-nvidia-driver-short-circuit-before-ldconfig.patch @@ -1,14 +1,14 @@ diff --git a/third_party/nvidia/backend/driver.py b/third_party/nvidia/backend/driver.py -index d088ec092..625de2db8 100644 +index e6fd6a968..2b39fea29 100644 --- a/third_party/nvidia/backend/driver.py +++ b/third_party/nvidia/backend/driver.py @@ -23,6 +23,9 @@ def libcuda_dirs(): - if env_libcuda_path: + if env_libcuda_path := knobs.nvidia.libcuda_path: return [env_libcuda_path] + if os.path.exists("@libcudaStubsDir@"): + return ["@libcudaStubsDir@"] + - libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode() + libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode(errors="ignore") # each line looks like the following: # libcuda.so.1 (libc6,x86-64) => /lib/x86_64-linux-gnu/libcuda.so.1 diff --git a/pkgs/development/python-modules/triton/0003-nvidia-cudart-a-systempath.patch b/pkgs/development/python-modules/triton/0003-nvidia-cudart-a-systempath.patch index 66a757c77466..da5ff0c9de7c 100644 --- a/pkgs/development/python-modules/triton/0003-nvidia-cudart-a-systempath.patch +++ b/pkgs/development/python-modules/triton/0003-nvidia-cudart-a-systempath.patch @@ -1,5 +1,5 @@ diff --git a/third_party/nvidia/backend/driver.c b/third_party/nvidia/backend/driver.c -index ab24f7657..46dbaceb0 100644 +index bff09d8c1..a5c341711 100644 --- a/third_party/nvidia/backend/driver.c +++ b/third_party/nvidia/backend/driver.c @@ -1,4 +1,4 @@ @@ -7,9 +7,9 @@ index ab24f7657..46dbaceb0 100644 +#include #include #include - #define PY_SSIZE_T_CLEAN + #include diff --git a/third_party/nvidia/backend/driver.py b/third_party/nvidia/backend/driver.py -index 47544bd8e..d57c6a70f 100644 +index 2b39fea29..3346eb954 100644 --- a/third_party/nvidia/backend/driver.py +++ b/third_party/nvidia/backend/driver.py @@ -12,7 +12,8 @@ from triton.backends.compiler import GPUTarget @@ -21,13 +21,13 @@ index 47544bd8e..d57c6a70f 100644 +include_dirs = [*shlex.split("@cudaToolkitIncludeDirs@"), os.path.join(dirname, "include")] libdevice_dir = os.path.join(dirname, "lib") libraries = ['cuda'] - -@@ -256,7 +257,7 @@ def make_launcher(constants, signature, tensordesc_meta): - params = [f"&arg{i}" for i, ty in signature.items() if ty != "constexpr"] + PyCUtensorMap = None +@@ -265,7 +266,7 @@ def make_launcher(constants, signature, tensordesc_meta): params.append("&global_scratch") + params.append("&profile_scratch") src = f""" -#include \"cuda.h\" +#include - #include - #include #include + #include + #include diff --git a/pkgs/development/python-modules/triton/0004-nvidia-allow-static-ptxas-path.patch b/pkgs/development/python-modules/triton/0004-nvidia-allow-static-ptxas-path.patch index 47c1380af85a..17f13dae2139 100644 --- a/pkgs/development/python-modules/triton/0004-nvidia-allow-static-ptxas-path.patch +++ b/pkgs/development/python-modules/triton/0004-nvidia-allow-static-ptxas-path.patch @@ -1,13 +1,13 @@ diff --git a/python/triton/knobs.py b/python/triton/knobs.py -index 30804b170..c6a3a737d 100644 +index 161f739bd..047b19d69 100644 --- a/python/triton/knobs.py +++ b/python/triton/knobs.py -@@ -203,6 +203,8 @@ class env_nvidia_tool(env_base[str, NvidiaTool]): - # accessible. - self.default(), - ] +@@ -208,6 +208,8 @@ class env_nvidia_tool(env_base[str, NvidiaTool]): + else: + paths = [self.default_path] + + import shlex + paths.extend(shlex.split("@nixpkgsExtraBinaryPaths@")) for path in paths: - if not path or not os.access(path, os.X_OK): - continue + if tool := NvidiaTool.from_path(path): + return tool diff --git a/pkgs/development/python-modules/triton/0005-amd-search-env-paths.patch b/pkgs/development/python-modules/triton/0005-amd-search-env-paths.patch deleted file mode 100644 index 8f46c826c2fd..000000000000 --- a/pkgs/development/python-modules/triton/0005-amd-search-env-paths.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 9e4e58b647c17c5fa098c8a74e221f88d3cb1a43 Mon Sep 17 00:00:00 2001 -From: Luna Nova -Date: Sun, 24 Aug 2025 07:41:30 -0700 -Subject: [PATCH] [AMD] Search HIP_PATH, hipconfig, and ROCM_PATH for - libamdhip64 - -Search for libamdhip64 from HIP_PATH env var, hipconfig --path output, -and ROCM_PATH before looking in system-wide ldconfig or /opt/rocm. - -The system-wide ROCm path isn't guaranteed to be where the ROCm -install we're building against is located, so follow typical ROCm -lib behavior and look under env paths first. - -This is especially important for non-FHS distros like NixOS -where /opt/rocm never exists, but may be useful in more -typical distros if multiple ROCm installs are present -to ensure the right libamdhip64.so is picked up. ---- - third_party/amd/backend/driver.py | 28 ++++++++++++++++++++++++++++ - 1 file changed, 28 insertions(+) - -diff --git a/third_party/amd/backend/driver.py b/third_party/amd/backend/driver.py -index af8e1a5c8097..57b0f7388c60 100644 ---- a/third_party/amd/backend/driver.py -+++ b/third_party/amd/backend/driver.py -@@ -110,6 +110,34 @@ def _get_path_to_hip_runtime_dylib(): - return f - paths.append(f) - -+ # HIP_PATH should point to HIP SDK root if set -+ env_hip_path = os.getenv("HIP_PATH") -+ if env_hip_path: -+ hip_lib_path = os.path.join(env_hip_path, "lib", lib_name) -+ if os.path.exists(hip_lib_path): -+ return hip_lib_path -+ paths.append(hip_lib_path) -+ -+ # if available, `hipconfig --path` prints the HIP SDK root -+ try: -+ hip_root = subprocess.check_output(["hipconfig", "--path"]).decode().strip() -+ if hip_root: -+ hip_lib_path = os.path.join(hip_root, "lib", lib_name) -+ if os.path.exists(hip_lib_path): -+ return hip_lib_path -+ paths.append(hip_lib_path) -+ except (subprocess.CalledProcessError, FileNotFoundError): -+ # hipconfig may not be available -+ pass -+ -+ # ROCm lib dir based on env var -+ env_rocm_path = os.getenv("ROCM_PATH") -+ if env_rocm_path: -+ rocm_lib_path = os.path.join(env_rocm_path, "lib", lib_name) -+ if os.path.exists(rocm_lib_path): -+ return rocm_lib_path -+ paths.append(rocm_lib_path) -+ - # Afterwards try to search the loader dynamic library resolution paths. - libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode(errors="ignore") - # each line looks like the following: diff --git a/pkgs/development/python-modules/triton/default.nix b/pkgs/development/python-modules/triton/default.nix index a8bb1aab043d..aa207c24b23a 100644 --- a/pkgs/development/python-modules/triton/default.nix +++ b/pkgs/development/python-modules/triton/default.nix @@ -1,36 +1,49 @@ { lib, - addDriverRunpath, - buildPythonPackage, - cmake, + stdenv, config, - cudaPackages, + buildPythonPackage, fetchFromGitHub, - filelock, - gtest, - libxml2, + + # patches + replaceVars, + addDriverRunpath, + cudaPackages, + + # build-system + setuptools, + + # nativeBuildInputs + cmake, + ninja, lit, llvm, + writableTmpDirAsHomeHook, + + # buildInputs + gtest, + libxml2, ncurses, - ninja, pybind11, + zlib, + + # dependencies + filelock, + + # passthru python, pytestCheckHook, - writableTmpDirAsHomeHook, - stdenv, - replaceVars, - setuptools, torchWithRocm, - zlib, - cudaSupport ? config.cudaSupport, runCommand, - rocmPackages, triton, + rocmPackages, + + cudaSupport ? config.cudaSupport, }: buildPythonPackage rec { pname = "triton"; - version = "3.4.0"; + version = "3.5.0"; pyproject = true; # Remember to bump triton-llvm as well! @@ -38,7 +51,7 @@ buildPythonPackage rec { owner = "triton-lang"; repo = "triton"; tag = "v${version}"; - hash = "sha256-78s9ke6UV7Tnx3yCr0QZcVDqQELR4XoGgJY7olNJmjk="; + hash = "sha256-F6T0n37Lbs+B7UHNYzoIQHjNNv3TcMtoXjNrT8ZUlxY="; }; patches = [ @@ -49,8 +62,6 @@ buildPythonPackage rec { libcudaStubsDir = if cudaSupport then "${lib.getOutput "stubs" cudaPackages.cuda_cudart}/lib/stubs" else null; }) - # Upstream PR: https://github.com/triton-lang/triton/pull/7959 - ./0005-amd-search-env-paths.patch ] ++ lib.optionals cudaSupport [ (replaceVars ./0003-nvidia-cudart-a-systempath.patch { @@ -88,13 +99,6 @@ buildPythonPackage rec { substituteInPlace cmake/AddTritonUnitTest.cmake \ --replace-fail "include(\''${PROJECT_SOURCE_DIR}/unittest/googletest.cmake)" ""\ --replace-fail "include(GoogleTest)" "find_package(GTest REQUIRED)" - '' - # Don't use FHS path for ROCm LLD - # Remove this after `[AMD] Use lld library API #7548` makes it into a release - + '' - substituteInPlace third_party/amd/backend/compiler.py \ - --replace-fail 'lld = Path("/opt/rocm/llvm/bin/ld.lld")' \ - "import os;lld = Path(os.getenv('HIP_PATH', '/opt/rocm/')"' + "/llvm/bin/ld.lld")' ''; build-system = [ setuptools ]; @@ -116,6 +120,11 @@ buildPythonPackage rec { cmakeFlags = [ (lib.cmakeFeature "LLVM_SYSPATH" "${llvm}") + + # `find_package` is called with `NO_DEFAULT_PATH` + # https://cmake.org/cmake/help/latest/command/find_package.html + # https://github.com/triton-lang/triton/blob/c3c476f357f1e9768ea4e45aa5c17528449ab9ef/third_party/amd/CMakeLists.txt#L6 + (lib.cmakeFeature "LLD_DIR" "${lib.getLib llvm}") ]; buildInputs = [