python3Packages.triton: 3.4.0 -> 3.5.0

Diff: https://github.com/triton-lang/triton/compare/v3.4.0...v3.5.0

Co-authored-by: "Else, Someone" <else@someonex.net>
Author: Gaetan Lepage
Date: 2025-11-10 10:19:30 +00:00
Parent: 6a941e54e1
Commit: d1e6e82190
7 changed files with 63 additions and 109 deletions


@@ -25,6 +25,10 @@
   buildTests ? true,
   llvmTargetsToBuild ? [ "NATIVE" ], # "NATIVE" resolves into x86 or aarch64 depending on stdenv
   llvmProjectsToBuild ? [
+    # Required for building triton>=3.5.0
+    # https://github.com/triton-lang/triton/blob/c3c476f357f1e9768ea4e45aa5c17528449ab9ef/third_party/amd/CMakeLists.txt#L6
+    "lld"
     "llvm"
     "mlir"
   ],
@@ -64,7 +68,7 @@ let
 in
 stdenv.mkDerivation (finalAttrs: {
   pname = "triton-llvm";
-  version = "21.0.0-unstable-2025-06-10"; # See https://github.com/llvm/llvm-project/blob/main/cmake/Modules/LLVMVersion.cmake
+  version = "22.0.0-unstable-2025-07-15"; # See https://github.com/llvm/llvm-project/blob/main/cmake/Modules/LLVMVersion.cmake
   outputs = [
     "out"
@@ -80,8 +84,8 @@ stdenv.mkDerivation (finalAttrs: {
   src = fetchFromGitHub {
     owner = "llvm";
     repo = "llvm-project";
-    rev = "8957e64a20fc7f4277565c6cfe3e555c119783ce";
-    hash = "sha256-ljdwHPLGZv72RBPBg5rs7pZczsB+WJhdCeHJxoi4gJQ=";
+    rev = "7d5de3033187c8a3bb4d2e322f5462cdaf49808f";
+    hash = "sha256-ayW6sOZGvP3SBjfmpXvYQJrPOAElY0MEHPFvj2fq+bM=";
   };
   nativeBuildInputs = [


@@ -1,8 +1,8 @@
 diff --git a/python/triton/runtime/build.py b/python/triton/runtime/build.py
-index 1b76548d4..2756dccdb 100644
+index 7614fe2ae..203db996b 100644
 --- a/python/triton/runtime/build.py
 +++ b/python/triton/runtime/build.py
-@@ -33,5 +33,13 @@ def _build(name, src, srcdir, library_dirs, include_dirs, libraries):
+@@ -47,6 +47,14 @@ def _build(name: str, src: str, srcdir: str, library_dirs: list[str], include_di
      cc_cmd += [f'-l{lib}' for lib in libraries]
      cc_cmd += [f"-L{dir}" for dir in library_dirs]
      cc_cmd += [f"-I{dir}" for dir in include_dirs if dir is not None]
@@ -14,5 +14,6 @@ index 1b76548d4..2756dccdb 100644
 +    import shlex
 +    cc_cmd.extend(shlex.split(cc_cmd_extra_flags))
 +
+     cc_cmd.extend(ccflags)
      subprocess.check_call(cc_cmd, stdout=subprocess.DEVNULL)
      return so
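The load-bearing part of this rebased patch is the shlex.split call: Nix substitutes the placeholder with a single string of compiler flags, and shlex.split turns it into separate argv entries while respecting shell-style quoting. A minimal sketch, where the flag string is a made-up stand-in for whatever Nix substitutes at build time:

    # Illustration only; the extra-flags string below is hypothetical.
    import shlex

    cc_cmd = ["cc", "-shared", "-fPIC"]
    cc_cmd_extra_flags = '-I/nix/store/...-cuda/include -DGREETING="hello world"'
    cc_cmd.extend(shlex.split(cc_cmd_extra_flags))
    # cc_cmd now ends with:
    #   '-I/nix/store/...-cuda/include', '-DGREETING=hello world'
    # whereas a plain str.split() would have broken the quoted define in two.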


@@ -1,14 +1,14 @@
 diff --git a/third_party/nvidia/backend/driver.py b/third_party/nvidia/backend/driver.py
-index d088ec092..625de2db8 100644
+index e6fd6a968..2b39fea29 100644
 --- a/third_party/nvidia/backend/driver.py
 +++ b/third_party/nvidia/backend/driver.py
 @@ -23,6 +23,9 @@ def libcuda_dirs():
-     if env_libcuda_path:
+     if env_libcuda_path := knobs.nvidia.libcuda_path:
          return [env_libcuda_path]
 +    if os.path.exists("@libcudaStubsDir@"):
 +        return ["@libcudaStubsDir@"]
 +
-     libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode()
+     libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode(errors="ignore")
      # each line looks like the following:
      # libcuda.so.1 (libc6,x86-64) => /lib/x86_64-linux-gnu/libcuda.so.1
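For reference, the ldconfig fallback that the @libcudaStubsDir@ check short-circuits parses lines of the form shown in the context above. A hedged sketch of that parse, not triton's exact implementation:

    import os
    import subprocess

    libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode(errors="ignore")
    dirs = set()
    for line in libs.splitlines():
        # e.g. "libcuda.so.1 (libc6,x86-64) => /lib/x86_64-linux-gnu/libcuda.so.1"
        if "libcuda.so" in line and "=>" in line:
            dirs.add(os.path.dirname(line.split("=>", 1)[1].strip()))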


@@ -1,5 +1,5 @@
 diff --git a/third_party/nvidia/backend/driver.c b/third_party/nvidia/backend/driver.c
-index ab24f7657..46dbaceb0 100644
+index bff09d8c1..a5c341711 100644
 --- a/third_party/nvidia/backend/driver.c
 +++ b/third_party/nvidia/backend/driver.c
 @@ -1,4 +1,4 @@
@@ -7,9 +7,9 @@ index ab24f7657..46dbaceb0 100644
 +#include <cuda.h>
  #include <dlfcn.h>
  #include <stdbool.h>
- #define PY_SSIZE_T_CLEAN
+ #include <stdlib.h>
 diff --git a/third_party/nvidia/backend/driver.py b/third_party/nvidia/backend/driver.py
-index 47544bd8e..d57c6a70f 100644
+index 2b39fea29..3346eb954 100644
 --- a/third_party/nvidia/backend/driver.py
 +++ b/third_party/nvidia/backend/driver.py
 @@ -12,7 +12,8 @@ from triton.backends.compiler import GPUTarget
@@ -21,13 +21,13 @@ index 47544bd8e..d57c6a70f 100644
 +include_dirs = [*shlex.split("@cudaToolkitIncludeDirs@"), os.path.join(dirname, "include")]
  libdevice_dir = os.path.join(dirname, "lib")
  libraries = ['cuda']
-@@ -256,7 +257,7 @@ def make_launcher(constants, signature, tensordesc_meta):
-     params = [f"&arg{i}" for i, ty in signature.items() if ty != "constexpr"]
-     PyCUtensorMap = None
+@@ -265,7 +266,7 @@ def make_launcher(constants, signature, tensordesc_meta):
+     params.append("&global_scratch")
+     params.append("&profile_scratch")
      src = f"""
 -#include \"cuda.h\"
 +#include <cuda.h>
- #include <stdbool.h>
  #include <Python.h>
  #include <dlfcn.h>
+ #include <stdbool.h>
+ #include <stdlib.h>


@@ -1,13 +1,13 @@
 diff --git a/python/triton/knobs.py b/python/triton/knobs.py
-index 30804b170..c6a3a737d 100644
+index 161f739bd..047b19d69 100644
 --- a/python/triton/knobs.py
 +++ b/python/triton/knobs.py
-@@ -203,6 +203,8 @@ class env_nvidia_tool(env_base[str, NvidiaTool]):
-             # accessible.
-             self.default(),
-         ]
+@@ -208,6 +208,8 @@ class env_nvidia_tool(env_base[str, NvidiaTool]):
+         else:
+             paths = [self.default_path]
 +        import shlex
 +        paths.extend(shlex.split("@nixpkgsExtraBinaryPaths@"))
          for path in paths:
              if not path or not os.access(path, os.X_OK):
                  continue
              if tool := NvidiaTool.from_path(path):
                  return tool
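The hunk only prepends candidates; selection still happens in the pre-existing loop, which skips entries that are empty or not executable. A condensed sketch of that behavior, with illustrative placeholder paths standing in for what @nixpkgsExtraBinaryPaths@ and the default path resolve to, and a plain executability check standing in for triton's NvidiaTool machinery:

    import os
    import shlex

    # Both paths below are hypothetical placeholders.
    paths = ["/usr/local/cuda/bin/ptxas"]
    paths.extend(shlex.split("/nix/store/...-cuda-nvcc/bin/ptxas"))

    # First candidate that exists and is executable wins, as in the patched loop.
    tool = next((p for p in paths if p and os.access(p, os.X_OK)), None)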


@@ -1,60 +0,0 @@
-From 9e4e58b647c17c5fa098c8a74e221f88d3cb1a43 Mon Sep 17 00:00:00 2001
-From: Luna Nova <git@lunnova.dev>
-Date: Sun, 24 Aug 2025 07:41:30 -0700
-Subject: [PATCH] [AMD] Search HIP_PATH, hipconfig, and ROCM_PATH for
- libamdhip64
-
-Search for libamdhip64 from HIP_PATH env var, hipconfig --path output,
-and ROCM_PATH before looking in system-wide ldconfig or /opt/rocm.
-
-The system-wide ROCm path isn't guaranteed to be where the ROCm
-install we're building against is located, so follow typical ROCm
-lib behavior and look under env paths first.
-
-This is especially important for non-FHS distros like NixOS
-where /opt/rocm never exists, but may be useful in more
-typical distros if multiple ROCm installs are present
-to ensure the right libamdhip64.so is picked up.
----
- third_party/amd/backend/driver.py | 28 ++++++++++++++++++++++++++++
- 1 file changed, 28 insertions(+)
-
-diff --git a/third_party/amd/backend/driver.py b/third_party/amd/backend/driver.py
-index af8e1a5c8097..57b0f7388c60 100644
---- a/third_party/amd/backend/driver.py
-+++ b/third_party/amd/backend/driver.py
-@@ -110,6 +110,34 @@ def _get_path_to_hip_runtime_dylib():
-             return f
-         paths.append(f)
-+    # HIP_PATH should point to HIP SDK root if set
-+    env_hip_path = os.getenv("HIP_PATH")
-+    if env_hip_path:
-+        hip_lib_path = os.path.join(env_hip_path, "lib", lib_name)
-+        if os.path.exists(hip_lib_path):
-+            return hip_lib_path
-+        paths.append(hip_lib_path)
-+
-+    # if available, `hipconfig --path` prints the HIP SDK root
-+    try:
-+        hip_root = subprocess.check_output(["hipconfig", "--path"]).decode().strip()
-+        if hip_root:
-+            hip_lib_path = os.path.join(hip_root, "lib", lib_name)
-+            if os.path.exists(hip_lib_path):
-+                return hip_lib_path
-+            paths.append(hip_lib_path)
-+    except (subprocess.CalledProcessError, FileNotFoundError):
-+        # hipconfig may not be available
-+        pass
-+
-+    # ROCm lib dir based on env var
-+    env_rocm_path = os.getenv("ROCM_PATH")
-+    if env_rocm_path:
-+        rocm_lib_path = os.path.join(env_rocm_path, "lib", lib_name)
-+        if os.path.exists(rocm_lib_path):
-+            return rocm_lib_path
-+        paths.append(rocm_lib_path)
-+
-     # Afterwards try to search the loader dynamic library resolution paths.
-     libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode(errors="ignore")
-     # each line looks like the following:
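The search order this dropped patch implemented, condensed (it was merged upstream, which is why the file is deleted here):

    import os
    import subprocess

    def hip_sdk_roots():
        roots = []
        if hip_path := os.getenv("HIP_PATH"):        # 1. explicit HIP SDK root
            roots.append(hip_path)
        try:                                         # 2. `hipconfig --path`, if installed
            roots.append(subprocess.check_output(["hipconfig", "--path"]).decode().strip())
        except (subprocess.CalledProcessError, FileNotFoundError):
            pass
        if rocm_path := os.getenv("ROCM_PATH"):      # 3. ROCm install root
            roots.append(rocm_path)
        return [r for r in roots if r]               # ldconfig and /opt/rocm come after these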


@@ -1,36 +1,49 @@
 {
   lib,
-  addDriverRunpath,
-  buildPythonPackage,
-  cmake,
+  stdenv,
   config,
-  cudaPackages,
+  buildPythonPackage,
   fetchFromGitHub,
-  filelock,
-  gtest,
-  libxml2,
+  # patches
+  replaceVars,
+  addDriverRunpath,
+  cudaPackages,
+  # build-system
+  setuptools,
+  # nativeBuildInputs
+  cmake,
+  ninja,
   lit,
   llvm,
+  writableTmpDirAsHomeHook,
+  # buildInputs
+  gtest,
+  libxml2,
   ncurses,
-  ninja,
   pybind11,
+  zlib,
+  # dependencies
+  filelock,
+  # passthru
   python,
   pytestCheckHook,
-  writableTmpDirAsHomeHook,
-  stdenv,
-  replaceVars,
-  setuptools,
   torchWithRocm,
-  zlib,
-  cudaSupport ? config.cudaSupport,
   runCommand,
-  rocmPackages,
   triton,
+  rocmPackages,
+  cudaSupport ? config.cudaSupport,
 }:
 buildPythonPackage rec {
   pname = "triton";
-  version = "3.4.0";
+  version = "3.5.0";
   pyproject = true;
   # Remember to bump triton-llvm as well!
@@ -38,7 +51,7 @@ buildPythonPackage rec {
     owner = "triton-lang";
     repo = "triton";
     tag = "v${version}";
-    hash = "sha256-78s9ke6UV7Tnx3yCr0QZcVDqQELR4XoGgJY7olNJmjk=";
+    hash = "sha256-F6T0n37Lbs+B7UHNYzoIQHjNNv3TcMtoXjNrT8ZUlxY=";
   };
   patches = [
@@ -49,8 +62,6 @@ buildPythonPackage rec {
       libcudaStubsDir =
         if cudaSupport then "${lib.getOutput "stubs" cudaPackages.cuda_cudart}/lib/stubs" else null;
     })
-    # Upstream PR: https://github.com/triton-lang/triton/pull/7959
-    ./0005-amd-search-env-paths.patch
   ]
   ++ lib.optionals cudaSupport [
     (replaceVars ./0003-nvidia-cudart-a-systempath.patch {
@@ -88,13 +99,6 @@ buildPythonPackage rec {
       substituteInPlace cmake/AddTritonUnitTest.cmake \
         --replace-fail "include(\''${PROJECT_SOURCE_DIR}/unittest/googletest.cmake)" ""\
         --replace-fail "include(GoogleTest)" "find_package(GTest REQUIRED)"
-    ''
-    # Don't use FHS path for ROCm LLD
-    # Remove this after `[AMD] Use lld library API #7548` makes it into a release
-    + ''
-      substituteInPlace third_party/amd/backend/compiler.py \
-        --replace-fail 'lld = Path("/opt/rocm/llvm/bin/ld.lld")' \
-        "import os;lld = Path(os.getenv('HIP_PATH', '/opt/rocm/')"' + "/llvm/bin/ld.lld")'
     '';
   build-system = [ setuptools ];
@@ -116,6 +120,11 @@ buildPythonPackage rec {
   cmakeFlags = [
     (lib.cmakeFeature "LLVM_SYSPATH" "${llvm}")
+    # `find_package` is called with `NO_DEFAULT_PATH`
+    # https://cmake.org/cmake/help/latest/command/find_package.html
+    # https://github.com/triton-lang/triton/blob/c3c476f357f1e9768ea4e45aa5c17528449ab9ef/third_party/amd/CMakeLists.txt#L6
+    (lib.cmakeFeature "LLD_DIR" "${lib.getLib llvm}")
   ];
   buildInputs = [