python3Packages.triton: 3.4.0 -> 3.5.0

Diff: https://github.com/triton-lang/triton/compare/v3.4.0...v3.5.0

Co-authored-by: "Else, Someone" <else@someonex.net>
Author: Gaetan Lepage
Date: 2025-11-10 10:19:30 +00:00
Parent: 6a941e54e1
Commit: d1e6e82190
7 changed files with 63 additions and 109 deletions


@@ -25,6 +25,10 @@
   buildTests ? true,
   llvmTargetsToBuild ? [ "NATIVE" ], # "NATIVE" resolves into x86 or aarch64 depending on stdenv
   llvmProjectsToBuild ? [
+    # Required for building triton>=3.5.0
+    # https://github.com/triton-lang/triton/blob/c3c476f357f1e9768ea4e45aa5c17528449ab9ef/third_party/amd/CMakeLists.txt#L6
+    "lld"
     "llvm"
     "mlir"
   ],
@@ -64,7 +68,7 @@ let
 in
 stdenv.mkDerivation (finalAttrs: {
   pname = "triton-llvm";
-  version = "21.0.0-unstable-2025-06-10"; # See https://github.com/llvm/llvm-project/blob/main/cmake/Modules/LLVMVersion.cmake
+  version = "22.0.0-unstable-2025-07-15"; # See https://github.com/llvm/llvm-project/blob/main/cmake/Modules/LLVMVersion.cmake
   outputs = [
     "out"
@@ -80,8 +84,8 @@ stdenv.mkDerivation (finalAttrs: {
   src = fetchFromGitHub {
     owner = "llvm";
     repo = "llvm-project";
-    rev = "8957e64a20fc7f4277565c6cfe3e555c119783ce";
-    hash = "sha256-ljdwHPLGZv72RBPBg5rs7pZczsB+WJhdCeHJxoi4gJQ=";
+    rev = "7d5de3033187c8a3bb4d2e322f5462cdaf49808f";
+    hash = "sha256-ayW6sOZGvP3SBjfmpXvYQJrPOAElY0MEHPFvj2fq+bM=";
   };
   nativeBuildInputs = [


@@ -1,8 +1,8 @@
 diff --git a/python/triton/runtime/build.py b/python/triton/runtime/build.py
-index 1b76548d4..2756dccdb 100644
+index 7614fe2ae..203db996b 100644
 --- a/python/triton/runtime/build.py
 +++ b/python/triton/runtime/build.py
-@@ -33,5 +33,13 @@ def _build(name, src, srcdir, library_dirs, include_dirs, libraries):
+@@ -47,6 +47,14 @@ def _build(name: str, src: str, srcdir: str, library_dirs: list[str], include_di
      cc_cmd += [f'-l{lib}' for lib in libraries]
      cc_cmd += [f"-L{dir}" for dir in library_dirs]
      cc_cmd += [f"-I{dir}" for dir in include_dirs if dir is not None]
@@ -14,5 +14,6 @@ index 1b76548d4..2756dccdb 100644
 +    import shlex
 +    cc_cmd.extend(shlex.split(cc_cmd_extra_flags))
 +
+     cc_cmd.extend(ccflags)
      subprocess.check_call(cc_cmd, stdout=subprocess.DEVNULL)
      return so
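The load-bearing part of this rebased patch is the shlex.split call: Nix substitutes the placeholder with a single string of compiler flags, and shlex.split turns it into separate argv entries while respecting shell-style quoting. A minimal sketch, where the flag string is a made-up stand-in for whatever Nix substitutes at build time:

    # Illustration only; the extra-flags string below is hypothetical.
    import shlex

    cc_cmd = ["cc", "-shared", "-fPIC"]
    cc_cmd_extra_flags = '-I/nix/store/...-cuda/include -DGREETING="hello world"'
    cc_cmd.extend(shlex.split(cc_cmd_extra_flags))
    # cc_cmd now ends with:
    #   '-I/nix/store/...-cuda/include', '-DGREETING=hello world'
    # whereas a plain str.split() would have broken the quoted define in two.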


@@ -1,14 +1,14 @@
 diff --git a/third_party/nvidia/backend/driver.py b/third_party/nvidia/backend/driver.py
-index d088ec092..625de2db8 100644
+index e6fd6a968..2b39fea29 100644
 --- a/third_party/nvidia/backend/driver.py
 +++ b/third_party/nvidia/backend/driver.py
 @@ -23,6 +23,9 @@ def libcuda_dirs():
-     if env_libcuda_path:
+     if env_libcuda_path := knobs.nvidia.libcuda_path:
          return [env_libcuda_path]
 +    if os.path.exists("@libcudaStubsDir@"):
 +        return ["@libcudaStubsDir@"]
 +
-     libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode()
+     libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode(errors="ignore")
      # each line looks like the following:
      # libcuda.so.1 (libc6,x86-64) => /lib/x86_64-linux-gnu/libcuda.so.1
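For reference, the ldconfig fallback that the @libcudaStubsDir@ check short-circuits parses lines of the form shown in the context above. A hedged sketch of that parse, not triton's exact implementation:

    import os
    import subprocess

    libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode(errors="ignore")
    dirs = set()
    for line in libs.splitlines():
        # e.g. "libcuda.so.1 (libc6,x86-64) => /lib/x86_64-linux-gnu/libcuda.so.1"
        if "libcuda.so" in line and "=>" in line:
            dirs.add(os.path.dirname(line.split("=>", 1)[1].strip()))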


@@ -1,5 +1,5 @@
 diff --git a/third_party/nvidia/backend/driver.c b/third_party/nvidia/backend/driver.c
-index ab24f7657..46dbaceb0 100644
+index bff09d8c1..a5c341711 100644
 --- a/third_party/nvidia/backend/driver.c
 +++ b/third_party/nvidia/backend/driver.c
 @@ -1,4 +1,4 @@
@@ -7,9 +7,9 @@ index ab24f7657..46dbaceb0 100644
 +#include <cuda.h>
  #include <dlfcn.h>
  #include <stdbool.h>
- #define PY_SSIZE_T_CLEAN
+ #include <stdlib.h>
 diff --git a/third_party/nvidia/backend/driver.py b/third_party/nvidia/backend/driver.py
-index 47544bd8e..d57c6a70f 100644
+index 2b39fea29..3346eb954 100644
 --- a/third_party/nvidia/backend/driver.py
 +++ b/third_party/nvidia/backend/driver.py
 @@ -12,7 +12,8 @@ from triton.backends.compiler import GPUTarget
@@ -21,13 +21,13 @@ index 47544bd8e..d57c6a70f 100644
 +include_dirs = [*shlex.split("@cudaToolkitIncludeDirs@"), os.path.join(dirname, "include")]
  libdevice_dir = os.path.join(dirname, "lib")
  libraries = ['cuda']
-@@ -256,7 +257,7 @@ def make_launcher(constants, signature, tensordesc_meta):
-     params = [f"&arg{i}" for i, ty in signature.items() if ty != "constexpr"]
-     PyCUtensorMap = None
+@@ -265,7 +266,7 @@ def make_launcher(constants, signature, tensordesc_meta):
+     params.append("&global_scratch")
+     params.append("&profile_scratch")
      src = f"""
 -#include \"cuda.h\"
 +#include <cuda.h>
- #include <stdbool.h>
  #include <Python.h>
  #include <dlfcn.h>
+ #include <stdbool.h>
+ #include <stdlib.h>


@@ -1,13 +1,13 @@
 diff --git a/python/triton/knobs.py b/python/triton/knobs.py
-index 30804b170..c6a3a737d 100644
+index 161f739bd..047b19d69 100644
 --- a/python/triton/knobs.py
 +++ b/python/triton/knobs.py
-@@ -203,6 +203,8 @@ class env_nvidia_tool(env_base[str, NvidiaTool]):
-             # accessible.
-             self.default(),
-         ]
+@@ -208,6 +208,8 @@ class env_nvidia_tool(env_base[str, NvidiaTool]):
+         else:
+             paths = [self.default_path]
 +        import shlex
 +        paths.extend(shlex.split("@nixpkgsExtraBinaryPaths@"))
          for path in paths:
              if not path or not os.access(path, os.X_OK):
                  continue
              if tool := NvidiaTool.from_path(path):
                  return tool
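The hunk only prepends candidates; selection still happens in the pre-existing loop, which skips entries that are empty or not executable. A condensed sketch of that behavior, with illustrative placeholder paths standing in for what @nixpkgsExtraBinaryPaths@ and the default path resolve to, and a plain executability check standing in for triton's NvidiaTool machinery:

    import os
    import shlex

    # Both paths below are hypothetical placeholders.
    paths = ["/usr/local/cuda/bin/ptxas"]
    paths.extend(shlex.split("/nix/store/...-cuda-nvcc/bin/ptxas"))

    # First candidate that exists and is executable wins, as in the patched loop.
    tool = next((p for p in paths if p and os.access(p, os.X_OK)), None)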


@@ -1,60 +0,0 @@
-From 9e4e58b647c17c5fa098c8a74e221f88d3cb1a43 Mon Sep 17 00:00:00 2001
-From: Luna Nova <git@lunnova.dev>
-Date: Sun, 24 Aug 2025 07:41:30 -0700
-Subject: [PATCH] [AMD] Search HIP_PATH, hipconfig, and ROCM_PATH for
- libamdhip64
-
-Search for libamdhip64 from HIP_PATH env var, hipconfig --path output,
-and ROCM_PATH before looking in system-wide ldconfig or /opt/rocm.
-
-The system-wide ROCm path isn't guaranteed to be where the ROCm
-install we're building against is located, so follow typical ROCm
-lib behavior and look under env paths first.
-
-This is especially important for non-FHS distros like NixOS
-where /opt/rocm never exists, but may be useful in more
-typical distros if multiple ROCm installs are present
-to ensure the right libamdhip64.so is picked up.
----
- third_party/amd/backend/driver.py | 28 ++++++++++++++++++++++++++++
- 1 file changed, 28 insertions(+)
-
-diff --git a/third_party/amd/backend/driver.py b/third_party/amd/backend/driver.py
-index af8e1a5c8097..57b0f7388c60 100644
---- a/third_party/amd/backend/driver.py
-+++ b/third_party/amd/backend/driver.py
-@@ -110,6 +110,34 @@ def _get_path_to_hip_runtime_dylib():
-             return f
-         paths.append(f)
-+    # HIP_PATH should point to HIP SDK root if set
-+    env_hip_path = os.getenv("HIP_PATH")
-+    if env_hip_path:
-+        hip_lib_path = os.path.join(env_hip_path, "lib", lib_name)
-+        if os.path.exists(hip_lib_path):
-+            return hip_lib_path
-+        paths.append(hip_lib_path)
-+
-+    # if available, `hipconfig --path` prints the HIP SDK root
-+    try:
-+        hip_root = subprocess.check_output(["hipconfig", "--path"]).decode().strip()
-+        if hip_root:
-+            hip_lib_path = os.path.join(hip_root, "lib", lib_name)
-+            if os.path.exists(hip_lib_path):
-+                return hip_lib_path
-+            paths.append(hip_lib_path)
-+    except (subprocess.CalledProcessError, FileNotFoundError):
-+        # hipconfig may not be available
-+        pass
-+
-+    # ROCm lib dir based on env var
-+    env_rocm_path = os.getenv("ROCM_PATH")
-+    if env_rocm_path:
-+        rocm_lib_path = os.path.join(env_rocm_path, "lib", lib_name)
-+        if os.path.exists(rocm_lib_path):
-+            return rocm_lib_path
-+        paths.append(rocm_lib_path)
-+
-     # Afterwards try to search the loader dynamic library resolution paths.
-     libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode(errors="ignore")
-     # each line looks like the following:
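The search order this dropped patch implemented, condensed (it was merged upstream, which is why the file is deleted here):

    import os
    import subprocess

    def hip_sdk_roots():
        roots = []
        if hip_path := os.getenv("HIP_PATH"):        # 1. explicit HIP SDK root
            roots.append(hip_path)
        try:                                         # 2. `hipconfig --path`, if installed
            roots.append(subprocess.check_output(["hipconfig", "--path"]).decode().strip())
        except (subprocess.CalledProcessError, FileNotFoundError):
            pass
        if rocm_path := os.getenv("ROCM_PATH"):      # 3. ROCm install root
            roots.append(rocm_path)
        return [r for r in roots if r]               # ldconfig and /opt/rocm come after these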


@@ -1,36 +1,49 @@
 {
   lib,
-  addDriverRunpath,
-  buildPythonPackage,
-  cmake,
+  stdenv,
   config,
-  cudaPackages,
+  buildPythonPackage,
   fetchFromGitHub,
-  filelock,
-  gtest,
-  libxml2,
+  # patches
+  replaceVars,
+  addDriverRunpath,
+  cudaPackages,
+  # build-system
+  setuptools,
+  # nativeBuildInputs
+  cmake,
+  ninja,
   lit,
   llvm,
+  writableTmpDirAsHomeHook,
+  # buildInputs
+  gtest,
+  libxml2,
   ncurses,
-  ninja,
   pybind11,
+  zlib,
+  # dependencies
+  filelock,
+  # passthru
   python,
   pytestCheckHook,
-  writableTmpDirAsHomeHook,
-  stdenv,
-  replaceVars,
-  setuptools,
   torchWithRocm,
-  zlib,
-  cudaSupport ? config.cudaSupport,
   runCommand,
-  rocmPackages,
   triton,
+  rocmPackages,
+  cudaSupport ? config.cudaSupport,
 }:
 buildPythonPackage rec {
   pname = "triton";
-  version = "3.4.0";
+  version = "3.5.0";
   pyproject = true;
   # Remember to bump triton-llvm as well!
@@ -38,7 +51,7 @@ buildPythonPackage rec {
     owner = "triton-lang";
     repo = "triton";
     tag = "v${version}";
-    hash = "sha256-78s9ke6UV7Tnx3yCr0QZcVDqQELR4XoGgJY7olNJmjk=";
+    hash = "sha256-F6T0n37Lbs+B7UHNYzoIQHjNNv3TcMtoXjNrT8ZUlxY=";
   };
   patches = [
@@ -49,8 +62,6 @@ buildPythonPackage rec {
       libcudaStubsDir =
         if cudaSupport then "${lib.getOutput "stubs" cudaPackages.cuda_cudart}/lib/stubs" else null;
     })
-    # Upstream PR: https://github.com/triton-lang/triton/pull/7959
-    ./0005-amd-search-env-paths.patch
   ]
   ++ lib.optionals cudaSupport [
     (replaceVars ./0003-nvidia-cudart-a-systempath.patch {
@@ -88,13 +99,6 @@ buildPythonPackage rec {
       substituteInPlace cmake/AddTritonUnitTest.cmake \
         --replace-fail "include(\''${PROJECT_SOURCE_DIR}/unittest/googletest.cmake)" ""\
         --replace-fail "include(GoogleTest)" "find_package(GTest REQUIRED)"
-    ''
-    # Don't use FHS path for ROCm LLD
-    # Remove this after `[AMD] Use lld library API #7548` makes it into a release
-    + ''
-      substituteInPlace third_party/amd/backend/compiler.py \
-        --replace-fail 'lld = Path("/opt/rocm/llvm/bin/ld.lld")' \
-        "import os;lld = Path(os.getenv('HIP_PATH', '/opt/rocm/')"' + "/llvm/bin/ld.lld")'
     '';
   build-system = [ setuptools ];
@@ -116,6 +120,11 @@ buildPythonPackage rec {
   cmakeFlags = [
     (lib.cmakeFeature "LLVM_SYSPATH" "${llvm}")
+    # `find_package` is called with `NO_DEFAULT_PATH`
+    # https://cmake.org/cmake/help/latest/command/find_package.html
+    # https://github.com/triton-lang/triton/blob/c3c476f357f1e9768ea4e45aa5c17528449ab9ef/third_party/amd/CMakeLists.txt#L6
+    (lib.cmakeFeature "LLD_DIR" "${lib.getLib llvm}")
   ];
   buildInputs = [