From 4d2c7ad003e918b7f2b2803955bc4af1ba48b165 Mon Sep 17 00:00:00 2001 From: Luna Nova Date: Sat, 4 Jan 2025 10:24:14 -0800 Subject: [PATCH] rocmPackages: 6.0.2 -> 6.3.1 Includes patch suggested by @shuni64 which fixes half precision ABI issues Includes hipblaslt compression patch Includes configurable hipblaslt support in rocblas rocmPackages_6.hipblaslt: respect NIX_BUILD_CORES in tensilelite rocmPackages_6.hipblas: propagate hipblas-common rocmPackages_6.clr: avoid confusion with hipClangPath Co-authored-by: Gavin Zhao --- .../manual/release-notes/rl-2505.section.md | 4 + .../rocm-modules/6/amdsmi/amd_hsmp.h | 417 ++++++++ .../rocm-modules/6/amdsmi/default.nix | 79 ++ .../rocm-modules/6/aotriton/default.nix | 226 ++++ .../6/aotriton/triton-remove-distutils.patch | 68 ++ .../rocm-modules/6/clang-ocl/default.nix | 45 - .../6/clr/cmake-find-x11-libgl.patch | 40 + .../rocm-modules/6/clr/default.nix | 219 ++-- .../6/clr/fix-null-stream-sync-perf.patch | 101 ++ .../6/composable_kernel/ck4inductor.nix | 43 + .../6/composable_kernel/default.nix | 125 ++- .../disable-amdgpu-inline.patch | 16 + .../6/composable_kernel/unpack.nix | 15 +- pkgs/development/rocm-modules/6/default.nix | 961 +++++++++--------- .../rocm-modules/6/half/default.nix | 22 +- .../rocm-modules/6/hip-common/default.nix | 20 +- .../rocm-modules/6/hipblas-common/default.nix | 29 + .../rocm-modules/6/hipblas/default.nix | 40 +- .../rocm-modules/6/hipblaslt/default.nix | 235 +++++ .../6/hipblaslt/ext-op-first.diff | 22 + ...ra-definition-of-hipBinUtilPtr_-in-d.patch | 39 + .../rocm-modules/6/hipcc/default.nix | 37 +- .../rocm-modules/6/hipcub/default.nix | 124 ++- .../rocm-modules/6/hipfft/default.nix | 11 +- .../rocm-modules/6/hipfort/default.nix | 24 +- .../rocm-modules/6/hipify/default.nix | 47 +- .../rocm-modules/6/hiprand/default.nix | 13 +- .../rocm-modules/6/hipsolver/default.nix | 17 +- .../rocm-modules/6/hipsparse/default.nix | 23 +- .../6/hsa-amd-aqlprofile-bin/default.nix | 19 +- 
pkgs/development/rocm-modules/6/llvm/base.nix | 216 ---- .../clang-at-least-16-LLVMgold-path.patch | 14 + .../clang-bodge-ignore-systemwide-incls.diff | 23 + .../rocm-modules/6/llvm/clang-log-jobs.diff | 40 + .../rocm-modules/6/llvm/default.nix | 617 ++++++++--- .../6/llvm/stage-1/clang-unwrapped.nix | 48 - .../rocm-modules/6/llvm/stage-1/lld.nix | 15 - .../rocm-modules/6/llvm/stage-1/llvm.nix | 11 - .../rocm-modules/6/llvm/stage-1/runtimes.nix | 32 - .../stage-2/1000-libcxx-failing-tests.list | 176 ---- .../6/llvm/stage-2/bintools-unwrapped.nix | 29 - .../6/llvm/stage-2/compiler-rt.nix | 64 -- .../rocm-modules/6/llvm/stage-2/libc.nix | 27 - .../rocm-modules/6/llvm/stage-2/libcxx.nix | 43 - .../rocm-modules/6/llvm/stage-2/libcxxabi.nix | 38 - .../rocm-modules/6/llvm/stage-2/libunwind.nix | 27 - .../rocm-modules/6/llvm/stage-2/rstdenv.nix | 37 - .../stage-3/1000-openmp-failing-tests.list | 122 --- .../llvm/stage-3/1001-mlir-failing-tests.list | 11 - .../6/llvm/stage-3/clang-tools-extra.nix | 43 - .../rocm-modules/6/llvm/stage-3/clang.nix | 77 -- .../rocm-modules/6/llvm/stage-3/flang.nix | 32 - .../rocm-modules/6/llvm/stage-3/libclc.nix | 38 - .../rocm-modules/6/llvm/stage-3/lldb.nix | 40 - .../rocm-modules/6/llvm/stage-3/mlir.nix | 61 -- .../rocm-modules/6/llvm/stage-3/openmp.nix | 55 - .../rocm-modules/6/llvm/stage-3/polly.nix | 19 - .../rocm-modules/6/llvm/stage-3/pstl.nix | 16 - .../rocm-modules/6/migraphx/default.nix | 10 +- .../rocm-modules/6/miopen/default.nix | 138 ++- .../rocm-modules/6/miopen/fix-isnan.patch | 31 + .../6/miopen/skip-preexisting-dbs.patch | 22 + .../rocm-modules/6/mivisionx/default.nix | 37 +- .../rocm-modules/6/mscclpp/default.nix | 42 + .../rocm-modules/6/rccl/default.nix | 80 +- .../6/rccl/enable-mscclpp-on-all-gfx9.diff | 13 + .../6/rccl/fix-mainline-support-and-ub.diff | 178 ++++ .../6/rccl/rccl-test-missing-iomanip.diff | 10 + .../rocm-modules/6/rdc/default.nix | 12 +- .../rocm-modules/6/rocalution/default.nix | 19 +- 
.../rocm-modules/6/rocblas/default.nix | 122 ++- .../rocm-modules/6/rocdbgapi/default.nix | 125 +-- .../rocm-modules/6/rocfft/default.nix | 18 +- .../rocm-modules/6/rocgdb/default.nix | 9 +- .../rocm-modules/6/rocm-cmake/default.nix | 23 +- .../rocm-modules/6/rocm-comgr/default.nix | 54 +- .../rocm-modules/6/rocm-core/default.nix | 32 +- .../6/rocm-device-libs/cmake.patch | 4 +- .../6/rocm-device-libs/default.nix | 46 +- .../rocm-modules/6/rocm-docs-core/default.nix | 45 +- .../rocm-modules/6/rocm-path/default.nix | 27 + .../rocm-modules/6/rocm-runtime/default.nix | 66 +- .../remove-hsa-aqlprofile-dep.patch | 27 + .../rocm-modules/6/rocm-smi/default.nix | 23 +- .../rocm-modules/6/rocm-tests/default.nix | 32 + .../rocm-modules/6/rocm-thunk/default.nix | 54 - .../rocm-modules/6/rocminfo/default.nix | 15 +- .../rocm-modules/6/rocmlir/default.nix | 49 +- .../6/rocmlir/initparamdata-sort-const.patch | 13 + .../rocm-modules/6/rocprim/default.nix | 127 +-- .../6/rocprofiler-register/default.nix | 79 ++ .../rocm-modules/6/rocprofiler/default.nix | 55 +- .../6/rocprofiler/optional-aql-in-cmake.patch | 147 +++ .../6/rocr-debug-agent/default.nix | 11 +- .../rocm-modules/6/rocrand/default.nix | 124 +-- .../rocm-modules/6/rocsolver/default.nix | 41 +- .../rocm-modules/6/rocsparse/default.nix | 16 +- .../rocm-modules/6/rocthrust/default.nix | 15 +- .../rocm-modules/6/roctracer/default.nix | 11 +- .../rocm-modules/6/rocwmma/default.nix | 17 +- .../rocm-modules/6/rpp/default.nix | 18 +- .../rocm-modules/6/tensile/default.nix | 99 +- ...ensile-create-library-dont-copy-twice.diff | 20 + .../tensile-solutionstructs-perf-fix.diff | 48 + .../6/tensile/tensilelite-compression.diff | 345 +++++++ ...lelite-create-library-dont-copy-twice.diff | 37 + ...lelite-gen_assembly-venv-err-handling.diff | 36 + .../rocm-modules/6/triton/default.nix | 56 + pkgs/development/rocm-modules/6/update.nix | 40 +- 109 files changed, 4858 insertions(+), 2912 deletions(-) create mode 100644 
pkgs/development/rocm-modules/6/amdsmi/amd_hsmp.h create mode 100644 pkgs/development/rocm-modules/6/amdsmi/default.nix create mode 100644 pkgs/development/rocm-modules/6/aotriton/default.nix create mode 100644 pkgs/development/rocm-modules/6/aotriton/triton-remove-distutils.patch delete mode 100644 pkgs/development/rocm-modules/6/clang-ocl/default.nix create mode 100644 pkgs/development/rocm-modules/6/clr/cmake-find-x11-libgl.patch create mode 100644 pkgs/development/rocm-modules/6/clr/fix-null-stream-sync-perf.patch create mode 100644 pkgs/development/rocm-modules/6/composable_kernel/ck4inductor.nix create mode 100644 pkgs/development/rocm-modules/6/composable_kernel/disable-amdgpu-inline.patch create mode 100644 pkgs/development/rocm-modules/6/hipblas-common/default.nix create mode 100644 pkgs/development/rocm-modules/6/hipblaslt/default.nix create mode 100644 pkgs/development/rocm-modules/6/hipblaslt/ext-op-first.diff create mode 100644 pkgs/development/rocm-modules/6/hipcc/0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch delete mode 100644 pkgs/development/rocm-modules/6/llvm/base.nix create mode 100644 pkgs/development/rocm-modules/6/llvm/clang-at-least-16-LLVMgold-path.patch create mode 100644 pkgs/development/rocm-modules/6/llvm/clang-bodge-ignore-systemwide-incls.diff create mode 100644 pkgs/development/rocm-modules/6/llvm/clang-log-jobs.diff delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-1/lld.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-1/llvm.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-1/runtimes.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-2/1000-libcxx-failing-tests.list delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-2/bintools-unwrapped.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-2/compiler-rt.nix delete mode 100644 
pkgs/development/rocm-modules/6/llvm/stage-2/libc.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-2/libcxx.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-2/libcxxabi.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-2/libunwind.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-2/rstdenv.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/1000-openmp-failing-tests.list delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/1001-mlir-failing-tests.list delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/clang-tools-extra.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/clang.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/flang.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/libclc.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/lldb.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/mlir.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/openmp.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/polly.nix delete mode 100644 pkgs/development/rocm-modules/6/llvm/stage-3/pstl.nix create mode 100644 pkgs/development/rocm-modules/6/miopen/fix-isnan.patch create mode 100644 pkgs/development/rocm-modules/6/miopen/skip-preexisting-dbs.patch create mode 100644 pkgs/development/rocm-modules/6/mscclpp/default.nix create mode 100644 pkgs/development/rocm-modules/6/rccl/enable-mscclpp-on-all-gfx9.diff create mode 100644 pkgs/development/rocm-modules/6/rccl/fix-mainline-support-and-ub.diff create mode 100644 pkgs/development/rocm-modules/6/rccl/rccl-test-missing-iomanip.diff create mode 100644 pkgs/development/rocm-modules/6/rocm-path/default.nix create mode 100644 pkgs/development/rocm-modules/6/rocm-runtime/remove-hsa-aqlprofile-dep.patch create mode 100644 pkgs/development/rocm-modules/6/rocm-tests/default.nix delete mode 100644 
pkgs/development/rocm-modules/6/rocm-thunk/default.nix create mode 100644 pkgs/development/rocm-modules/6/rocmlir/initparamdata-sort-const.patch create mode 100644 pkgs/development/rocm-modules/6/rocprofiler-register/default.nix create mode 100644 pkgs/development/rocm-modules/6/rocprofiler/optional-aql-in-cmake.patch create mode 100644 pkgs/development/rocm-modules/6/tensile/tensile-create-library-dont-copy-twice.diff create mode 100644 pkgs/development/rocm-modules/6/tensile/tensile-solutionstructs-perf-fix.diff create mode 100644 pkgs/development/rocm-modules/6/tensile/tensilelite-compression.diff create mode 100644 pkgs/development/rocm-modules/6/tensile/tensilelite-create-library-dont-copy-twice.diff create mode 100644 pkgs/development/rocm-modules/6/tensile/tensilelite-gen_assembly-venv-err-handling.diff create mode 100644 pkgs/development/rocm-modules/6/triton/default.nix diff --git a/nixos/doc/manual/release-notes/rl-2505.section.md b/nixos/doc/manual/release-notes/rl-2505.section.md index 4e62adf97c4d..9fc1dc4b420c 100644 --- a/nixos/doc/manual/release-notes/rl-2505.section.md +++ b/nixos/doc/manual/release-notes/rl-2505.section.md @@ -380,6 +380,10 @@ - `python3Packages.jaeger-client` was removed because it was deprecated upstream. [OpenTelemetry](https://opentelemetry.io) is the recommended replacement. +- `rocmPackages.rocm-thunk` has been removed and its functionality has been integrated with the ROCm CLR. Use `rocmPackages.clr` instead. + +- `rocmPackages.clang-ocl` has been removed. [It was deprecated by AMD in 2023.](https://github.com/ROCm/clang-ocl) + - `nodePackages.meshcommander` has been removed, as the package was deprecated by Intel. - The default version of `z3` has been updated from 4.8 to 4.13. There are still a few packages that need specific older versions; those will continue to be maintained as long as other packages depend on them but may be removed in the future. 
diff --git a/pkgs/development/rocm-modules/6/amdsmi/amd_hsmp.h b/pkgs/development/rocm-modules/6/amdsmi/amd_hsmp.h new file mode 100644 index 000000000000..b027cec2ade0 --- /dev/null +++ b/pkgs/development/rocm-modules/6/amdsmi/amd_hsmp.h @@ -0,0 +1,417 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_ASM_X86_AMD_HSMP_H_ +#define _UAPI_ASM_X86_AMD_HSMP_H_ + +#include <linux/types.h> + +#pragma pack(4) + +#define HSMP_MAX_MSG_LEN 8 + +/* + * HSMP Messages supported + */ +enum hsmp_message_ids { + HSMP_TEST = 1, /* 01h Increments input value by 1 */ + HSMP_GET_SMU_VER, /* 02h SMU FW version */ + HSMP_GET_PROTO_VER, /* 03h HSMP interface version */ + HSMP_GET_SOCKET_POWER, /* 04h average package power consumption */ + HSMP_SET_SOCKET_POWER_LIMIT, /* 05h Set the socket power limit */ + HSMP_GET_SOCKET_POWER_LIMIT, /* 06h Get current socket power limit */ + HSMP_GET_SOCKET_POWER_LIMIT_MAX,/* 07h Get maximum socket power value */ + HSMP_SET_BOOST_LIMIT, /* 08h Set a core maximum frequency limit */ + HSMP_SET_BOOST_LIMIT_SOCKET, /* 09h Set socket maximum frequency level */ + HSMP_GET_BOOST_LIMIT, /* 0Ah Get current frequency limit */ + HSMP_GET_PROC_HOT, /* 0Bh Get PROCHOT status */ + HSMP_SET_XGMI_LINK_WIDTH, /* 0Ch Set max and min width of xGMI Link */ + HSMP_SET_DF_PSTATE, /* 0Dh Alter APEnable/Disable messages behavior */ + HSMP_SET_AUTO_DF_PSTATE, /* 0Eh Enable DF P-State Performance Boost algorithm */ + HSMP_GET_FCLK_MCLK, /* 0Fh Get FCLK and MEMCLK for current socket */ + HSMP_GET_CCLK_THROTTLE_LIMIT, /* 10h Get CCLK frequency limit in socket */ + HSMP_GET_C0_PERCENT, /* 11h Get average C0 residency in socket */ + HSMP_SET_NBIO_DPM_LEVEL, /* 12h Set max/min LCLK DPM Level for a given NBIO */ + HSMP_GET_NBIO_DPM_LEVEL, /* 13h Get LCLK DPM level min and max for a given NBIO */ + HSMP_GET_DDR_BANDWIDTH, /* 14h Get theoretical maximum and current DDR Bandwidth */ + HSMP_GET_TEMP_MONITOR, /* 15h Get socket temperature */ + HSMP_GET_DIMM_TEMP_RANGE, /*
16h Get per-DIMM temperature range and refresh rate */ + HSMP_GET_DIMM_POWER, /* 17h Get per-DIMM power consumption */ + HSMP_GET_DIMM_THERMAL, /* 18h Get per-DIMM thermal sensors */ + HSMP_GET_SOCKET_FREQ_LIMIT, /* 19h Get current active frequency per socket */ + HSMP_GET_CCLK_CORE_LIMIT, /* 1Ah Get CCLK frequency limit per core */ + HSMP_GET_RAILS_SVI, /* 1Bh Get SVI-based Telemetry for all rails */ + HSMP_GET_SOCKET_FMAX_FMIN,/* 1Ch Get Fmax and Fmin per socket */ + HSMP_GET_IOLINK_BANDWITH, /* 1Dh Get current bandwidth on IO Link */ + HSMP_GET_XGMI_BANDWITH, /* 1Eh Get current bandwidth on xGMI Link */ + HSMP_SET_GMI3_WIDTH, /* 1Fh Set max and min GMI3 Link width */ + HSMP_SET_PCI_RATE, /* 20h Control link rate on PCIe devices */ + HSMP_SET_POWER_MODE, /* 21h Select power efficiency profile policy */ + HSMP_SET_PSTATE_MAX_MIN, /* 22h Set the max and min DF P-State */ + HSMP_GET_METRIC_TABLE_VER,/* 23h Get metrics table version */ + HSMP_GET_METRIC_TABLE, /* 24h Get metrics table */ + HSMP_GET_METRIC_TABLE_DRAM_ADDR,/* 25h Get metrics table dram address */ + HSMP_MSG_ID_MAX, +}; + +struct hsmp_message { + __u32 msg_id; /* Message ID */ + __u16 num_args; /* Number of input argument words in message */ + __u16 response_sz; /* Number of expected output/response words */ + __u32 args[HSMP_MAX_MSG_LEN]; /* argument/response buffer */ + __u16 sock_ind; /* socket number */ +}; + +enum hsmp_msg_type { + HSMP_RSVD = -1, + HSMP_SET = 0, + HSMP_GET = 1, +}; + +enum hsmp_proto_versions { + HSMP_PROTO_VER2 = 2, + HSMP_PROTO_VER3, + HSMP_PROTO_VER4, + HSMP_PROTO_VER5, + HSMP_PROTO_VER6 +}; + +struct hsmp_msg_desc { + int num_args; + int response_sz; + enum hsmp_msg_type type; +}; + +/* + * User may use these comments as reference, please find the + * supported list of messages and message definition in the + * HSMP chapter of respective family/model PPR. + * + * Not supported messages would return -ENOMSG. 
+ */ +static const struct hsmp_msg_desc hsmp_msg_desc_table[] + __attribute__((unused)) = { + /* RESERVED */ + {0, 0, HSMP_RSVD}, + + /* + * HSMP_TEST, num_args = 1, response_sz = 1 + * input: args[0] = xx + * output: args[0] = xx + 1 + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_SMU_VER, num_args = 0, response_sz = 1 + * output: args[0] = smu fw ver + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_PROTO_VER, num_args = 0, response_sz = 1 + * output: args[0] = proto version + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_POWER, num_args = 0, response_sz = 1 + * output: args[0] = socket power in mWatts + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_SOCKET_POWER_LIMIT, num_args = 1, response_sz = 0 + * input: args[0] = power limit value in mWatts + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_SOCKET_POWER_LIMIT, num_args = 0, response_sz = 1 + * output: args[0] = socket power limit value in mWatts + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_POWER_LIMIT_MAX, num_args = 0, response_sz = 1 + * output: args[0] = maximuam socket power limit in mWatts + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_BOOST_LIMIT, num_args = 1, response_sz = 0 + * input: args[0] = apic id[31:16] + boost limit value in MHz[15:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_SET_BOOST_LIMIT_SOCKET, num_args = 1, response_sz = 0 + * input: args[0] = boost limit value in MHz + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_BOOST_LIMIT, num_args = 1, response_sz = 1 + * input: args[0] = apic id + * output: args[0] = boost limit value in MHz + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_PROC_HOT, num_args = 0, response_sz = 1 + * output: args[0] = proc hot status + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_XGMI_LINK_WIDTH, num_args = 1, response_sz = 0 + * input: args[0] = min link width[15:8] + max link width[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_SET_DF_PSTATE, num_args = 1, response_sz = 0 + * input: args[0] = df pstate[7:0] + */ + {1, 0, HSMP_SET}, + + /* HSMP_SET_AUTO_DF_PSTATE, num_args = 0, 
response_sz = 0 */ + {0, 0, HSMP_SET}, + + /* + * HSMP_GET_FCLK_MCLK, num_args = 0, response_sz = 2 + * output: args[0] = fclk in MHz, args[1] = mclk in MHz + */ + {0, 2, HSMP_GET}, + + /* + * HSMP_GET_CCLK_THROTTLE_LIMIT, num_args = 0, response_sz = 1 + * output: args[0] = core clock in MHz + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_C0_PERCENT, num_args = 0, response_sz = 1 + * output: args[0] = average c0 residency + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 0 + * input: args[0] = nbioid[23:16] + max dpm level[15:8] + min dpm level[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 1 + * input: args[0] = nbioid[23:16] + * output: args[0] = max dpm level[15:8] + min dpm level[7:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_DDR_BANDWIDTH, num_args = 0, response_sz = 1 + * output: args[0] = max bw in Gbps[31:20] + utilised bw in Gbps[19:8] + + * bw in percentage[7:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_TEMP_MONITOR, num_args = 0, response_sz = 1 + * output: args[0] = temperature in degree celsius. 
[15:8] integer part + + * [7:5] fractional part + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_DIMM_TEMP_RANGE, num_args = 1, response_sz = 1 + * input: args[0] = DIMM address[7:0] + * output: args[0] = refresh rate[3] + temperature range[2:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_DIMM_POWER, num_args = 1, response_sz = 1 + * input: args[0] = DIMM address[7:0] + * output: args[0] = DIMM power in mW[31:17] + update rate in ms[16:8] + + * DIMM address[7:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_DIMM_THERMAL, num_args = 1, response_sz = 1 + * input: args[0] = DIMM address[7:0] + * output: args[0] = temperature in degree celsius[31:21] + update rate in ms[16:8] + + * DIMM address[7:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_FREQ_LIMIT, num_args = 0, response_sz = 1 + * output: args[0] = frequency in MHz[31:16] + frequency source[15:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_CCLK_CORE_LIMIT, num_args = 1, response_sz = 1 + * input: args[0] = apic id [31:0] + * output: args[0] = frequency in MHz[31:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_RAILS_SVI, num_args = 0, response_sz = 1 + * output: args[0] = power in mW[31:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_FMAX_FMIN, num_args = 0, response_sz = 1 + * output: args[0] = fmax in MHz[31:16] + fmin in MHz[15:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_IOLINK_BANDWITH, num_args = 1, response_sz = 1 + * input: args[0] = link id[15:8] + bw type[2:0] + * output: args[0] = io bandwidth in Mbps[31:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_XGMI_BANDWITH, num_args = 1, response_sz = 1 + * input: args[0] = link id[15:8] + bw type[2:0] + * output: args[0] = xgmi bandwidth in Mbps[31:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_SET_GMI3_WIDTH, num_args = 1, response_sz = 0 + * input: args[0] = min link width[15:8] + max link width[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_SET_PCI_RATE, num_args = 1, response_sz = 1 + * input: args[0] = link rate control value + * output: 
args[0] = previous link rate control value + */ + {1, 1, HSMP_SET}, + + /* + * HSMP_SET_POWER_MODE, num_args = 1, response_sz = 0 + * input: args[0] = power efficiency mode[2:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_SET_PSTATE_MAX_MIN, num_args = 1, response_sz = 0 + * input: args[0] = min df pstate[15:8] + max df pstate[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_METRIC_TABLE_VER, num_args = 0, response_sz = 1 + * output: args[0] = metrics table version + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_METRIC_TABLE, num_args = 0, response_sz = 0 + */ + {0, 0, HSMP_GET}, + + /* + * HSMP_GET_METRIC_TABLE_DRAM_ADDR, num_args = 0, response_sz = 2 + * output: args[0] = lower 32 bits of the address + * output: args[1] = upper 32 bits of the address + */ + {0, 2, HSMP_GET}, +}; + +/* Metrics table (supported only with proto version 6) */ +struct hsmp_metric_table { + __u32 accumulation_counter; + + /* TEMPERATURE */ + __u32 max_socket_temperature; + __u32 max_vr_temperature; + __u32 max_hbm_temperature; + __u64 max_socket_temperature_acc; + __u64 max_vr_temperature_acc; + __u64 max_hbm_temperature_acc; + + /* POWER */ + __u32 socket_power_limit; + __u32 max_socket_power_limit; + __u32 socket_power; + + /* ENERGY */ + __u64 timestamp; + __u64 socket_energy_acc; + __u64 ccd_energy_acc; + __u64 xcd_energy_acc; + __u64 aid_energy_acc; + __u64 hbm_energy_acc; + + /* FREQUENCY */ + __u32 cclk_frequency_limit; + __u32 gfxclk_frequency_limit; + __u32 fclk_frequency; + __u32 uclk_frequency; + __u32 socclk_frequency[4]; + __u32 vclk_frequency[4]; + __u32 dclk_frequency[4]; + __u32 lclk_frequency[4]; + __u64 gfxclk_frequency_acc[8]; + __u64 cclk_frequency_acc[96]; + + /* FREQUENCY RANGE */ + __u32 max_cclk_frequency; + __u32 min_cclk_frequency; + __u32 max_gfxclk_frequency; + __u32 min_gfxclk_frequency; + __u32 fclk_frequency_table[4]; + __u32 uclk_frequency_table[4]; + __u32 socclk_frequency_table[4]; + __u32 vclk_frequency_table[4]; + __u32 dclk_frequency_table[4]; + __u32 
lclk_frequency_table[4]; + __u32 max_lclk_dpm_range; + __u32 min_lclk_dpm_range; + + /* XGMI */ + __u32 xgmi_width; + __u32 xgmi_bitrate; + __u64 xgmi_read_bandwidth_acc[8]; + __u64 xgmi_write_bandwidth_acc[8]; + + /* ACTIVITY */ + __u32 socket_c0_residency; + __u32 socket_gfx_busy; + __u32 dram_bandwidth_utilization; + __u64 socket_c0_residency_acc; + __u64 socket_gfx_busy_acc; + __u64 dram_bandwidth_acc; + __u32 max_dram_bandwidth; + __u64 dram_bandwidth_utilization_acc; + __u64 pcie_bandwidth_acc[4]; + + /* THROTTLERS */ + __u32 prochot_residency_acc; + __u32 ppt_residency_acc; + __u32 socket_thm_residency_acc; + __u32 vr_thm_residency_acc; + __u32 hbm_thm_residency_acc; + __u32 spare; + + /* New items at the end to maintain driver compatibility */ + __u32 gfxclk_frequency[8]; +}; + +/* Reset to default packing */ +#pragma pack() + +/* Define unique ioctl command for hsmp msgs using generic _IOWR */ +#define HSMP_BASE_IOCTL_NR 0xF8 +#define HSMP_IOCTL_CMD _IOWR(HSMP_BASE_IOCTL_NR, 0, struct hsmp_message) + +#endif /*_ASM_X86_AMD_HSMP_H_*/ diff --git a/pkgs/development/rocm-modules/6/amdsmi/default.nix b/pkgs/development/rocm-modules/6/amdsmi/default.nix new file mode 100644 index 000000000000..24ea25b3171c --- /dev/null +++ b/pkgs/development/rocm-modules/6/amdsmi/default.nix @@ -0,0 +1,79 @@ +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + pkg-config, + libdrm, + wrapPython, + autoPatchelfHook, +}: + +let + esmi_ib_src = fetchFromGitHub { + owner = "amd"; + repo = "esmi_ib_library"; + rev = "esmi_pkg_ver-3.0.3"; + hash = "sha256-q0w5c5c+CpXkklmSyfzc+sbkt4cHNxscGJA3AXwvHxQ="; + }; +in +stdenv.mkDerivation (finalAttrs: { + pname = "amdsmi"; + version = "6.3.1"; + src = fetchFromGitHub { + owner = "rocm"; + repo = "amdsmi"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-ZHr7G2/A4t3yH4S5urt1u8DZqGRcXpZUC/eavhkgPMY="; + }; + + postPatch = '' + substituteInPlace goamdsmi_shim/CMakeLists.txt \ + --replace-fail "amd_smi)" 
${"'"}''${AMD_SMI_TARGET})' \ + --replace-fail 'target_link_libraries(''${GOAMDSMI_SHIM_TARGET} -L' '#' + + cp -rf --no-preserve=mode ${esmi_ib_src} ./esmi_ib_library + mkdir -p ./esmi_ib_library/include/asm + cp ${./amd_hsmp.h} ./esmi_ib_library/include/asm/amd_hsmp.h + ''; + + patches = [ ]; + + nativeBuildInputs = [ + cmake + pkg-config + wrapPython + autoPatchelfHook + ]; + + buildInputs = [ + libdrm + ]; + + cmakeFlags = [ + # Manually define CMAKE_INSTALL_<DIR> + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ]; + + postInstall = '' + wrapPythonProgramsIn $out + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "System management interface for AMD GPUs supported by ROCm"; + homepage = "https://github.com/ROCm/amdsmi"; + license = with licenses; [ mit ]; + maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; + platforms = [ "x86_64-linux" ]; + }; +}) diff --git a/pkgs/development/rocm-modules/6/aotriton/default.nix b/pkgs/development/rocm-modules/6/aotriton/default.nix new file mode 100644 index 000000000000..21c1fc8a70a1 --- /dev/null +++ b/pkgs/development/rocm-modules/6/aotriton/default.nix @@ -0,0 +1,226 @@ +{ + lib, + stdenv, + fetchFromGitHub, + cmake, + rocm-cmake, + clr, + rocblas, + rocsolver, + gtest, + msgpack, + libxml2, + python3, + python3Packages, + openmp, + hipblas-common, + hipblas, + nlohmann_json, + triton-llvm, + rocmlir, + lapack-reference, + ninja, + ncurses, + libffi, + zlib, + zstd, + xz, + pkg-config, + buildTests ? false, + buildBenchmarks ? false, + buildSamples ? false, + gpuTargets ?
[ + # aotriton GPU support list: + # https://github.com/ROCm/aotriton/blob/main/v2python/gpu_targets.py + "gfx90a" + "gfx942" + "gfx1100" + "gfx1101" + ], +}: + +stdenv.mkDerivation ( + finalAttrs: + let + py = python3.withPackages (ps: [ + ps.pyyaml + ps.distutils + ps.setuptools + ps.packaging + ps.numpy + ps.wheel + ps.filelock + ps.iniconfig + ps.pluggy + ps.pybind11 + ]); + gpuTargets' = lib.concatStringsSep ";" gpuTargets; + compiler = "amdclang++"; + cFlags = "-O3 -DNDEBUG"; + triton-llvm' = triton-llvm; + in + { + pname = "aotriton"; + version = "0.8.2b"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "aotriton"; + rev = "${finalAttrs.version}"; + hash = "sha256-15Qr0EgV+pU6Hnmxqy76Nmryqr7qNkoE6iDcg9z35Hk="; + fetchSubmodules = true; + }; + env.CXX = compiler; + env.ROCM_PATH = "${clr}"; + requiredSystemFeatures = [ "big-parallel" ]; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ] + ++ lib.optionals buildSamples [ + "sample" + ]; + + # Need an empty cuda.h for this to compile + # Better than pulling in unfree cuda headers + postPatch = '' + touch third_party/triton/third_party/nvidia/include/cuda.h + ''; + + doCheck = false; + doInstallCheck = false; + + nativeBuildInputs = [ + cmake + rocm-cmake + pkg-config + py + clr + ninja + ]; + + buildInputs = + [ + rocblas + rocsolver + hipblas-common + hipblas + openmp + libffi + ncurses + xz + nlohmann_json + rocmlir + + msgpack + libxml2 + python3Packages.msgpack + zlib + zstd + ] + ++ lib.optionals buildTests [ + gtest + ] + ++ lib.optionals (buildTests || buildBenchmarks) [ + lapack-reference + ]; + + env.TRITON_OFFLINE_BUILD = 1; + env.LLVM_SYSPATH = "${triton-llvm'}"; + env.JSON_SYSPATH = nlohmann_json; + env.MLIR_DIR = "${triton-llvm'}/lib/cmake/mlir"; + env.CXXFLAGS = "-I/build/source/third_party/triton/third_party/nvidia/backend/include"; + + # Fix up header issues in triton: 
https://github.com/triton-lang/triton/pull/3985/files + preConfigure = '' + mkdir third_party/triton/third_party/nvidia/backend/include/ + touch third_party/triton/third_party/nvidia/backend/include/cuda.h + find third_party/triton -type f -exec sed -i 's|[<]cupti.h[>]|"cupti.h"|g' {} + + find third_party/triton -type f -exec sed -i 's|[<]cuda.h[>]|"cuda.h"|g' {} + + + sed -i '2s;^;set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE BOOL "ON")\n;' CMakeLists.txt + sed -i '2s;^;set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")\n;' CMakeLists.txt + sed -i '2s;^;set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE BOOL "ON")\n;' third_party/triton/CMakeLists.txt + sed -i '2s;^;set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")\n;' third_party/triton/CMakeLists.txt + substituteInPlace third_party/triton/python/setup.py \ + --replace-fail "from distutils.command.clean import clean" "import setuptools;from distutils.command.clean import clean" \ + --replace-fail 'system == "Linux"' 'False' + + cmakeFlagsArray+=( + '-DCMAKE_C_FLAGS_RELEASE=${cFlags}' + '-DCMAKE_CXX_FLAGS_RELEASE=${cFlags}' + ) + prependToVar cmakeFlags "-GNinja" + mkdir -p /build/tmp-home + export HOME=/build/tmp-home + ''; + + # Excerpt from README: + # Note: do not run ninja separately, due to the limit of the current build system, + # ninja install will run the whole build process unconditionally. 
+ dontBuild = true; + + installPhase = '' + runHook preInstall + ninja -v install + runHook postInstall + ''; + + cmakeFlags = + [ + "-Wno-dev" + "-DAOTRITON_NOIMAGE_MODE=ON" # FIXME: Should be able to build with object code but generate_shim is failing + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_VERBOSE_MAKEFILE=ON" + "-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}" + "-DCMAKE_CXX_COMPILER=${compiler}" + # Manually define CMAKE_INSTALL_<DIR> + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DAMDGPU_TARGETS=${gpuTargets'}" + "-DGPU_TARGETS=${gpuTargets'}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_CLIENTS_TESTS=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_CLIENTS_BENCHMARKS=ON" + ] + ++ lib.optionals buildSamples [ + "-DBUILD_CLIENTS_SAMPLES=ON" + ]; + + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/hipblas-test $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/hipblas-bench $benchmark/bin + '' + + lib.optionalString buildSamples '' + mkdir -p $sample/bin + mv $out/bin/example-* $sample/bin + '' + + lib.optionalString (buildTests || buildBenchmarks || buildSamples) '' + rmdir $out/bin + ''; + meta = with lib; { + description = "ROCm Ahead of Time (AOT) Triton Math Library "; + homepage = "https://github.com/ROCm/aotriton"; + license = with licenses; [ mit ]; + maintainers = teams.rocm.members; + platforms = platforms.linux; + }; + } +) diff --git a/pkgs/development/rocm-modules/6/aotriton/triton-remove-distutils.patch b/pkgs/development/rocm-modules/6/aotriton/triton-remove-distutils.patch new file mode 100644 index 000000000000..12ece274d433 --- /dev/null +++ b/pkgs/development/rocm-modules/6/aotriton/triton-remove-distutils.patch @@ -0,0 +1,68 @@ +From https://github.com/triton-lang/triton/pull/1400/files +diff --git a/python/setup.py b/python/setup.py +index
1d5eb89c591d..9dfd5a62ad63 100644 +--- a/third_party/triton/python/setup.py ++++ b/third_party/triton/python/setup.py +@@ -1,14 +1,13 @@ +-import distutils + import os + import platform + import re + import shutil + import subprocess + import sys ++import sysconfig + import tarfile + import tempfile + import urllib.request +-from distutils.version import LooseVersion + from pathlib import Path + from typing import NamedTuple + +@@ -154,10 +153,10 @@ def run(self): + "CMake must be installed to build the following extensions: " + ", ".join(e.name for e in self.extensions) + ) + +- if platform.system() == "Windows": +- cmake_version = LooseVersion(re.search(r"version\s*([\d.]+)", out.decode()).group(1)) +- if cmake_version < "3.1.0": +- raise RuntimeError("CMake >= 3.1.0 is required on Windows") ++ match = re.search(r"version\s*(?P<major>\d+)\.(?P<minor>\d+)([\d.]+)?", out.decode()) ++ cmake_major, cmake_minor = int(match.group("major")), int(match.group("minor")) ++ if (cmake_major, cmake_minor) < (3, 20): ++ raise RuntimeError("CMake >= 3.20.0 is required") + + for ext in self.extensions: + self.build_extension(ext) +@@ -176,7 +175,7 @@ def build_extension(self, ext): + if not os.path.exists(self.build_temp): + os.makedirs(self.build_temp) + # python directories +- python_include_dir = distutils.sysconfig.get_python_inc() ++ python_include_dir = sysconfig.get_path("platinclude") + cmake_args = [ + "-DLLVM_ENABLE_WERROR=ON", + "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir, +@@ -200,9 +199,8 @@ def build_extension(self, ext): + cmake_args += ["-A", "x64"] + build_args += ["--", "/m"] + else: +- import multiprocessing + cmake_args += ["-DCMAKE_BUILD_TYPE=" + cfg] +- build_args += ['-j' + str(2 * multiprocessing.cpu_count())] ++ build_args += ['-j' + str(2 * os.cpu_count())] + + env = os.environ.copy() + subprocess.check_call(["cmake", self.base_dir] + cmake_args, cwd=self.build_temp, env=env) +@@ -245,6 +243,11 @@ def build_extension(self, ext): + "Topic :: Software Development ::
Build Tools", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3.6", ++ "Programming Language :: Python :: 3.7", ++ "Programming Language :: Python :: 3.8", ++ "Programming Language :: Python :: 3.9", ++ "Programming Language :: Python :: 3.10", ++ "Programming Language :: Python :: 3.11", + ], + test_suite="tests", + extras_require={ diff --git a/pkgs/development/rocm-modules/6/clang-ocl/default.nix b/pkgs/development/rocm-modules/6/clang-ocl/default.nix deleted file mode 100644 index 0ef8a04a744b..000000000000 --- a/pkgs/development/rocm-modules/6/clang-ocl/default.nix +++ /dev/null @@ -1,45 +0,0 @@ -{ - lib, - stdenv, - fetchFromGitHub, - rocmUpdateScript, - cmake, - rocm-cmake, - rocm-device-libs, -}: - -stdenv.mkDerivation (finalAttrs: { - pname = "clang-ocl"; - version = "6.0.2"; - - src = fetchFromGitHub { - owner = "ROCm"; - repo = "clang-ocl"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-uMSvcVJj+me2E+7FsXZ4l4hTcK6uKEegXpkHGcuist0="; - }; - - nativeBuildInputs = [ - cmake - rocm-cmake - ]; - - buildInputs = [ rocm-device-libs ]; - - passthru.updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - }; - - meta = with lib; { - description = "OpenCL compilation with clang compiler"; - homepage = "https://github.com/ROCm/clang-ocl"; - license = with licenses; [ mit ]; - maintainers = teams.rocm.members; - platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; - }; -}) diff --git a/pkgs/development/rocm-modules/6/clr/cmake-find-x11-libgl.patch b/pkgs/development/rocm-modules/6/clr/cmake-find-x11-libgl.patch new file mode 100644 index 000000000000..5a36a2c2c74d --- /dev/null +++ b/pkgs/development/rocm-modules/6/clr/cmake-find-x11-libgl.patch @@ -0,0 +1,40 @@ +diff --git a/rocclr/cmake/ROCclr.cmake b/rocclr/cmake/ROCclr.cmake +index 
3f233b72f..67bdc62ee 100644 +--- a/rocclr/cmake/ROCclr.cmake ++++ b/rocclr/cmake/ROCclr.cmake +@@ -44,6 +44,19 @@ find_package(Threads REQUIRED) + + find_package(AMD_OPENCL) + ++# Find X11 package ++find_package(X11 REQUIRED) ++if(NOT X11_FOUND) ++ message(FATAL_ERROR "X11 libraries not found") ++endif() ++ ++# Find OpenGL package ++find_package(OpenGL REQUIRED) ++if(NOT OpenGL_FOUND) ++ message(FATAL_ERROR "OpenGL not found") ++endif() ++ ++ + add_library(rocclr STATIC) + + include(ROCclrCompilerOptions) +@@ -123,9 +136,14 @@ target_include_directories(rocclr PUBLIC + ${ROCCLR_SRC_DIR}/device + ${ROCCLR_SRC_DIR}/elf + ${ROCCLR_SRC_DIR}/include ++ ${X11_INCLUDE_DIR} ++ ${OPENGL_INCLUDE_DIR} + ${AMD_OPENCL_INCLUDE_DIRS}) + +-target_link_libraries(rocclr PUBLIC Threads::Threads) ++target_link_libraries(rocclr PUBLIC ++ Threads::Threads ++ ${X11_LIBRARIES} ++ ${OPENGL_LIBRARIES}) + # IPC on Windows is not supported + if(UNIX) + target_link_libraries(rocclr PUBLIC rt) diff --git a/pkgs/development/rocm-modules/6/clr/default.nix b/pkgs/development/rocm-modules/6/clr/default.nix index a3dc49695c50..d5d78bd81644 100644 --- a/pkgs/development/rocm-modules/6/clr/default.nix +++ b/pkgs/development/rocm-modules/6/clr/default.nix @@ -4,52 +4,58 @@ callPackage, fetchFromGitHub, fetchpatch, - fetchurl, rocmUpdateScript, makeWrapper, cmake, perl, - clang, hip-common, hipcc, rocm-device-libs, rocm-comgr, rocm-runtime, + rocm-core, roctracer, rocminfo, rocm-smi, numactl, + libffi, + zstd, + zlib, libGL, libxml2, libX11, python3Packages, + rocm-merged-llvm, + khronos-ocl-icd-loader, + gcc-unwrapped, + writeShellScriptBin, + localGpuTargets ? 
null, }: let + inherit (rocm-core) ROCM_LIBPATCH_VERSION; + hipClang = rocm-merged-llvm; + hipClangPath = "${hipClang}/bin"; wrapperArgs = [ "--prefix PATH : $out/bin" "--prefix LD_LIBRARY_PATH : ${rocm-runtime}" "--set HIP_PLATFORM amd" "--set HIP_PATH $out" - "--set HIP_CLANG_PATH ${clang}/bin" + "--set HIP_CLANG_PATH ${hipClangPath}" "--set DEVICE_LIB_PATH ${rocm-device-libs}/amdgcn/bitcode" "--set HSA_PATH ${rocm-runtime}" "--set ROCM_PATH $out" ]; - - # https://github.com/NixOS/nixpkgs/issues/305641 - # Not needed when 3.29.2 is in unstable - cmake' = cmake.overrideAttrs (old: rec { - version = "3.29.2"; - src = fetchurl { - url = "https://cmake.org/files/v${lib.versions.majorMinor version}/cmake-${version}.tar.gz"; - hash = "sha256-NttLaSaqt0G6bksuotmckZMiITIwi03IJNQSPLcwNS4="; - }; - }); + amdclang = writeShellScriptBin "amdclang" '' + exec clang "$@" + ''; + amdclangxx = writeShellScriptBin "amdclang++" '' + exec clang++ "$@" + ''; in stdenv.mkDerivation (finalAttrs: { pname = "clr"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ "out" @@ -60,15 +66,17 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "clr"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-ZMpA7vCW2CcpGdBLZfPimMHcgjhN1PHuewJiYwZMgGY="; + hash = "sha256-wo3kwk6HQJsP+ycaVh2mmMjEgGlj/Z6KXNXOXbJ1KLs="; }; nativeBuildInputs = [ makeWrapper - cmake' + cmake perl python3Packages.python python3Packages.cppheaderparser + amdclang + amdclangxx ]; buildInputs = [ @@ -76,9 +84,15 @@ stdenv.mkDerivation (finalAttrs: { libGL libxml2 libX11 + khronos-ocl-icd-loader + hipClang + libffi + zstd + zlib ]; propagatedBuildInputs = [ + rocm-core rocm-device-libs rocm-comgr rocm-runtime @@ -86,6 +100,7 @@ stdenv.mkDerivation (finalAttrs: { ]; cmakeFlags = [ + "-DCMAKE_BUILD_TYPE=Release" "-DCMAKE_POLICY_DEFAULT_CMP0072=NEW" # Prefer newer OpenGL libraries "-DCLR_BUILD_HIP=ON" "-DCLR_BUILD_OCL=ON" @@ -94,6 +109,9 @@ stdenv.mkDerivation (finalAttrs: { "-DHIP_PLATFORM=amd" 
"-DPROF_API_HEADER_PATH=${roctracer.src}/inc/ext" "-DROCM_PATH=${rocminfo}" + "-DBUILD_ICD=ON" + "-DHIP_ENABLE_ROCPROFILER_REGISTER=OFF" # circular dep - may need -minimal and -full builds? + "-DAMD_ICD_LIBRARY_DIR=${khronos-ocl-icd-loader}" # Temporarily set variables to work around upstream CMakeLists issue # Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed @@ -102,26 +120,36 @@ stdenv.mkDerivation (finalAttrs: { "-DCMAKE_INSTALL_LIBDIR=lib" ]; + env.LLVM_DIR = ""; + patches = [ + ./cmake-find-x11-libgl.patch + (fetchpatch { - name = "add-missing-operators.patch"; - url = "https://github.com/ROCm/clr/commit/86bd518981b364c138f9901b28a529899d8654f3.patch"; - hash = "sha256-lbswri+zKLxif0hPp4aeJDeVfadhWZz4z+m+G2XcCPI="; + # Fix handling of old fatbin version https://github.com/ROCm/clr/issues/99 + sha256 = "sha256-CK/QwgWJQEruiG4DqetF9YM0VEWpSiUMxAf1gGdJkuA="; + url = "https://src.fedoraproject.org/rpms/rocclr/raw/rawhide/f/0001-handle-v1-of-compressed-fatbins.patch"; }) (fetchpatch { - name = "static-functions.patch"; - url = "https://github.com/ROCm/clr/commit/77c581a3ebd47b5e2908973b70adea66891159ee.patch"; - hash = "sha256-auBedbd7rghlKav7A9V6l64J7VmtE9GizIdi5gWj+fs="; + # improve rocclr isa compatibility check + sha256 = "sha256-wUrhpYN68AbEXeFU5f366C6peqHyq25kujJXY/bBJMs="; + url = "https://github.com/GZGavinZhao/clr/commit/22c17a0ac09c6b77866febf366591f669a1ed133.patch"; }) (fetchpatch { - name = "extend-hip-isa-compatibility-check.patch"; - url = "https://salsa.debian.org/rocm-team/rocm-hipamd/-/raw/d6d20142c37e1dff820950b16ff8f0523241d935/debian/patches/0026-extend-hip-isa-compatibility-check.patch"; - hash = "sha256-eG0ALZZQLRzD7zJueJFhi2emontmYy6xx8Rsm346nQI="; + # [PATCH] Improve hipamd compat check + sha256 = "sha256-uZQ8rMrWH61CCbxwLqQGggDmXFmYTi6x8OcgYPrZRC8="; + url = "https://github.com/GZGavinZhao/clr/commit/63c6ee630966744d4199fdfb854e98d2da9e1122.patch"; }) (fetchpatch { - name = 
"improve-rocclr-isa-compatibility-check.patch"; - url = "https://salsa.debian.org/rocm-team/rocm-hipamd/-/raw/d6d20142c37e1dff820950b16ff8f0523241d935/debian/patches/0025-improve-rocclr-isa-compatibility-check.patch"; - hash = "sha256-8eowuRiOAdd9ucKv4Eg9FPU7c6367H3eP3fRAGfXc6Y="; + # [PATCH] SWDEV-504340 - Move cast of cl_mem inside the condition + # Fixes crash due to UB in KernelBlitManager::setArgument + sha256 = "sha256-nL4CZ7EOXqsTVUtYhuu9DLOMpnMeMRUhkhylEQLTg9I="; + url = "https://github.com/ROCm/clr/commit/fa63919a6339ea2a61111981ba2362c97fbdf743.patch"; + }) + (fetchpatch { + # [PATCH] SWDEV-507104 - Removes alignment requirement for Semaphore class to resolve runtime misaligned memory issues + sha256 = "sha256-nStJ22B/CM0fzQTvYjbHDbQt0GlE8DXxVK+UDU9BAx4="; + url = "https://github.com/ROCm/clr/commit/21d764518363d74187deaef2e66c1a127bc5aa64.patch"; }) ]; @@ -135,79 +163,117 @@ stdenv.mkDerivation (finalAttrs: { --replace "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipconfig.bat DESTINATION bin)" "" substituteInPlace hipamd/src/hip_embed_pch.sh \ - --replace "\''$LLVM_DIR/bin/clang" "${clang}/bin/clang" - - # https://lists.debian.org/debian-ai/2024/02/msg00178.html - substituteInPlace rocclr/utils/flags.hpp \ - --replace-fail "HIP_USE_RUNTIME_UNBUNDLER, false" "HIP_USE_RUNTIME_UNBUNDLER, true" + --replace-fail "\''$LLVM_DIR/bin/clang" "${hipClangPath}/clang" substituteInPlace opencl/khronos/icd/loader/icd_platform.h \ --replace-fail '#define ICD_VENDOR_PATH "/etc/OpenCL/vendors/";' \ '#define ICD_VENDOR_PATH "/run/opengl-driver/etc/OpenCL/vendors/";' + + # new unbundler has better error messages, defaulting it on + substituteInPlace rocclr/utils/flags.hpp \ + --replace-fail "HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, false" "HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, true" ''; postInstall = '' + chmod +x $out/bin/* patchShebangs $out/bin - # hipcc.bin and hipconfig.bin is mysteriously never installed - cp -a ${hipcc}/bin/{hipcc.bin,hipconfig.bin} $out/bin + cp 
${amdclang}/bin/* $out/bin/ + cp ${amdclangxx}/bin/* $out/bin/ - wrapProgram $out/bin/hipcc.bin ${lib.concatStringsSep " " wrapperArgs} - wrapProgram $out/bin/hipconfig.bin ${lib.concatStringsSep " " wrapperArgs} - wrapProgram $out/bin/hipcc.pl ${lib.concatStringsSep " " wrapperArgs} - wrapProgram $out/bin/hipconfig.pl ${lib.concatStringsSep " " wrapperArgs} + for prog in hip{cc,config}{,.pl}; do + wrapProgram $out/bin/$prog ${lib.concatStringsSep " " wrapperArgs} + done + + mkdir -p $out/nix-support/ + echo ' + export HIP_PATH="${placeholder "out"}" + export HIP_PLATFORM=amd + export HIP_DEVICE_LIB_PATH="${rocm-device-libs}/amdgcn/bitcode" + export NIX_CC_USE_RESPONSE_FILE=0 + export HIP_CLANG_PATH="${hipClangPath}" + export ROCM_LIBPATCH_VERSION="${ROCM_LIBPATCH_VERSION}" + export HSA_PATH="${rocm-runtime}"' > $out/nix-support/setup-hook # Just link rocminfo, it's easier ln -s ${rocminfo}/bin/* $out/bin + ln -s ${rocm-core}/include/* $out/include/ # Replace rocm-opencl-icd functionality mkdir -p $icd/etc/OpenCL/vendors echo "$out/lib/libamdocl64.so" > $icd/etc/OpenCL/vendors/amdocl64.icd # add version info to output (downstream rocmPackages look for this) - mkdir $out/.info - echo "${finalAttrs.version}" > $out/.info/version + ln -s ${rocm-core}/.info/ $out/.info + + ln -s ${hipClang} $out/llvm ''; - passthru = { - # All known and valid general GPU targets - # We cannot use this for each ROCm library, as each defines their own supported targets - # See: https://github.com/ROCm/ROCm/blob/77cbac4abab13046ee93d8b5bf410684caf91145/README.md#library-target-matrix - gpuTargets = lib.forEach [ - "803" - "900" - "906" - "908" - "90a" - "940" - "941" - "942" - "1010" - "1012" - "1030" - "1100" - "1101" - "1102" - ] (target: "gfx${target}"); + disallowedRequisites = [ + gcc-unwrapped + ]; - updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - page = "tags?per_page=1"; - filter = ".[0].name | 
split(\"-\") | .[1]"; - }; + passthru = + { + # All known and valid general GPU targets + # We cannot use this for each ROCm library, as each defines their own supported targets + # See: https://github.com/ROCm/ROCm/blob/77cbac4abab13046ee93d8b5bf410684caf91145/README.md#library-target-matrix + # Generic targets are not yet available in rocm-6.3.1 llvm + gpuTargets = lib.forEach [ + # "9-generic" + "900" # MI25, Vega 56/64 + "906" # MI50/60, Radeon VII + "908" # MI100 + "90a" # MI210 / MI250 + # "9-4-generic" + # 940/1 - never released publicly, maybe HPE cray specific MI3xx? + "942" # MI300 + # "10-1-generic" + "1010" + "1012" + # "10-3-generic" + "1030" # W6800, various Radeon cards + # "11-generic" + "1100" + "1101" + "1102" + ] (target: "gfx${target}"); - impureTests = { - rocm-smi = callPackage ./test-rocm-smi.nix { - inherit rocm-smi; - clr = finalAttrs.finalPackage; + inherit hipClangPath; + + updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + page = "tags?per_page=4"; }; - opencl-example = callPackage ./test-opencl-example.nix { - clr = finalAttrs.finalPackage; + + impureTests = { + rocm-smi = callPackage ./test-rocm-smi.nix { + inherit rocm-smi; + clr = finalAttrs.finalPackage; + }; + opencl-example = callPackage ./test-opencl-example.nix { + clr = finalAttrs.finalPackage; + }; }; + + selectGpuTargets = + { + supported ? [ ], + }: + supported; + gpuArchSuffix = ""; + } + // lib.optionalAttrs (localGpuTargets != null) { + inherit localGpuTargets; + gpuArchSuffix = "-" + (builtins.concatStringsSep "-" localGpuTargets); + selectGpuTargets = + { + supported ? 
[ ], + }: + if supported == [ ] then localGpuTargets else lib.lists.intersectLists localGpuTargets supported; }; - }; meta = with lib; { description = "AMD Common Language Runtime for hipamd, opencl, and rocclr"; @@ -215,8 +281,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/clr/fix-null-stream-sync-perf.patch b/pkgs/development/rocm-modules/6/clr/fix-null-stream-sync-perf.patch new file mode 100644 index 000000000000..49c789d17278 --- /dev/null +++ b/pkgs/development/rocm-modules/6/clr/fix-null-stream-sync-perf.patch @@ -0,0 +1,101 @@ +From 17e7b7c2ef6023be77b22ae83162e78de0a5a936 Mon Sep 17 00:00:00 2001 +From: Anusha GodavarthySurya +Date: Fri, 11 Oct 2024 17:10:12 +0000 +Subject: [PATCH] SWDEV-472840 SWDEV-461980 - Fix null stream sync performance + +=> If null stream is not created during sync skip nullstrm creation +=> Do cpu wait on blocking & null stream if it exists + +Change-Id: I90d6ced6a2dd1782ba58f3fed4e3608fc0efa55a +--- + hipamd/src/hip_device.cpp | 23 +++++++++++++++++++---- + hipamd/src/hip_internal.hpp | 2 +- + hipamd/src/hip_stream.cpp | 22 ++++++++++++++++------ + 3 files changed, 36 insertions(+), 11 deletions(-) + +diff --git a/hipamd/src/hip_device.cpp b/hipamd/src/hip_device.cpp +index 9f6a8e3d0..20889b0fe 100644 +--- a/hipamd/src/hip_device.cpp ++++ b/hipamd/src/hip_device.cpp +@@ -257,15 +257,30 @@ void Device::destroyAllStreams() { + } + + // ================================================================================================ +-void Device::SyncAllStreams( bool cpu_wait) { ++void Device::SyncAllStreams(bool cpu_wait, bool wait_blocking_streams_only) { + // Make a local copy to avoid stalls for GPU finish with multiple 
threads + std::vector<hip::Stream*> streams; + streams.reserve(streamSet.size()); + { + amd::ScopedLock lock(streamSetLock); +- for (auto it : streamSet) { +- streams.push_back(it); +- it->retain(); ++ if (wait_blocking_streams_only) { ++ auto null_stream = GetNullStream(); ++ for (auto it : streamSet) { ++ if (it != null_stream && (it->Flags() & hipStreamNonBlocking) == 0) { ++ streams.push_back(it); ++ it->retain(); ++ } ++ } ++ // Add null stream to the end of the list so that wait happens after all blocking streams. ++ if (null_stream != nullptr) { ++ streams.push_back(null_stream); ++ null_stream->retain(); ++ } ++ } else { ++ for (auto it : streamSet) { ++ streams.push_back(it); ++ it->retain(); ++ } + } + } + for (auto it : streams) { +diff --git a/hipamd/src/hip_internal.hpp b/hipamd/src/hip_internal.hpp +index d0a6dca57..47749c012 100644 +--- a/hipamd/src/hip_internal.hpp ++++ b/hipamd/src/hip_internal.hpp +@@ -595,7 +595,7 @@ class stream_per_thread { + + void destroyAllStreams(); + +- void SyncAllStreams( bool cpu_wait = true); ++ void SyncAllStreams( bool cpu_wait = true, bool wait_blocking_streams_only = false); + + bool StreamCaptureBlocking(); + +diff --git a/hipamd/src/hip_stream.cpp b/hipamd/src/hip_stream.cpp +index 937374977..76a732acd 100644 +--- a/hipamd/src/hip_stream.cpp ++++ b/hipamd/src/hip_stream.cpp +@@ -357,13 +357,23 @@ hipError_t hipStreamSynchronize_common(hipStream_t stream) { + HIP_RETURN(hipErrorStreamCaptureUnsupported); + } + } +- bool wait = (stream == nullptr || stream == hipStreamLegacy) ?
true : false; +- auto hip_stream = hip::getStream(stream, wait); + +- // Wait for the current host queue +- hip_stream->finish(); +- // Release freed memory for all memory pools on the device +- hip_stream->GetDevice()->ReleaseFreedMemory(); ++ if (stream == nullptr) { ++ // Do cpu wait on null stream and only on blocking streams ++ constexpr bool WaitblockingStreamOnly = true; ++ getCurrentDevice()->SyncAllStreams(true, WaitblockingStreamOnly); ++ ++ // Release freed memory for all memory pools on the device ++ getCurrentDevice()->ReleaseFreedMemory(); ++ } else { ++ constexpr bool wait = false; ++ auto hip_stream = hip::getStream(stream, wait); ++ ++ // Wait for the current host queue ++ hip_stream->finish(); ++ // Release freed memory for all memory pools on the device ++ hip_stream->GetDevice()->ReleaseFreedMemory(); ++ } + return hipSuccess; + } + diff --git a/pkgs/development/rocm-modules/6/composable_kernel/ck4inductor.nix b/pkgs/development/rocm-modules/6/composable_kernel/ck4inductor.nix new file mode 100644 index 000000000000..cdc27a280856 --- /dev/null +++ b/pkgs/development/rocm-modules/6/composable_kernel/ck4inductor.nix @@ -0,0 +1,43 @@ +{ + buildPythonPackage, + python, + composable_kernel_build, + lib, + setuptools, + setuptools-scm, + rocm-merged-llvm, +}: +buildPythonPackage { + pyproject = true; + pname = "ck4inductor"; + build-system = [ + setuptools + setuptools-scm + ]; + version = "6.4.0"; + inherit (composable_kernel_build) src; + pythonImportsCheck = [ + "ck4inductor" + "ck4inductor.universal_gemm.gen_instances" + "ck4inductor.universal_gemm.gen_instances" + "ck4inductor.universal_gemm.op" + ]; + propagatedBuildInputs = [ + # At runtime will fail to compile anything with ck4inductor without this + # can't easily use in checks phase because most of the compiler machinery is in torch + rocm-merged-llvm + ]; + checkPhase = '' + if [ ! 
-d "$out/${python.sitePackages}/ck4inductor" ]; then + echo "ck4inductor isn't at the expected location in $out/${python.sitePackages}/ck4inductor" + exit 1 + fi + ''; + meta = with lib; { + description = "pytorch inductor backend which uses composable_kernel universal GEMM implementations"; + homepage = "https://github.com/ROCm/composable_kernel"; + license = with licenses; [ mit ]; + maintainers = teams.rocm.members; + platforms = platforms.linux; + }; +} diff --git a/pkgs/development/rocm-modules/6/composable_kernel/default.nix b/pkgs/development/rocm-modules/6/composable_kernel/default.nix index 16f3f05f7509..b9d52fce4999 100644 --- a/pkgs/development/rocm-modules/6/composable_kernel/default.nix +++ b/pkgs/development/rocm-modules/6/composable_kernel/default.nix @@ -5,20 +5,36 @@ rocmUpdateScript, cmake, rocm-cmake, + rocm-merged-llvm, clr, - openmp, - clang-tools-extra, + rocm-device-libs, + rocminfo, + hipify, git, gtest, zstd, + ninja, buildTests ? false, buildExamples ? false, - gpuTargets ? [ ], # gpuTargets = [ "gfx803" "gfx900" "gfx1030" ... ] + gpuTargets ? ( + clr.localGpuTargets or [ + "gfx900" + "gfx906" + "gfx908" + "gfx90a" + "gfx942" + "gfx1030" + "gfx1100" + "gfx1101" + "gfx1102" + ] + ), }: stdenv.mkDerivation (finalAttrs: { - pname = "composable_kernel"; - version = "6.0.2"; + pname = "composable_kernel${clr.gpuArchSuffix}"; + # This version must be PEP 440 compatible because it's the version of the ck4inductor python package too + version = "6.4.0-unstable-20241220"; outputs = [ @@ -31,32 +47,68 @@ stdenv.mkDerivation (finalAttrs: { "example" ]; + patches = [ + # for Gentoo this gives a significant speedup in build times + # not observing speedup. possibly because our LLVM has been patched to fix amdgpu-early-inline-all issues? 
+ # ./disable-amdgpu-inline.patch + ]; + src = fetchFromGitHub { owner = "ROCm"; repo = "composable_kernel"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-NCqMganmNyQfz3X+KQOrfrimnrgd3HbAGK5DeC4+J+o="; + rev = "07339c738396ebeae57374771ded4dcf11bddf1e"; + hash = "sha256-EvEBxlOpQ71BF57VW79WBo/cdxAwTKFXFMiYKyGyyEs="; }; nativeBuildInputs = [ git cmake - rocm-cmake + rocminfo clr - clang-tools-extra + hipify + ninja zstd ]; - buildInputs = [ openmp ]; + buildInputs = [ + rocm-cmake + clr + zstd + ]; + + strictDeps = true; + enableParallelBuilding = true; + requiredSystemFeatures = [ "big-parallel" ]; + env.ROCM_PATH = clr; + env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin"; cmakeFlags = [ - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" + "-DCMAKE_MODULE_PATH=${clr}/hip/cmake" + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_POLICY_DEFAULT_CMP0069=NEW" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + # "-DDL_KERNELS=ON" + # Not turned on because don't think deps require it, slightly speeds up build + # "-DCK_USE_CODEGEN=ON" + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DBUILD_DEV=OFF" + "-DROCM_PATH=${clr}" + "-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}" + + # FP8 can build for 908/90a but very slow build + # and produces unusably slow kernels that are huge + "-DCK_USE_FP8_ON_UNSUPPORTED_ARCH=OFF" ] ++ lib.optionals (gpuTargets != [ ]) [ - "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" - "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + # We intentionally set GPU_ARCHS and not AMD/GPU_TARGETS + # per readme this is required if archs are dissimilar + # In rocm-6.3.x not setting any arch flag worked + # but setting dissimilar arches always failed + "-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}" ] ++ lib.optionals buildTests [ "-DGOOGLETEST_DIR=${gtest.src}" # Custom linker names @@ -64,22 +116,45 @@ stdenv.mkDerivation (finalAttrs: { # No 
flags to build selectively it seems... postPatch = - lib.optionalString (!buildTests) '' + '' + export HIP_DEVICE_LIB_PATH=${rocm-device-libs}/amdgcn/bitcode + '' + + lib.optionalString (!buildTests) '' substituteInPlace CMakeLists.txt \ - --replace "add_subdirectory(test)" "" + --replace-fail "add_subdirectory(test)" "" + substituteInPlace codegen/CMakeLists.txt \ + --replace-fail "include(ROCMTest)" "" '' + lib.optionalString (!buildExamples) '' substituteInPlace CMakeLists.txt \ - --replace "add_subdirectory(example)" "" + --replace-fail "add_subdirectory(example)" "" '' + '' substituteInPlace CMakeLists.txt \ - --replace "add_subdirectory(profiler)" "" + --replace-fail "add_subdirectory(profiler)" "" ''; + # Clamp parallelism based on free memory at build start to avoid OOM + preConfigure = '' + export NINJA_SUMMARIZE_BUILD=1 + export NINJA_STATUS="[%r jobs | %P %f/%t @ %o/s | %w | ETA %W ] " + MEM_GB_TOTAL=$(awk '/MemTotal/ { printf "%d \n", $2/1024/1024 }' /proc/meminfo) + MEM_GB_AVAILABLE=$(awk '/MemAvailable/ { printf "%d \n", $2/1024/1024 }' /proc/meminfo) + APPX_GB=$((MEM_GB_AVAILABLE > MEM_GB_TOTAL ? MEM_GB_TOTAL : MEM_GB_AVAILABLE)) + MAX_CORES=$((1 + APPX_GB / 2)) + MAX_CORES_LINK=$((1 + APPX_GB / 8)) + MAX_CORES_LINK=$((MAX_CORES_LINK > NIX_BUILD_CORES ? NIX_BUILD_CORES : MAX_CORES_LINK)) + export NIX_BUILD_CORES="$((NIX_BUILD_CORES > MAX_CORES ? 
MAX_CORES : NIX_BUILD_CORES))" + echo "Picked new core limits NIX_BUILD_CORES=$NIX_BUILD_CORES LINK_CORES=$MAX_CORES_LINK based on available mem: $APPX_GB GB" + cmakeFlagsArray+=( + "-DCK_PARALLEL_LINK_JOBS=$MAX_CORES_LINK" + "-DCK_PARALLEL_COMPILE_JOBS=$NIX_BUILD_CORES" + ) + ''; + postInstall = '' - zstd --rm $out/lib/libdevice_operations.a + zstd --rm $out/lib/libdevice_*_operations.a '' + lib.optionalString buildTests '' mkdir -p $test/bin @@ -92,21 +167,17 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; - # Times out otherwise - requiredSystemFeatures = [ "big-parallel" ]; - meta = with lib; { description = "Performance portable programming model for machine learning tensor operators"; homepage = "https://github.com/ROCm/composable_kernel"; license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; + # Builds which don't target any gfx9 cause cmake errors in dependent projects + broken = !finalAttrs.passthru.anyGfx9Target; }; }) diff --git a/pkgs/development/rocm-modules/6/composable_kernel/disable-amdgpu-inline.patch b/pkgs/development/rocm-modules/6/composable_kernel/disable-amdgpu-inline.patch new file mode 100644 index 000000000000..5ccf5239cb1e --- /dev/null +++ b/pkgs/development/rocm-modules/6/composable_kernel/disable-amdgpu-inline.patch @@ -0,0 +1,16 @@ +Flag -amdgpu-early-inline-all explodes memory consumption, so that build does not fit 64GB of RAM.
+LLVM bug: https://github.com/llvm/llvm-project/issues/86332 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -213,11 +213,6 @@ if(NOT WIN32 AND check-coerce AND ${hip_VERSION_FLAT} GREATER 600241132 AND ${hi + message("Adding the amdgpu-coerce-illegal-types=1") + add_compile_options("SHELL: -mllvm -amdgpu-coerce-illegal-types=1") + endif() +-if(NOT WIN32 AND ${hip_VERSION_FLAT} GREATER 600241132) +- message("Adding -amdgpu-early-inline-all=true and -amdgpu-function-calls=false") +- add_compile_options("SHELL: -mllvm -amdgpu-early-inline-all=true") +- add_compile_options("SHELL: -mllvm -amdgpu-function-calls=false") +-endif() + # + # Seperate linking jobs from compiling + # Too many concurrent linking jobs can break the build diff --git a/pkgs/development/rocm-modules/6/composable_kernel/unpack.nix b/pkgs/development/rocm-modules/6/composable_kernel/unpack.nix index ff98df6eff3b..c0e40e16965f 100644 --- a/pkgs/development/rocm-modules/6/composable_kernel/unpack.nix +++ b/pkgs/development/rocm-modules/6/composable_kernel/unpack.nix @@ -1,20 +1,29 @@ { runCommandLocal, composable_kernel_build, + ck4inductor, zstd, }: let ck = composable_kernel_build; in -runCommandLocal "unpack-${ck.name}" +runCommandLocal "unpack-${ck.pname}" { nativeBuildInputs = [ zstd ]; - meta = ck.meta; + inherit (ck) meta; } '' mkdir -p $out cp -r --no-preserve=mode ${ck}/* $out - zstd -dv --rm $out/lib/libdevice_operations.a.zst -o $out/lib/libdevice_operations.a + for zs in $out/lib/libdevice_*_operations.a.zst; do + zstd -dv --rm "$zs" -o "''${zs/.zst}" + done substituteInPlace $out/lib/cmake/composable_kernel/*.cmake \ --replace "${ck}" "$out" + cp -r --no-preserve=mode ${ck4inductor}/* $out/ + + if [ ! 
-e $out/lib/python3.12/site-packages/ck4inductor/library/src/tensor_operation_instance/gpu/gemm_universal ]; then + echo "Missing gemm_universal at expected path for pytorch CK backend" + exit 1 + fi '' diff --git a/pkgs/development/rocm-modules/6/default.nix b/pkgs/development/rocm-modules/6/default.nix index 6cd85b0a575a..0a31e5575091 100644 --- a/pkgs/development/rocm-modules/6/default.nix +++ b/pkgs/development/rocm-modules/6/default.nix @@ -1,530 +1,507 @@ -{ stdenv -, lib -, config -, callPackage -, recurseIntoAttrs -, symlinkJoin -, fetchFromGitHub -, cudaPackages -, python3Packages -, elfutils -, boost179 -, opencv -, ffmpeg_4 -, libjpeg_turbo +{ + lib, + config, + callPackage, + newScope, + recurseIntoAttrs, + symlinkJoin, + fetchFromGitHub, + ffmpeg_4, + boost179, + opencv, + libjpeg_turbo, + python3Packages, + triton-llvm, + openmpi, + rocmGpuArches ? [ ], }: let - rocmUpdateScript = callPackage ./update.nix { }; -in rec { - ## ROCm ## - llvm = recurseIntoAttrs (callPackage ./llvm/default.nix { inherit rocmUpdateScript rocm-device-libs rocm-runtime rocm-thunk clr; }); - - rocm-core = callPackage ./rocm-core { - inherit rocmUpdateScript; - stdenv = llvm.rocmClangStdenv; - }; - - rocm-cmake = callPackage ./rocm-cmake { - inherit rocmUpdateScript; - stdenv = llvm.rocmClangStdenv; - }; - - rocm-thunk = callPackage ./rocm-thunk { - inherit rocmUpdateScript; - stdenv = llvm.rocmClangStdenv; - }; - - rocm-smi = python3Packages.callPackage ./rocm-smi { - inherit rocmUpdateScript; - stdenv = llvm.rocmClangStdenv; - }; - - # Eventually will be in the LLVM repo - rocm-device-libs = callPackage ./rocm-device-libs { - inherit rocmUpdateScript rocm-cmake; - stdenv = llvm.rocmClangStdenv; - }; - - rocm-runtime = callPackage ./rocm-runtime { - inherit rocmUpdateScript rocm-device-libs rocm-thunk; - stdenv = llvm.rocmClangStdenv; - }; - - # Eventually will be in the LLVM repo - rocm-comgr = callPackage ./rocm-comgr { - inherit rocmUpdateScript rocm-cmake 
rocm-device-libs; - stdenv = llvm.rocmClangStdenv; - }; - - rocminfo = callPackage ./rocminfo { - inherit rocmUpdateScript rocm-cmake rocm-runtime; - stdenv = llvm.rocmClangStdenv; - }; - - clang-ocl = callPackage ./clang-ocl { - inherit rocmUpdateScript rocm-cmake rocm-device-libs; - stdenv = llvm.rocmClangStdenv; - }; - - # Unfree - hsa-amd-aqlprofile-bin = callPackage ./hsa-amd-aqlprofile-bin { - stdenv = llvm.rocmClangStdenv; - }; - - # Broken, too many errors - rdc = callPackage ./rdc { - inherit rocmUpdateScript rocm-smi rocm-runtime stdenv; - # stdenv = llvm.rocmClangStdenv; - }; - - rocm-docs-core = python3Packages.callPackage ./rocm-docs-core { inherit stdenv; }; - - hip-common = callPackage ./hip-common { - inherit rocmUpdateScript; - stdenv = llvm.rocmClangStdenv; - }; - - # Eventually will be in the LLVM repo - hipcc = callPackage ./hipcc { - inherit rocmUpdateScript; - stdenv = llvm.rocmClangStdenv; - }; - - # Replaces hip, opencl-runtime, and rocclr - clr = callPackage ./clr { - inherit rocmUpdateScript hip-common hipcc rocm-device-libs rocm-comgr rocm-runtime roctracer rocminfo rocm-smi; - inherit (llvm) clang; - stdenv = llvm.rocmClangStdenv; - }; - - hipify = callPackage ./hipify { - inherit rocmUpdateScript; - inherit (llvm) clang; - stdenv = llvm.rocmClangStdenv; - }; - - # Needs GCC - rocprofiler = callPackage ./rocprofiler { - inherit rocmUpdateScript clr rocm-core rocm-thunk rocm-device-libs roctracer rocdbgapi rocm-smi hsa-amd-aqlprofile-bin stdenv; - inherit (llvm) clang; - }; - - # Needs GCC - roctracer = callPackage ./roctracer { - inherit rocmUpdateScript rocm-device-libs rocm-runtime clr stdenv; - }; - - rocgdb = callPackage ./rocgdb { - inherit rocmUpdateScript rocdbgapi; - stdenv = llvm.rocmClangStdenv; - }; - - rocdbgapi = callPackage ./rocdbgapi { - inherit rocmUpdateScript rocm-cmake rocm-comgr rocm-runtime; - stdenv = llvm.rocmClangStdenv; - }; - - rocr-debug-agent = callPackage ./rocr-debug-agent { - inherit rocmUpdateScript clr 
rocdbgapi; - stdenv = llvm.rocmClangStdenv; - }; - - rocprim = callPackage ./rocprim { - inherit rocmUpdateScript rocm-cmake clr; - stdenv = llvm.rocmClangStdenv; - }; - - rocsparse = callPackage ./rocsparse { - inherit rocmUpdateScript rocm-cmake rocprim clr; - stdenv = llvm.rocmClangStdenv; - }; - - rocthrust = callPackage ./rocthrust { - inherit rocmUpdateScript rocm-cmake rocprim clr; - stdenv = llvm.rocmClangStdenv; - }; - - rocrand = callPackage ./rocrand { - inherit rocmUpdateScript rocm-cmake clr; - stdenv = llvm.rocmClangStdenv; - }; - - hiprand = callPackage ./hiprand { - inherit rocmUpdateScript rocm-cmake clr rocrand; - stdenv = llvm.rocmClangStdenv; - }; - - rocfft = callPackage ./rocfft { - inherit rocmUpdateScript rocm-cmake rocrand rocfft clr; - inherit (llvm) openmp; - stdenv = llvm.rocmClangStdenv; - }; - - rccl = callPackage ./rccl { - inherit rocmUpdateScript rocm-cmake rocm-smi clr hipify; - stdenv = llvm.rocmClangStdenv; - }; - - hipcub = callPackage ./hipcub { - inherit rocmUpdateScript rocm-cmake rocprim clr; - stdenv = llvm.rocmClangStdenv; - }; - - hipsparse = callPackage ./hipsparse { - inherit rocmUpdateScript rocm-cmake rocsparse clr; - inherit (llvm) openmp; - stdenv = llvm.rocmClangStdenv; - }; - - hipfort = callPackage ./hipfort { - inherit rocmUpdateScript rocm-cmake; - stdenv = llvm.rocmClangStdenv; - }; - - hipfft = callPackage ./hipfft { - inherit rocmUpdateScript rocm-cmake rocfft clr; - inherit (llvm) openmp; - stdenv = llvm.rocmClangStdenv; - }; - - tensile = python3Packages.callPackage ./tensile { - inherit rocmUpdateScript rocminfo; - stdenv = llvm.rocmClangStdenv; - }; - - rocblas = callPackage ./rocblas { - inherit rocmUpdateScript rocm-cmake clr tensile; - inherit (llvm) openmp; - stdenv = llvm.rocmClangStdenv; - }; - - rocsolver = callPackage ./rocsolver { - inherit rocmUpdateScript rocm-cmake rocblas rocsparse clr; - stdenv = llvm.rocmClangStdenv; - }; - - rocwmma = callPackage ./rocwmma { - inherit rocmUpdateScript 
rocm-cmake rocm-smi rocblas clr; - inherit (llvm) openmp; - stdenv = llvm.rocmClangStdenv; - }; - - rocalution = callPackage ./rocalution { - inherit rocmUpdateScript rocm-cmake rocprim rocsparse rocrand rocblas clr; - inherit (llvm) openmp; - stdenv = llvm.rocmClangStdenv; - }; - - rocmlir = callPackage ./rocmlir { - inherit rocmUpdateScript rocm-cmake rocminfo clr; - stdenv = llvm.rocmClangStdenv; - }; - - rocmlir-rock = rocmlir.override { - buildRockCompiler = true; - }; - - hipsolver = callPackage ./hipsolver { - inherit rocmUpdateScript rocm-cmake rocblas rocsolver clr; - stdenv = llvm.rocmClangStdenv; - }; - - hipblas = callPackage ./hipblas { - inherit rocmUpdateScript rocm-cmake rocblas rocsolver clr; - stdenv = llvm.rocmClangStdenv; - }; - - # hipBlasLt - Very broken with Tensile at the moment, only supports GFX9 - # hipTensor - Only supports GFX9 - - composable_kernel = callPackage ./composable_kernel/unpack.nix { - composable_kernel_build = callPackage ./composable_kernel { - inherit rocmUpdateScript rocm-cmake clr; - inherit (llvm) openmp clang-tools-extra; + outer = lib.makeScope newScope ( + self: + let + inherit (self) llvm; + pyPackages = python3Packages; + openmpi-orig = openmpi; + in + { + inherit rocmGpuArches; + buildTests = false; + buildBenchmarks = false; stdenv = llvm.rocmClangStdenv; - }; - }; - half = callPackage ./half { - inherit rocmUpdateScript rocm-cmake; - stdenv = llvm.rocmClangStdenv; - }; + rocmPath = self.callPackage ./rocm-path { }; + rocmUpdateScript = self.callPackage ./update.nix { }; - miopen = callPackage ./miopen { - inherit rocmUpdateScript rocm-cmake rocblas clang-ocl composable_kernel rocm-comgr clr rocm-docs-core half roctracer; - inherit (llvm) clang-tools-extra; - stdenv = llvm.rocmClangStdenv; - rocmlir = rocmlir-rock; - boost = boost179.override { enableStatic = true; }; - }; + ## ROCm ## + llvm = recurseIntoAttrs ( + callPackage ./llvm/default.nix { + inherit (self) rocm-device-libs rocm-runtime; + } + ); + 
inherit (self.llvm) rocm-merged-llvm clang openmp; - miopen-hip = miopen; - - migraphx = callPackage ./migraphx { - inherit rocmUpdateScript rocm-cmake rocblas composable_kernel miopen clr half rocm-device-libs; - inherit (llvm) openmp clang-tools-extra; - stdenv = llvm.rocmClangStdenv; - rocmlir = rocmlir-rock; - }; - - rpp = callPackage ./rpp { - inherit rocmUpdateScript rocm-cmake rocm-docs-core clr half; - inherit (llvm) openmp; - stdenv = llvm.rocmClangStdenv; - }; - - rpp-hip = rpp.override { - useOpenCL = false; - useCPU = false; - }; - - rpp-opencl = rpp.override { - useOpenCL = true; - useCPU = false; - }; - - rpp-cpu = rpp.override { - useOpenCL = false; - useCPU = true; - }; - - mivisionx = callPackage ./mivisionx { - inherit rocmUpdateScript rocm-cmake rocm-device-libs clr rpp rocblas miopen migraphx half rocm-docs-core; - inherit (llvm) clang openmp; - opencv = opencv.override { enablePython = true; }; - ffmpeg = ffmpeg_4; - stdenv = llvm.rocmClangStdenv; - - # Unfortunately, rocAL needs a custom libjpeg-turbo until further notice - # See: https://github.com/ROCm/MIVisionX/issues/1051 - libjpeg_turbo = libjpeg_turbo.overrideAttrs { - version = "2.0.6.1"; - - src = fetchFromGitHub { - owner = "rrawther"; - repo = "libjpeg-turbo"; - rev = "640d7ee1917fcd3b6a5271aa6cf4576bccc7c5fb"; - sha256 = "sha256-T52whJ7nZi8jerJaZtYInC2YDN0QM+9tUDqiNr6IsNY="; + rocm-core = self.callPackage ./rocm-core { }; + amdsmi = pyPackages.callPackage ./amdsmi { + inherit (self) rocmUpdateScript; }; - # overwrite all patches, since patches for newer version do not apply - patches = [ ./0001-Compile-transupp.c-as-part-of-the-library.patch ]; - }; - }; + rocm-cmake = self.callPackage ./rocm-cmake { }; - mivisionx-hip = mivisionx.override { - rpp = rpp-hip; - useOpenCL = false; - useCPU = false; - }; + rocm-smi = pyPackages.callPackage ./rocm-smi { + inherit (self) rocmUpdateScript; + }; - mivisionx-cpu = mivisionx.override { - rpp = rpp-cpu; - useOpenCL = false; - useCPU = true; - 
}; + rocm-device-libs = self.callPackage ./rocm-device-libs { + inherit (llvm) rocm-merged-llvm; + }; - ## Meta ## - # Emulate common ROCm meta layout - # These are mainly for users. I strongly suggest NOT using these in nixpkgs derivations - # Don't put these into `propagatedBuildInputs` unless you want PATH/PYTHONPATH issues! - # See: https://rocm.docs.amd.com/en/docs-5.7.1/_images/image.004.png - # See: https://rocm.docs.amd.com/en/docs-5.7.1/deploy/linux/os-native/package_manager_integration.html - meta = rec { - rocm-developer-tools = symlinkJoin { - name = "rocm-developer-tools-meta"; + rocm-runtime = self.callPackage ./rocm-runtime { + inherit (llvm) rocm-merged-llvm; + }; - paths = [ - hsa-amd-aqlprofile-bin - rocm-core - rocr-debug-agent - roctracer - rocdbgapi - rocprofiler - rocgdb - rocm-language-runtime - ]; - }; + rocm-comgr = self.callPackage ./rocm-comgr { + inherit (llvm) rocm-merged-llvm; + }; - rocm-ml-sdk = symlinkJoin { - name = "rocm-ml-sdk-meta"; + rocminfo = self.callPackage ./rocminfo { }; - paths = [ - rocm-core - miopen-hip - rocm-hip-sdk - rocm-ml-libraries - ]; - }; + # Unfree + hsa-amd-aqlprofile-bin = self.callPackage ./hsa-amd-aqlprofile-bin { }; - rocm-ml-libraries = symlinkJoin { - name = "rocm-ml-libraries-meta"; + rdc = self.callPackage ./rdc { }; - paths = [ - llvm.clang - llvm.mlir - llvm.openmp - rocm-core - miopen-hip - rocm-hip-libraries - ]; - }; + rocm-docs-core = python3Packages.callPackage ./rocm-docs-core { }; - rocm-hip-sdk = symlinkJoin { - name = "rocm-hip-sdk-meta"; + hip-common = self.callPackage ./hip-common { }; - paths = [ - rocprim - rocalution - hipfft - rocm-core - hipcub - hipblas - rocrand - rocfft - rocsparse - rccl - rocthrust - rocblas - hipsparse - hipfort - rocwmma - hipsolver - rocsolver - rocm-hip-libraries - rocm-hip-runtime-devel - ]; - }; + # Eventually will be in the LLVM repo + hipcc = self.callPackage ./hipcc { + inherit (llvm) rocm-merged-llvm; + }; - rocm-hip-libraries = symlinkJoin { - name 
= "rocm-hip-libraries-meta"; + # Replaces hip, opencl-runtime, and rocclr + clr = self.callPackage ./clr { }; - paths = [ - rocblas - hipfort - rocm-core - rocsolver - rocalution - rocrand - hipblas - rocfft - hipfft - rccl - rocsparse - hipsparse - hipsolver - rocm-hip-runtime - ]; - }; + aotriton = self.callPackage ./aotriton { }; - rocm-openmp-sdk = symlinkJoin { - name = "rocm-openmp-sdk-meta"; + hipify = self.callPackage ./hipify { + inherit (llvm) + clang + rocm-merged-llvm + ; + }; - paths = [ - rocm-core - llvm.clang - llvm.mlir - llvm.openmp # openmp-extras-devel (https://github.com/ROCm/aomp) - rocm-language-runtime - ]; - }; + # hsakmt was merged into rocm-runtime + hsakmt = self.rocm-runtime; - rocm-opencl-sdk = symlinkJoin { - name = "rocm-opencl-sdk-meta"; + rocprofiler = self.callPackage ./rocprofiler { + inherit (llvm) clang; + }; + rocprofiler-register = self.callPackage ./rocprofiler-register { + inherit (llvm) clang; + }; - paths = [ - rocm-core - rocm-runtime - clr - clr.icd - rocm-thunk - rocm-opencl-runtime - ]; - }; + # Needs GCC + roctracer = self.callPackage ./roctracer { }; - rocm-opencl-runtime = symlinkJoin { - name = "rocm-opencl-runtime-meta"; + rocgdb = self.callPackage ./rocgdb { }; - paths = [ - rocm-core - clr - clr.icd - rocm-language-runtime - ]; - }; + rocdbgapi = self.callPackage ./rocdbgapi { }; - rocm-hip-runtime-devel = symlinkJoin { - name = "rocm-hip-runtime-devel-meta"; + rocr-debug-agent = self.callPackage ./rocr-debug-agent { }; - paths = [ - clr - rocm-core - hipify - rocm-cmake - llvm.clang - llvm.mlir - llvm.openmp - rocm-thunk - rocm-runtime - rocm-hip-runtime - ]; - }; + rocprim = self.callPackage ./rocprim { }; - rocm-hip-runtime = symlinkJoin { - name = "rocm-hip-runtime-meta"; + rocsparse = self.callPackage ./rocsparse { }; - paths = [ - rocm-core - rocminfo - clr - rocm-language-runtime - ]; - }; + rocthrust = self.callPackage ./rocthrust { }; - rocm-language-runtime = symlinkJoin { - name = 
"rocm-language-runtime-meta"; + rocrand = self.callPackage ./rocrand { }; - paths = [ - rocm-runtime - rocm-core - rocm-comgr - llvm.openmp # openmp-extras-runtime (https://github.com/ROCm/aomp) - ]; - }; + hiprand = self.callPackage ./hiprand { }; - rocm-all = symlinkJoin { - name = "rocm-all-meta"; + rocfft = self.callPackage ./rocfft { }; - paths = [ - rocm-developer-tools - rocm-ml-sdk - rocm-ml-libraries - rocm-hip-sdk - rocm-hip-libraries - rocm-openmp-sdk - rocm-opencl-sdk - rocm-opencl-runtime - rocm-hip-runtime-devel - rocm-hip-runtime - rocm-language-runtime - ]; - }; - }; -} // lib.optionalAttrs config.allowAliases { - miopengemm= throw '' - 'miopengemm' has been deprecated. - It is still available for some time as part of rocmPackages_5. - ''; # Added 2024-3-3 + mscclpp = self.callPackage ./mscclpp { }; - miopen-opencl= throw '' - 'miopen-opencl' has been deprecated. - It is still available for some time as part of rocmPackages_5. - ''; # Added 2024-3-3 + rccl = self.callPackage ./rccl { }; - mivisionx-opencl = throw '' - 'mivisionx-opencl' has been deprecated. - Other versions of mivisionx are still available. - It is also still available for some time as part of rocmPackages_5. 
- ''; # Added 2024-3-24 + # RCCL with sanitizers and tests + # Can't have the sanitizer build as a dep of other packages without + # runtime crashes due to ASAN not loading first + rccl-tests = self.callPackage ./rccl { + buildTests = true; + }; + + hipcub = self.callPackage ./hipcub { }; + + hipsparse = self.callPackage ./hipsparse { }; + + hipfort = self.callPackage ./hipfort { }; + + hipfft = self.callPackage ./hipfft { }; + + tensile = pyPackages.callPackage ./tensile { + inherit (self) + rocmUpdateScript + clr + ; + }; + + rocblas = self.callPackage ./rocblas { + buildTests = true; + buildBenchmarks = true; + }; + + rocsolver = self.callPackage ./rocsolver { }; + + rocwmma = self.callPackage ./rocwmma { }; + + rocalution = self.callPackage ./rocalution { }; + + rocmlir = self.callPackage ./rocmlir { + buildRockCompiler = true; + }; + + hipsolver = self.callPackage ./hipsolver { }; + + hipblas-common = self.callPackage ./hipblas-common { }; + + hipblas = self.callPackage ./hipblas { }; + + hipblaslt = self.callPackage ./hipblaslt { }; + + # hipTensor - Only supports GFX9 + + composable_kernel_build = self.callPackage ./composable_kernel { }; + + # FIXME: we have compressed code objects now, may be able to skip two stages?
+ composable_kernel = self.callPackage ./composable_kernel/unpack.nix { }; + ck4inductor = pyPackages.callPackage ./composable_kernel/ck4inductor.nix { + inherit (self) composable_kernel_build; + inherit (llvm) rocm-merged-llvm; + }; + + half = self.callPackage ./half { }; + + miopen = self.callPackage ./miopen { + boost = boost179.override { enableStatic = true; }; + }; + + miopen-hip = self.miopen; + + migraphx = self.callPackage ./migraphx { }; + + rpp = self.callPackage ./rpp { }; + + rpp-hip = self.rpp.override { + useOpenCL = false; + useCPU = false; + }; + + rpp-opencl = self.rpp.override { + useOpenCL = true; + useCPU = false; + }; + + rpp-cpu = self.rpp.override { + useOpenCL = false; + useCPU = true; + }; + + mivisionx = self.callPackage ./mivisionx { + opencv = opencv.override { enablePython = true; }; + # TODO: Remove this pin in ROCm 6.4+ + # FFMPEG support was improved in https://github.com/ROCm/MIVisionX/pull/1460 + ffmpeg = ffmpeg_4; + # Unfortunately, rocAL needs a custom libjpeg-turbo until further notice + # See: https://github.com/ROCm/MIVisionX/issues/1051 + libjpeg_turbo = libjpeg_turbo.overrideAttrs { + version = "2.0.6.1"; + src = fetchFromGitHub { + owner = "rrawther"; + repo = "libjpeg-turbo"; + rev = "640d7ee1917fcd3b6a5271aa6cf4576bccc7c5fb"; + sha256 = "sha256-T52whJ7nZi8jerJaZtYInC2YDN0QM+9tUDqiNr6IsNY="; + }; + # overwrite all patches, since patches for newer version do not apply + patches = [ ./0001-Compile-transupp.c-as-part-of-the-library.patch ]; + }; + }; + + mivisionx-hip = self.mivisionx.override { + rpp = self.rpp-hip; + useOpenCL = false; + useCPU = false; + }; + + mivisionx-cpu = self.mivisionx.override { + rpp = self.rpp-cpu; + useOpenCL = false; + useCPU = true; + }; + + # Even if config.rocmSupport is false we need rocmSupport true + # version of ucc/ucx in openmpi in this package set + openmpi = openmpi-orig.override ( + prev: + let + ucx = prev.ucx.override { + enableCuda = false; + enableRocm = true; + }; + in + { + 
inherit ucx; + ucc = prev.ucc.override { + enableCuda = false; + inherit ucx; + }; + } + ); + mpi = self.openmpi; + + triton-llvm = triton-llvm.overrideAttrs { + src = fetchFromGitHub { + owner = "llvm"; + repo = "llvm-project"; + # make sure this matches triton llvm rel branch hash for now + # https://github.com/triton-lang/triton/blob/release/3.2.x/cmake/llvm-hash.txt + rev = "86b69c31642e98f8357df62c09d118ad1da4e16a"; + hash = "sha256-W/mQwaLGx6/rIBjdzUTIbWrvGjdh7m4s15f70fQ1/hE="; + }; + pname = "triton-llvm-rocm"; + patches = [ ]; # FIXME: https://github.com/llvm/llvm-project//commit/84837e3cc1cf17ed71580e3ea38299ed2bfaa5f6.patch doesn't apply, may need to rebase + }; + + triton = pyPackages.callPackage ./triton { rocmPackages = self; }; + + ## Meta ## + # Emulate common ROCm meta layout + # These are mainly for users. I strongly suggest NOT using these in nixpkgs derivations + # Don't put these into `propagatedBuildInputs` unless you want PATH/PYTHONPATH issues! + # See: https://rocm.docs.amd.com/en/docs-5.7.1/_images/image.004.png + # See: https://rocm.docs.amd.com/en/docs-5.7.1/deploy/linux/os-native/package_manager_integration.html + meta = with self; rec { + rocm-developer-tools = symlinkJoin { + name = "rocm-developer-tools-meta"; + paths = [ + hsa-amd-aqlprofile-bin + rocm-core + rocr-debug-agent + roctracer + rocdbgapi + rocprofiler + rocgdb + rocm-language-runtime + ]; + }; + rocm-ml-sdk = symlinkJoin { + name = "rocm-ml-sdk-meta"; + paths = [ + rocm-core + miopen-hip + rocm-hip-sdk + rocm-ml-libraries + ]; + }; + rocm-ml-libraries = symlinkJoin { + name = "rocm-ml-libraries-meta"; + paths = [ + llvm.clang + llvm.mlir + llvm.openmp + rocm-core + miopen-hip + rocm-hip-libraries + ]; + }; + rocm-hip-sdk = symlinkJoin { + name = "rocm-hip-sdk-meta"; + paths = [ + rocprim + rocalution + hipfft + rocm-core + hipcub + hipblas + hipblaslt + rocrand + rocfft + rocsparse + rccl + rocthrust + rocblas + hipsparse + hipfort + rocwmma + hipsolver + rocsolver + 
rocm-hip-libraries + rocm-hip-runtime-devel + ]; + }; + rocm-hip-libraries = symlinkJoin { + name = "rocm-hip-libraries-meta"; + paths = [ + rocblas + hipfort + rocm-core + rocsolver + rocalution + rocrand + hipblas + hipblaslt + rocfft + hipfft + rccl + rocsparse + hipsparse + hipsolver + rocm-hip-runtime + ]; + }; + rocm-openmp-sdk = symlinkJoin { + name = "rocm-openmp-sdk-meta"; + paths = [ + rocm-core + llvm.clang + llvm.mlir + llvm.openmp # openmp-extras-devel (https://github.com/ROCm/aomp) + rocm-language-runtime + ]; + }; + rocm-opencl-sdk = symlinkJoin { + name = "rocm-opencl-sdk-meta"; + paths = [ + rocm-core + rocm-runtime + clr + clr.icd + rocm-opencl-runtime + ]; + }; + rocm-opencl-runtime = symlinkJoin { + name = "rocm-opencl-runtime-meta"; + paths = [ + rocm-core + clr + clr.icd + rocm-language-runtime + ]; + }; + rocm-hip-runtime-devel = symlinkJoin { + name = "rocm-hip-runtime-devel-meta"; + paths = [ + clr + rocm-core + hipify + rocm-cmake + llvm.clang + llvm.mlir + llvm.openmp + rocm-runtime + rocm-hip-runtime + ]; + }; + rocm-hip-runtime = symlinkJoin { + name = "rocm-hip-runtime-meta"; + paths = [ + rocm-core + rocminfo + clr + rocm-language-runtime + ]; + }; + rocm-language-runtime = symlinkJoin { + name = "rocm-language-runtime-meta"; + paths = [ + rocm-runtime + rocm-core + rocm-comgr + llvm.openmp # openmp-extras-runtime (https://github.com/ROCm/aomp) + ]; + }; + rocm-all = symlinkJoin { + name = "rocm-all-meta"; + paths = [ + rocm-developer-tools + rocm-ml-sdk + rocm-ml-libraries + rocm-hip-sdk + rocm-hip-libraries + rocm-openmp-sdk + rocm-opencl-sdk + rocm-opencl-runtime + rocm-hip-runtime-devel + rocm-hip-runtime + rocm-language-runtime + ]; + }; + }; + + rocm-tests = self.callPackage ./rocm-tests { + rocmPackages = self; + }; + } + // lib.optionalAttrs config.allowAliases { + rocm-thunk = throw '' + 'rocm-thunk' has been removed. It's now part of the ROCm runtime. 
+ ''; # Added 2025-3-16 + + clang-ocl = throw '' + 'clang-ocl' has been deprecated upstream. Use ROCm's clang directly. + ''; # Added 2025-3-16 + + miopengemm = throw '' + 'miopengemm' has been deprecated. + It is still available for some time as part of rocmPackages_5. + ''; # Added 2024-3-3 + + miopen-opencl = throw '' + 'miopen-opencl' has been deprecated. + It is still available for some time as part of rocmPackages_5. + ''; # Added 2024-3-3 + + mivisionx-opencl = throw '' + 'mivisionx-opencl' has been deprecated. + Other versions of mivisionx are still available. + It is also still available for some time as part of rocmPackages_5. + ''; # Added 2024-3-24 + } + ); + scopeForArches = + arches: + outer.overrideScope ( + _final: prev: { + clr = prev.clr.override { + localGpuTargets = arches; + }; + } + ); +in +outer +// builtins.listToAttrs ( + builtins.map (arch: { + name = arch; + value = scopeForArches [ arch ]; + }) outer.clr.gpuTargets +) +// { + gfx9 = scopeForArches [ + "gfx906" + "gfx908" + "gfx90a" + "gfx942" + ]; + gfx10 = scopeForArches [ + "gfx1010" + "gfx1030" + ]; + gfx11 = scopeForArches [ + "gfx1100" + "gfx1101" + "gfx1102" + ]; } diff --git a/pkgs/development/rocm-modules/6/half/default.nix b/pkgs/development/rocm-modules/6/half/default.nix index 77f37790a890..303f9b957fc8 100644 --- a/pkgs/development/rocm-modules/6/half/default.nix +++ b/pkgs/development/rocm-modules/6/half/default.nix @@ -1,20 +1,21 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, cmake -, rocm-cmake +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, }: stdenv.mkDerivation (finalAttrs: { pname = "half"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "half"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-wvl8ny7pbY9hUGGtJ70R7/4YIsahgI7qcVzUnxmUfZM="; + hash = "sha256-H8Ogm4nxaxDB0WHx+KhRjUO3vzp3AwCqrIQ6k8R+xkc="; }; nativeBuildInputs = [ @@ -24,8 +25,8 @@ stdenv.mkDerivation (finalAttrs: 
{ passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -34,6 +35,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.unix; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hip-common/default.nix b/pkgs/development/rocm-modules/6/hip-common/default.nix index 8b0a4fc5add2..b9292936099b 100644 --- a/pkgs/development/rocm-modules/6/hip-common/default.nix +++ b/pkgs/development/rocm-modules/6/hip-common/default.nix @@ -1,18 +1,21 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, }: stdenv.mkDerivation (finalAttrs: { pname = "hip-common"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "HIP"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-51u3By0R4LKoWiklNacFP6HILL845jxpN6FD7rQB+zQ="; + hash = "sha256-y85S2fULvbQfwxZukIsMLuQAqWEv1kHL8fdozK4kj5I="; + # rev = "5f2d2d109c34e749d7947b48834098eec26a5e67"; + # hash = "sha256-Lws65mzRJZP/JE9UiHHfX4Y3zOYA6FPxgbAea48D9Gk="; }; dontConfigure = true; @@ -29,8 +32,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -39,6 +42,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git 
a/pkgs/development/rocm-modules/6/hipblas-common/default.nix b/pkgs/development/rocm-modules/6/hipblas-common/default.nix new file mode 100644 index 000000000000..8981c89c503a --- /dev/null +++ b/pkgs/development/rocm-modules/6/hipblas-common/default.nix @@ -0,0 +1,29 @@ +{ + lib, + stdenv, + cmake, + fetchFromGitHub, + rocm-cmake, + rocmUpdateScript, +}: +stdenv.mkDerivation (finalAttrs: { + pname = "hipblas-common"; + version = "6.3.1"; + nativeBuildInputs = [ + cmake + rocm-cmake + ]; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + meta = with lib; { + description = "Common files shared by hipBLAS and hipBLASLt"; + homepage = "https://github.com/ROCm/hipBLASlt"; + license = with licenses; [ mit ]; + maintainers = teams.rocm.members; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/development/rocm-modules/6/hipblas/default.nix b/pkgs/development/rocm-modules/6/hipblas/default.nix index 3e4436b7d3f7..1a486a80de4d 100644 --- a/pkgs/development/rocm-modules/6/hipblas/default.nix +++ b/pkgs/development/rocm-modules/6/hipblas/default.nix @@ -2,13 +2,17 @@ lib, stdenv, fetchFromGitHub, + fetchpatch, rocmUpdateScript, cmake, rocm-cmake, clr, gfortran, + hipblas-common, rocblas, rocsolver, + rocsparse, + rocprim, gtest, lapack-reference, buildTests ? 
false, @@ -19,7 +23,7 @@ # Can also use cuBLAS stdenv.mkDerivation (finalAttrs: { pname = "hipblas"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -39,9 +43,23 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "hipBLAS"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-Fq7o2sMmHlHIv9UKJw+u/h9K/ZhKVJWwosYTdYIsscA="; + hash = "sha256-Rz1KAhBUbvErHTF2PM1AkVhqo4OHldfSNMSpp5Tx9yk="; }; + patches = [ + # https://github.com/ROCm/hipBLAS/pull/952 + (fetchpatch { + name = "transitively-depend-hipblas-common.patch"; + url = "https://github.com/ROCm/hipBLAS/commit/54220fdaebf0fb4fd0921ee9e418ace5b143ec8f.patch"; + hash = "sha256-MFEhv8Bkrd2zD0FFIDg9oJzO7ztdyMAF+R9oYA0rmwQ="; + }) + ]; + + postPatch = '' + substituteInPlace library/CMakeLists.txt \ + --replace-fail "find_package(Git REQUIRED)" "" + ''; + nativeBuildInputs = [ cmake rocm-cmake @@ -49,9 +67,13 @@ stdenv.mkDerivation (finalAttrs: { gfortran ]; + propagatedBuildInputs = [ hipblas-common ]; + buildInputs = [ rocblas + rocprim + rocsparse rocsolver ] ++ lib.optionals buildTests [ @@ -63,13 +85,16 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_CXX_COMPILER=${lib.getExe' clr "hipcc"}" + # Upstream is migrating to amdclang++, it is likely this will be correct in next version bump + #"-DCMAKE_CXX_COMPILER=${lib.getBin clr}/bin/amdclang++" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DAMDGPU_TARGETS=${rocblas.amdgpu_targets}" ] ++ lib.optionals buildTests [ "-DBUILD_CLIENTS_TESTS=ON" @@ -100,8 +125,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { 
@@ -110,8 +135,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hipblaslt/default.nix b/pkgs/development/rocm-modules/6/hipblaslt/default.nix new file mode 100644 index 000000000000..fe6f22a74cc3 --- /dev/null +++ b/pkgs/development/rocm-modules/6/hipblaslt/default.nix @@ -0,0 +1,235 @@ +{ + lib, + stdenv, + fetchpatch, + fetchFromGitHub, + cmake, + rocm-cmake, + clr, + gfortran, + gtest, + msgpack, + libxml2, + python3, + python3Packages, + openmp, + hipblas-common, + tensile, + lapack-reference, + ncurses, + libffi, + zlib, + zstd, + rocmUpdateScript, + buildTests ? false, + buildBenchmarks ? false, + buildSamples ? false, + # hipblaslt supports only devices with MFMA or WMMA + # WMMA on gfx1100 may be broken + # MFMA on MI100 may be broken + # MI200/MI300 known to work + gpuTargets ? 
( + clr.localGpuTargets or [ + # "gfx908" FIXME: confirm MFMA on MI100 works + "gfx90a" + "gfx942" + # "gfx1100" FIXME: confirm WMMA targets work + ] + ), +}: + +stdenv.mkDerivation ( + finalAttrs: + let + supportsTargetArches = + (builtins.any (lib.strings.hasPrefix "gfx9") gpuTargets) + || (builtins.any (lib.strings.hasPrefix "gfx11") gpuTargets); + tensile' = (tensile.override { isTensileLite = true; }).overrideAttrs { + inherit (finalAttrs) src; + sourceRoot = "${finalAttrs.src.name}/tensilelite"; + }; + py = python3.withPackages (ps: [ + ps.pyyaml + ps.setuptools + ps.packaging + ]); + gpuTargets' = lib.optionalString supportsTargetArches (lib.concatStringsSep ";" gpuTargets); + compiler = "amdclang++"; + cFlags = "-O3 -I${msgpack}/include"; # FIXME: cmake files need to be patched to include this properly + in + { + pname = "hipblaslt${clr.gpuArchSuffix}"; + version = "6.3.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "hipBLASLt"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-ozfHwsxcczzYXN9SIkyfRvdtaCqlDN4bh3UHZNS2oVQ="; + }; + env.CXX = compiler; + env.CFLAGS = cFlags; + env.CXXFLAGS = cFlags; + env.ROCM_PATH = "${clr}"; + env.TENSILE_ROCM_ASSEMBLER_PATH = lib.getExe' clr "amdclang++"; + env.TENSILE_GEN_ASSEMBLY_TOOLCHAIN = lib.getExe' clr "amdclang++"; + # Some tensile scripts look for this as an env var rather than a cmake flag + env.CMAKE_CXX_COMPILER = lib.getExe' clr "amdclang++"; + requiredSystemFeatures = [ "big-parallel" ]; + + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ] + ++ lib.optionals buildSamples [ + "sample" + ]; + + postPatch = '' + mkdir -p build/Tensile/library + # git isn't needed and we have no .git + substituteInPlace cmake/Dependencies.cmake \ + --replace-fail "find_package(Git REQUIRED)" "" + substituteInPlace CMakeLists.txt \ + --replace-fail "include(virtualenv)" "" \ + --replace-fail "virtualenv_install(\''${Tensile_TEST_LOCAL_PATH})"
"" \ + --replace-fail "virtualenv_install(\''${CMAKE_SOURCE_DIR}/tensilelite)" "" \ + --replace-fail 'find_package(Tensile 4.33.0 EXACT REQUIRED HIP LLVM OpenMP PATHS "''${INSTALLED_TENSILE_PATH}")' "find_package(Tensile)" \ + --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"' + # FIXME: TensileCreateExtOpLibraries build failure due to unsupported null operand + # Working around for now by disabling the ExtOp libs + substituteInPlace library/src/amd_detail/rocblaslt/src/CMakeLists.txt \ + --replace-fail 'TensileCreateExtOpLibraries("' '# skipping TensileCreateExtOpLibraries' + substituteInPlace library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh \ + --replace-fail '${"\${rocm_path}"}/bin/' "" + ''; + + # Apply patches to allow building without a target arch if we need to do that + patches = lib.optionals (!supportsTargetArches) [ + # Add ability to build without specifying any arch. + (fetchpatch { + sha256 = "sha256-VW3bPzmQvfo8+iKsVfpn4sbqAe41fLzCEUfBh9JxVyk="; + url = "https://raw.githubusercontent.com/gentoo/gentoo/refs/heads/master/sci-libs/hipBLASLt/files/hipBLASLt-6.1.1-no-arch.patch"; + }) + # Followup to above patch for 6.3.x + (fetchpatch { + sha256 = "sha256-GCsrne6BiWzwj8TMAfFuaYz1Pij97hoCc6E3qJhWb10="; + url = "https://raw.githubusercontent.com/gentoo/gentoo/refs/heads/master/sci-libs/hipBLASLt/files/hipBLASLt-6.3.0-no-arch-extra.patch"; + }) + ]; + + doCheck = false; + doInstallCheck = false; + + nativeBuildInputs = [ + cmake + rocm-cmake + py + clr + gfortran + # need make to get streaming console output so nix knows build is still running + # so deliberately not using ninja + ]; + + buildInputs = + [ + hipblas-common + tensile' + openmp + libffi + ncurses + + # Tensile deps - not optional, building without tensile isn't actually supported + msgpack # FIXME: not included in cmake!
+ libxml2 + python3Packages.msgpack + python3Packages.joblib + zlib + zstd + ] + ++ lib.optionals buildTests [ + gtest + ] + ++ lib.optionals (buildTests || buildBenchmarks) [ + lapack-reference + ]; + + cmakeFlags = + [ + "-Wno-dev" + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_VERBOSE_MAKEFILE=ON" + "-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}" + "-DTENSILE_USE_HIP=ON" + "-DTENSILE_BUILD_CLIENT=OFF" + "-DTENSILE_USE_FLOAT16_BUILTIN=ON" + "-DCMAKE_CXX_COMPILER=${compiler}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DHIPBLASLT_ENABLE_MARKER=Off" + # FIXME what are the implications of hardcoding this? + "-DTensile_CODE_OBJECT_VERSION=V5" + "-DTensile_COMPILER=${compiler}" + "-DAMDGPU_TARGETS=${gpuTargets'}" + "-DGPU_TARGETS=${gpuTargets'}" + "-DTensile_LIBRARY_FORMAT=msgpack" + ] + ++ lib.optionals (!supportsTargetArches) [ + "-DBUILD_WITH_TENSILE=OFF" + ] + ++ lib.optionals buildTests [ + "-DBUILD_CLIENTS_TESTS=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_CLIENTS_BENCHMARKS=ON" + ] + ++ lib.optionals buildSamples [ + "-DBUILD_CLIENTS_SAMPLES=ON" + ]; + + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/hipblas-test $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/hipblas-bench $benchmark/bin + '' + + lib.optionalString buildSamples '' + mkdir -p $sample/bin + mv $out/bin/example-* $sample/bin + '' + + lib.optionalString (buildTests || buildBenchmarks || buildSamples) '' + rmdir $out/bin + ''; + # If this is false there are no kernels in the output lib + # and it's useless at runtime + # so if it's an optional dep it's best to not depend on it + # Some packages like torch need hipblaslt to compile + # and are fine ignoring it at runtime if it's not supported + # so we have to support building an empty hipblaslt + 
passthru.supportsTargetArches = supportsTargetArches; + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + inherit (finalAttrs.src) owner repo; + }; + passthru.tensilelite = tensile'; + meta = with lib; { + description = "hipBLASLt is a library that provides general matrix-matrix operations with a flexible API"; + homepage = "https://github.com/ROCm/hipBLASlt"; + license = with licenses; [ mit ]; + maintainers = teams.rocm.members; + platforms = platforms.linux; + }; + } +) diff --git a/pkgs/development/rocm-modules/6/hipblaslt/ext-op-first.diff b/pkgs/development/rocm-modules/6/hipblaslt/ext-op-first.diff new file mode 100644 index 000000000000..87438bf6a5e5 --- /dev/null +++ b/pkgs/development/rocm-modules/6/hipblaslt/ext-op-first.diff @@ -0,0 +1,22 @@ +diff --git a/library/src/amd_detail/rocblaslt/src/CMakeLists.txt b/library/src/amd_detail/rocblaslt/src/CMakeLists.txt +index 3d5ace35..8c5a3841 100644 +--- a/library/src/amd_detail/rocblaslt/src/CMakeLists.txt ++++ b/library/src/amd_detail/rocblaslt/src/CMakeLists.txt +@@ -58,6 +58,8 @@ if( BUILD_WITH_TENSILE ) + set(Tensile_Options ${Tensile_Options} LAZY_LIBRARY_LOADING) + endif() + ++ #TensileCreateExtOpLibraries("${PROJECT_BINARY_DIR}/Tensile/library" "${Tensile_ARCHITECTURE}") ++ + # Add a build target for Tensile kernel library + # Runtime language is HIP by default + # warning our Tensile_ variables may shadow variable in TensileCreateLibraryFiles +@@ -86,8 +88,6 @@ if( BUILD_WITH_TENSILE ) + ) + endif() + +- TensileCreateExtOpLibraries("${PROJECT_BINARY_DIR}/Tensile/library" "${Tensile_ARCHITECTURE}") +- + # Create a unique name for TensileHost compiled for rocBLAS + set_target_properties( TensileHost PROPERTIES OUTPUT_NAME rocblaslt-tensile CXX_EXTENSIONS NO ) + diff --git a/pkgs/development/rocm-modules/6/hipcc/0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch b/pkgs/development/rocm-modules/6/hipcc/0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch new 
file mode 100644 index 000000000000..bfc386c620cb --- /dev/null +++ b/pkgs/development/rocm-modules/6/hipcc/0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch @@ -0,0 +1,39 @@ +From f259eca77c592813e11752a46c4e1f9a74c64091 Mon Sep 17 00:00:00 2001 +From: Luna Nova +Date: Fri, 11 Oct 2024 02:56:22 -0700 +Subject: [PATCH] [hipcc] Remove extra definition of hipBinUtilPtr_ in derived + platforms + +Fixes UB when hipBinUtilPtr_ is used. +--- + amd/hipcc/src/hipBin_amd.h | 1 - + amd/hipcc/src/hipBin_nvidia.h | 1 - + 2 files changed, 2 deletions(-) + +diff --git a/amd/hipcc/src/hipBin_amd.h b/amd/hipcc/src/hipBin_amd.h +index 0a782d1beab9..36cd625ae8bc 100644 +--- a/src/hipBin_amd.h ++++ b/src/hipBin_amd.h +@@ -42,7 +42,6 @@ THE SOFTWARE. + + class HipBinAmd : public HipBinBase { + private: +- HipBinUtil* hipBinUtilPtr_; + string hipClangPath_ = ""; + string roccmPathEnv_, hipRocclrPathEnv_, hsaPathEnv_; + PlatformInfo platformInfoAMD_; +diff --git a/amd/hipcc/src/hipBin_nvidia.h b/amd/hipcc/src/hipBin_nvidia.h +index ff142cc1cea2..09b7b80979c7 100644 +--- a/src/hipBin_nvidia.h ++++ b/src/hipBin_nvidia.h +@@ -31,7 +31,6 @@ THE SOFTWARE. 
+ + class HipBinNvidia : public HipBinBase { + private: +- HipBinUtil* hipBinUtilPtr_; + string cudaPath_ = ""; + PlatformInfo platformInfoNV_; + string hipCFlags_, hipCXXFlags_, hipLdFlags_; +-- +2.46.0 + diff --git a/pkgs/development/rocm-modules/6/hipcc/default.nix b/pkgs/development/rocm-modules/6/hipcc/default.nix index 5f2ac080cb50..9eaa072b931d 100644 --- a/pkgs/development/rocm-modules/6/hipcc/default.nix +++ b/pkgs/development/rocm-modules/6/hipcc/default.nix @@ -1,49 +1,46 @@ { lib, stdenv, - fetchFromGitHub, - rocmUpdateScript, + rocm-merged-llvm, cmake, lsb-release, }: stdenv.mkDerivation (finalAttrs: { pname = "hipcc"; - version = "6.0.2"; - - src = fetchFromGitHub { - owner = "ROCm"; - repo = "HIPCC"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-/LRQN+RSMBPk2jS/tdp3psUL/B0RJZQhRri7e67KsG4="; - }; + # In-tree with ROCm LLVM + inherit (rocm-merged-llvm) version; + src = rocm-merged-llvm.llvm-src; + sourceRoot = "${finalAttrs.src.name}/amd/hipcc"; nativeBuildInputs = [ cmake ]; + buildInputs = [ rocm-merged-llvm ]; + + patches = [ + # https://github.com/ROCm/llvm-project/pull/183 + # Fixes always-invoked UB in hipcc + ./0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch + ]; + postPatch = '' substituteInPlace src/hipBin_amd.h \ - --replace "/usr/bin/lsb_release" "${lsb-release}/bin/lsb_release" + --replace-fail "/usr/bin/lsb_release" "${lsb-release}/bin/lsb_release" ''; + cmakeFlags = [ + "-DCMAKE_BUILD_TYPE=Release" + ]; postInstall = '' rm -r $out/hip/bin ln -s $out/bin $out/hip/bin ''; - passthru.updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - }; - meta = with lib; { description = "Compiler driver utility that calls clang or nvcc"; homepage = "https://github.com/ROCm/HIPCC"; license = with licenses; [ mit ]; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version 
!= versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hipcub/default.nix b/pkgs/development/rocm-modules/6/hipcub/default.nix index 8f6f97ed5f20..f191cc1d0134 100644 --- a/pkgs/development/rocm-modules/6/hipcub/default.nix +++ b/pkgs/development/rocm-modules/6/hipcub/default.nix @@ -1,36 +1,40 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, cmake -, rocm-cmake -, rocprim -, clr -, gtest -, gbenchmark -, buildTests ? false -, buildBenchmarks ? false -, gpuTargets ? [ ] +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + rocprim, + clr, + gtest, + gbenchmark, + buildTests ? false, + buildBenchmarks ? false, + gpuTargets ? [ ], }: # CUB can also be used as a backend instead of rocPRIM. stdenv.mkDerivation (finalAttrs: { pname = "hipcub"; - version = "6.0.2"; + version = "6.3.1"; - outputs = [ - "out" - ] ++ lib.optionals buildTests [ - "test" - ] ++ lib.optionals buildBenchmarks [ - "benchmark" - ]; + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ]; src = fetchFromGitHub { owner = "ROCm"; repo = "hipCUB"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-8QzVgj0JSb86zEG3sj5AAt9pG3frw+xrjEOTo7xCIrc="; + hash = "sha256-uECOQWG9C64tg5YZdm9/3+fZXaZVGslu8vElK3m23GY="; }; nativeBuildInputs = [ @@ -39,44 +43,53 @@ stdenv.mkDerivation (finalAttrs: { clr ]; - buildInputs = [ - rocprim - ] ++ lib.optionals buildTests [ - gtest - ] ++ lib.optionals buildBenchmarks [ - gbenchmark - ]; + buildInputs = + [ + rocprim + ] + ++ lib.optionals buildTests [ + gtest + ] + ++ lib.optionals buildBenchmarks [ + gbenchmark + ]; - cmakeFlags = [ - "-DCMAKE_CXX_COMPILER=hipcc" - "-DHIP_ROOT_DIR=${clr}" - # Manually define CMAKE_INSTALL_ - # See: https://github.com/NixOS/nixpkgs/pull/197838 - "-DCMAKE_INSTALL_BINDIR=bin" - "-DCMAKE_INSTALL_LIBDIR=lib" - "-DCMAKE_INSTALL_INCLUDEDIR=include" - 
] ++ lib.optionals (gpuTargets != [ ]) [ - "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" - ] ++ lib.optionals buildTests [ - "-DBUILD_TEST=ON" - ] ++ lib.optionals buildBenchmarks [ - "-DBUILD_BENCHMARK=ON" - ]; + cmakeFlags = + [ + "-DHIP_ROOT_DIR=${clr}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_TEST=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_BENCHMARK=ON" + ]; - postInstall = lib.optionalString buildTests '' - mkdir -p $test/bin - mv $out/bin/test_* $test/bin - '' + lib.optionalString buildBenchmarks '' - mkdir -p $benchmark/bin - mv $out/bin/benchmark_* $benchmark/bin - '' + lib.optionalString (buildTests || buildBenchmarks) '' - rmdir $out/bin - ''; + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/test_* $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/benchmark_* $benchmark/bin + '' + + lib.optionalString (buildTests || buildBenchmarks) '' + rmdir $out/bin + ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -85,6 +98,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ bsd3 ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hipfft/default.nix b/pkgs/development/rocm-modules/6/hipfft/default.nix index bc9e26bd9f11..76cbf83883cc 100644 --- 
a/pkgs/development/rocm-modules/6/hipfft/default.nix +++ b/pkgs/development/rocm-modules/6/hipfft/default.nix @@ -22,7 +22,7 @@ # Can also use cuFFT stdenv.mkDerivation (finalAttrs: { pname = "hipfft"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -42,7 +42,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "hipFFT"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-DjjNQryJdl7RmaMQRQPWkleweEWMIwH/xXU84GGjoC0="; + hash = "sha256-Jq/YHEtOo7a0/Ki7gxZATKmSqPU6cyLf5gx3A4MAZNw="; fetchSubmodules = true; }; @@ -111,8 +111,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -121,8 +121,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hipfort/default.nix b/pkgs/development/rocm-modules/6/hipfort/default.nix index 73f583f1fe1d..2e0b1769cb51 100644 --- a/pkgs/development/rocm-modules/6/hipfort/default.nix +++ b/pkgs/development/rocm-modules/6/hipfort/default.nix @@ -1,21 +1,22 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, cmake -, rocm-cmake -, gfortran +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + gfortran, }: stdenv.mkDerivation (finalAttrs: { pname = "hipfort"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "hipfort"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-3PIqSDyDlY0oVSEx20EPlKGYNkc9xPZtIG3Sbw69esE="; + hash = "sha256-cokHxyb4NDMHeq7RIVz7PBuUKRIHyGdZgDgF6Za4fHM="; }; nativeBuildInputs = [ @@ -51,8 +52,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = 
rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -61,6 +62,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; # mitx11 maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hipify/default.nix b/pkgs/development/rocm-modules/6/hipify/default.nix index 29109a701856..edc05361a50e 100644 --- a/pkgs/development/rocm-modules/6/hipify/default.nix +++ b/pkgs/development/rocm-modules/6/hipify/default.nix @@ -1,39 +1,57 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, cmake -, clang -, libxml2 +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + clang, + libxml2, + rocm-merged-llvm, + zlib, + zstd, + perl, }: stdenv.mkDerivation (finalAttrs: { pname = "hipify"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "HIPIFY"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-nNyWrPPhUwT7FyASzc3kf5NCTzeqvHybVOc+6hBzkA4="; + hash = "sha256-o/1LNsNtAyQcSug1gf7ujGNRRbvC33kwldrJKZi2LA0="; }; - nativeBuildInputs = [ cmake ]; - buildInputs = [ libxml2 ]; + nativeBuildInputs = [ + cmake + ]; + + buildInputs = [ + libxml2 + rocm-merged-llvm + zlib + zstd + perl + ]; postPatch = '' substituteInPlace CMakeLists.txt \ --replace "\''${LLVM_TOOLS_BINARY_DIR}/clang" "${clang}/bin/clang" + chmod +x bin/* ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; postInstall = '' - patchShebangs $out/bin + chmod +x $out/bin/* + chmod +x $out/libexec/* + patchShebangs $out/bin/ + patchShebangs $out/libexec/ ''; meta = with lib; { @@ 
-42,6 +60,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hiprand/default.nix b/pkgs/development/rocm-modules/6/hiprand/default.nix index 796e73c4e9e2..13446523c5ef 100644 --- a/pkgs/development/rocm-modules/6/hiprand/default.nix +++ b/pkgs/development/rocm-modules/6/hiprand/default.nix @@ -14,7 +14,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "hiprand"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -28,7 +28,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "hipRAND"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-uGHzOhUX5JEknVFwhHhWFdPmwLS/TuaXYMeItS7tXIg="; + hash = "sha256-TVc+qFwRiS5tAo1OKI1Wu5hadlwPZmSVZ9SvVvH1w7Y="; }; nativeBuildInputs = [ @@ -41,8 +41,6 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" "-DHIP_ROOT_DIR=${clr}" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 @@ -67,8 +65,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -77,8 +75,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hipsolver/default.nix b/pkgs/development/rocm-modules/6/hipsolver/default.nix index de2e3a049b65..4f7bc2e46af5 100644 --- a/pkgs/development/rocm-modules/6/hipsolver/default.nix +++ 
b/pkgs/development/rocm-modules/6/hipsolver/default.nix @@ -9,6 +9,8 @@ gfortran, rocblas, rocsolver, + rocsparse, + suitesparse, gtest, lapack-reference, buildTests ? false, @@ -19,7 +21,7 @@ # Can also use cuSOLVER stdenv.mkDerivation (finalAttrs: { pname = "hipsolver"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -39,7 +41,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "hipSOLVER"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-iMfaOv4TdTkmaRHCZOuqUfjO081J6on71+s8nIwwV00="; + hash = "sha256-ZQUKU3L4DgZ5zM7pCYEix0ulRkl78x/5wJnyCndTAwk="; }; nativeBuildInputs = [ @@ -53,6 +55,8 @@ stdenv.mkDerivation (finalAttrs: { [ rocblas rocsolver + rocsparse + suitesparse ] ++ lib.optionals buildTests [ gtest @@ -63,13 +67,13 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_C_COMPILER=hipcc" "-DCMAKE_CXX_COMPILER=hipcc" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DBUILD_WITH_SPARSE=OFF" # FIXME: broken - can't find suitesparse/cholmod, looks fixed in master ] ++ lib.optionals buildTests [ "-DBUILD_CLIENTS_TESTS=ON" @@ -101,8 +105,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -111,8 +115,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hipsparse/default.nix b/pkgs/development/rocm-modules/6/hipsparse/default.nix index 20257d629a00..ec6e467803cf 100644 --- a/pkgs/development/rocm-modules/6/hipsparse/default.nix +++ 
b/pkgs/development/rocm-modules/6/hipsparse/default.nix @@ -12,6 +12,7 @@ gtest, openmp, buildTests ? false, + buildBenchmarks ? false, buildSamples ? false, gpuTargets ? [ ], }: @@ -19,7 +20,7 @@ # This can also use cuSPARSE as a backend instead of rocSPARSE stdenv.mkDerivation (finalAttrs: { pname = "hipsparse"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -36,7 +37,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "hipSPARSE"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-fi5b0IF++OiezpM3JuUkhwpmW2apeFH4r5g6CcFseNY="; + hash = "sha256-3a7fKpYyiqG3aGOg7YrTHmKoH4rgTVLD16DvrZ3YY1g="; }; nativeBuildInputs = [ @@ -51,7 +52,7 @@ stdenv.mkDerivation (finalAttrs: { rocsparse git ] - ++ lib.optionals buildTests [ + ++ lib.optionals (buildTests || buildBenchmarks) [ gtest ] ++ lib.optionals (buildTests || buildSamples) [ @@ -60,20 +61,17 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" - "-DBUILD_CLIENTS_SAMPLES=${if buildSamples then "ON" else "OFF"}" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" + (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests) + (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks) + (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildSamples) ] ++ lib.optionals (gpuTargets != [ ]) [ "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" - ] - ++ lib.optionals buildTests [ - "-DBUILD_CLIENTS_TESTS=ON" ]; # We have to manually generate the matrices @@ -140,8 +138,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -150,8 +148,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = 
teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/hsa-amd-aqlprofile-bin/default.nix b/pkgs/development/rocm-modules/6/hsa-amd-aqlprofile-bin/default.nix index fb2f2c84379b..1dcd91b3fc40 100644 --- a/pkgs/development/rocm-modules/6/hsa-amd-aqlprofile-bin/default.nix +++ b/pkgs/development/rocm-modules/6/hsa-amd-aqlprofile-bin/default.nix @@ -4,22 +4,24 @@ fetchurl, callPackage, dpkg, + rocm-core, }: stdenv.mkDerivation (finalAttrs: { pname = "hsa-amd-aqlprofile-bin"; - version = "6.0.2"; + version = "6.3.0"; src = let - version = finalAttrs.version; - dotless = builtins.replaceStrings [ "." ] [ "0" ] version; - incremental = "115"; + inherit (finalAttrs) version; + patch = rocm-core.ROCM_LIBPATCH_VERSION; + majorMinor = lib.versions.major version + "." + lib.versions.minor version; + incremental = "39"; osRelease = "22.04"; in fetchurl { - url = "https://repo.radeon.com/rocm/apt/${version}/pool/main/h/hsa-amd-aqlprofile/hsa-amd-aqlprofile_1.0.0.${dotless}.${dotless}-${incremental}~${osRelease}_amd64.deb"; - hash = "sha256-0XeKUKaof5pSMS/UgLwumBDBYgyH/pCex9jViUKENXY="; + url = "https://repo.radeon.com/rocm/apt/${majorMinor}/pool/main/h/hsa-amd-aqlprofile/hsa-amd-aqlprofile_1.0.0.${patch}-${incremental}~${osRelease}_amd64.deb"; + hash = "sha256-ghgz5ZgWopgLJcK4Vbwm6zlny3IwxzWz9V0Fuwu35R0="; }; nativeBuildInputs = [ dpkg ]; @@ -31,7 +33,7 @@ stdenv.mkDerivation (finalAttrs: { runHook preInstall mkdir -p $out - cp -a opt/rocm-${finalAttrs.version}/* $out + cp -a opt/rocm-${finalAttrs.version}*/* $out chmod +x $out/lib/libhsa-amd-aqlprofile64.so.1.* chmod +x $out/lib/hsa-amd-aqlprofile/librocprofv2_att.so @@ -46,8 +48,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ unfree ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor 
finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/llvm/base.nix b/pkgs/development/rocm-modules/6/llvm/base.nix deleted file mode 100644 index 8b85ae53bcaa..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/base.nix +++ /dev/null @@ -1,216 +0,0 @@ -{ - lib, - stdenv, - gcc12Stdenv, - fetchFromGitHub, - rocmUpdateScript, - pkg-config, - cmake, - ninja, - git, - doxygen, - sphinx, - lit, - libxml2, - libxcrypt, - libedit, - libffi, - mpfr, - zlib, - ncurses, - python3Packages, - buildDocs ? true, - buildMan ? true, - buildTests ? true, - targetName ? "llvm", - targetDir ? "llvm", - targetProjects ? [ ], - targetRuntimes ? [ ], - llvmTargetsToBuild ? [ "NATIVE" ], # "NATIVE" resolves into x86 or aarch64 depending on stdenv - extraPatches ? [ ], - extraNativeBuildInputs ? [ ], - extraBuildInputs ? [ ], - extraCMakeFlags ? [ ], - extraPostPatch ? "", - checkTargets ? [ - (lib.optionalString buildTests (if targetDir == "runtimes" then "check-runtimes" else "check-all")) - ], - extraPostInstall ? "", - hardeningDisable ? [ ], - requiredSystemFeatures ? [ ], - extraLicenses ? [ ], - isBroken ? 
false, -}: - -let - stdenv' = stdenv; -in -let - stdenv = - if stdenv'.cc.cc.isGNU or false && lib.versionAtLeast stdenv'.cc.cc.version "13.0" then - gcc12Stdenv - else - stdenv'; -in - -let - llvmNativeTarget = - if stdenv.hostPlatform.isx86_64 then - "X86" - else if stdenv.hostPlatform.isAarch64 then - "AArch64" - else - throw "Unsupported ROCm LLVM platform"; - inferNativeTarget = t: if t == "NATIVE" then llvmNativeTarget else t; - llvmTargetsToBuild' = [ "AMDGPU" ] ++ builtins.map inferNativeTarget llvmTargetsToBuild; -in -stdenv.mkDerivation (finalAttrs: { - pname = "rocm-llvm-${targetName}"; - version = "6.0.2"; - - outputs = - [ - "out" - ] - ++ lib.optionals buildDocs [ - "doc" - ] - ++ lib.optionals buildMan [ - "man" - "info" # Avoid `attribute 'info' missing` when using with wrapCC - ]; - - patches = extraPatches; - - src = fetchFromGitHub { - owner = "ROCm"; - repo = "llvm-project"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-uGxalrwMNCOSqSFVrYUBi3ijkMEFFTrzFImmvZKQf6I="; - }; - - nativeBuildInputs = - [ - pkg-config - cmake - ninja - git - (python3Packages.python.withPackages (p: [ p.setuptools ])) - ] - ++ lib.optionals (buildDocs || buildMan) [ - doxygen - sphinx - python3Packages.recommonmark - ] - ++ lib.optionals (buildTests && !finalAttrs.passthru.isLLVM) [ - lit - ] - ++ extraNativeBuildInputs; - - buildInputs = [ - libxml2 - libxcrypt - libedit - libffi - mpfr - ] ++ extraBuildInputs; - - propagatedBuildInputs = lib.optionals finalAttrs.passthru.isLLVM [ - zlib - ncurses - ]; - - sourceRoot = "${finalAttrs.src.name}/${targetDir}"; - - cmakeFlags = - [ - "-DLLVM_TARGETS_TO_BUILD=${builtins.concatStringsSep ";" llvmTargetsToBuild'}" - ] - ++ lib.optionals (finalAttrs.passthru.isLLVM && targetProjects != [ ]) [ - "-DLLVM_ENABLE_PROJECTS=${lib.concatStringsSep ";" targetProjects}" - ] - ++ - lib.optionals ((finalAttrs.passthru.isLLVM || targetDir == "runtimes") && targetRuntimes != [ ]) - [ - 
"-DLLVM_ENABLE_RUNTIMES=${lib.concatStringsSep ";" targetRuntimes}" - ] - ++ lib.optionals finalAttrs.passthru.isLLVM [ - "-DLLVM_INSTALL_UTILS=ON" - "-DLLVM_INSTALL_GTEST=ON" - ] - ++ lib.optionals (buildDocs || buildMan) [ - "-DLLVM_INCLUDE_DOCS=ON" - "-DLLVM_BUILD_DOCS=ON" - # "-DLLVM_ENABLE_DOXYGEN=ON" Way too slow, only uses one core - "-DLLVM_ENABLE_SPHINX=ON" - "-DSPHINX_OUTPUT_HTML=ON" - "-DSPHINX_OUTPUT_MAN=ON" - "-DSPHINX_WARNINGS_AS_ERRORS=OFF" - ] - ++ lib.optionals buildTests [ - "-DLLVM_INCLUDE_TESTS=ON" - "-DLLVM_BUILD_TESTS=ON" - "-DLLVM_EXTERNAL_LIT=${lit}/bin/.lit-wrapped" - ] - ++ extraCMakeFlags; - - prePatch = '' - cd ../ - chmod -R u+w . - ''; - - postPatch = - '' - cd ${targetDir} - '' - + lib.optionalString finalAttrs.passthru.isLLVM '' - patchShebangs lib/OffloadArch/make_generated_offload_arch_h.sh - '' - + lib.optionalString (buildTests && finalAttrs.passthru.isLLVM) '' - # FileSystem permissions tests fail with various special bits - rm test/tools/llvm-objcopy/ELF/mirror-permissions-unix.test - rm unittests/Support/Path.cpp - - substituteInPlace unittests/Support/CMakeLists.txt \ - --replace-fail "Path.cpp" "" - '' - + extraPostPatch; - - doCheck = buildTests; - checkTarget = lib.concatStringsSep " " checkTargets; - - postInstall = - lib.optionalString buildMan '' - mkdir -p $info - '' - + extraPostInstall; - - passthru = { - isLLVM = targetDir == "llvm"; - isClang = targetDir == "clang" || builtins.elem "clang" targetProjects; - isROCm = true; - - updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - }; - }; - - inherit hardeningDisable requiredSystemFeatures; - - meta = with lib; { - description = "ROCm fork of the LLVM compiler infrastructure"; - homepage = "https://github.com/ROCm/llvm-project"; - license = with licenses; [ ncsa ] ++ extraLicenses; - maintainers = - with maintainers; - [ - acowley - lovesegfault - ] - ++ teams.rocm.members; - platforms = 
platforms.linux; - broken = isBroken || versionAtLeast finalAttrs.version "7.0.0"; - }; -}) diff --git a/pkgs/development/rocm-modules/6/llvm/clang-at-least-16-LLVMgold-path.patch b/pkgs/development/rocm-modules/6/llvm/clang-at-least-16-LLVMgold-path.patch new file mode 100644 index 000000000000..74cdbacc8a71 --- /dev/null +++ b/pkgs/development/rocm-modules/6/llvm/clang-at-least-16-LLVMgold-path.patch @@ -0,0 +1,14 @@ +diff --git a/lib/Driver/ToolChains/CommonArgs.cpp b/lib/Driver/ToolChains/CommonArgs.cpp +index 34640b3c450d..93c4a4f4ec5c 100644 +--- a/lib/Driver/ToolChains/CommonArgs.cpp ++++ b/lib/Driver/ToolChains/CommonArgs.cpp +@@ -589,8 +589,7 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, + #endif + + SmallString<1024> Plugin; +- llvm::sys::path::native(Twine(D.Dir) + +- "/../" CLANG_INSTALL_LIBDIR_BASENAME + ++ llvm::sys::path::native(Twine("@libllvmLibdir@") + + PluginName + Suffix, + Plugin); + CmdArgs.push_back(Args.MakeArgString(Twine(PluginPrefix) + Plugin)); diff --git a/pkgs/development/rocm-modules/6/llvm/clang-bodge-ignore-systemwide-incls.diff b/pkgs/development/rocm-modules/6/llvm/clang-bodge-ignore-systemwide-incls.diff new file mode 100644 index 000000000000..5f44dbe15655 --- /dev/null +++ b/pkgs/development/rocm-modules/6/llvm/clang-bodge-ignore-systemwide-incls.diff @@ -0,0 +1,23 @@ +diff --git a/lib/Driver/ToolChains/Linux.cpp b/lib/Driver/ToolChains/Linux.cpp +index 57368104c914..71c57f72078e 100644 +--- a/lib/Driver/ToolChains/Linux.cpp ++++ b/lib/Driver/ToolChains/Linux.cpp +@@ -640,6 +640,7 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs, + return; + + // LOCAL_INCLUDE_DIR ++ if (!SysRoot.empty()) + addSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/local/include")); + // TOOL_INCLUDE_DIR + AddMultilibIncludeArgs(DriverArgs, CC1Args); +@@ -672,8 +673,10 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs, + // Add an include of '/include' directly. 
This isn't provided by default by + // system GCCs, but is often used with cross-compiling GCCs, and harmless to + // add even when Clang is acting as-if it were a system compiler. ++ if (!SysRoot.empty()) + addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/include")); + ++ if (!SysRoot.empty()) + addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/include")); + + if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && getTriple().isMusl()) diff --git a/pkgs/development/rocm-modules/6/llvm/clang-log-jobs.diff b/pkgs/development/rocm-modules/6/llvm/clang-log-jobs.diff new file mode 100644 index 000000000000..3a3a712c8bba --- /dev/null +++ b/pkgs/development/rocm-modules/6/llvm/clang-log-jobs.diff @@ -0,0 +1,40 @@ +diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp +index 06f5e7e7e335..8407d664886a 100644 +--- a/lib/Driver/Compilation.cpp ++++ b/lib/Driver/Compilation.cpp +@@ -340,6 +340,9 @@ private: + void Compilation::ExecuteJobs(const JobList &Jobs, + FailingCommandList &FailingCommands, + bool LogOnly) const { ++ // If >1 job, log as each job finishes so can see progress while building many offloads ++ const bool logJobs = Jobs.size() > 1; ++ auto start_time = std::chrono::steady_clock::now(); + // According to UNIX standard, driver need to continue compiling all the + // inputs on the command line even one of them failed. 
+ // In all but CLMode, execute all the jobs unless the necessary inputs for the +@@ -364,11 +367,25 @@ void Compilation::ExecuteJobs(const JobList &Jobs, + + JS.setJobState(Next, JobScheduler::JS_RUN); + auto Work = [&, Next]() { ++ auto job_start_time = std::chrono::steady_clock::now(); + const Command *FailingCommand = nullptr; + if (int Res = ExecuteCommand(*Next, FailingCommand, LogOnly)) { + FailingCommands.push_back(std::make_pair(Res, FailingCommand)); + JS.setJobState(Next, JobScheduler::JS_FAIL); + } else { ++ if (logJobs && Next) { ++ auto now = std::chrono::steady_clock::now(); ++ auto job_duration = std::chrono::duration_cast<std::chrono::seconds>(now - job_start_time).count(); ++ auto duration = std::chrono::duration_cast<std::chrono::seconds>(now - start_time).count(); ++ if (duration > 10 && job_duration > 0) { ++ if (Next->getOutputFilenames().empty()) ++ if (Next->getExecutable()) llvm::errs() << "Job completed: " << Next->getExecutable() << "\n"; ++ else (llvm::errs() << "Job completed: "), Next->Print(llvm::errs(), "\n", true); ++ else ++ llvm::errs() << "Job completed: " << Next->getOutputFilenames().front().c_str() << "\n"; ++ } ++ } ++ + JS.setJobState(Next, JobScheduler::JS_DONE); + } + }; diff --git a/pkgs/development/rocm-modules/6/llvm/default.nix b/pkgs/development/rocm-modules/6/llvm/default.nix index f68d29e37a19..366ec2449f9f 100644 --- a/pkgs/development/rocm-modules/6/llvm/default.nix +++ b/pkgs/development/rocm-modules/6/llvm/default.nix @@ -1,142 +1,515 @@ { - # stdenv FIXME: Try changing back to this with a new ROCm release https://github.com/NixOS/nixpkgs/issues/271943 - gcc12Stdenv, - callPackage, - rocmUpdateScript, - wrapBintoolsWith, + lib, + stdenv, + llvmPackages_18, overrideCC, rocm-device-libs, rocm-runtime, - rocm-thunk, - clr, + fetchFromGitHub, + runCommand, + symlinkJoin, + rdfind, + wrapBintoolsWith, + emptyDirectory, + zstd, + zlib, + gcc-unwrapped, + glibc, + substituteAll, + libffi, + libxml2, + removeReferencesTo, + fetchpatch, + # Build compilers and
stdenv suitable for profiling + # compressed line tables (-g1 -gz) and + # frame pointers for sampling profilers (-fno-omit-frame-pointer -momit-leaf-frame-pointer) + # TODO: Should also apply to downstream packages which use rocmClangStdenv + profilableStdenv ? false, }: let - ## Stage 1 ## - # Projects - llvm = callPackage ./stage-1/llvm.nix { - inherit rocmUpdateScript; - stdenv = gcc12Stdenv; - }; - clang-unwrapped = callPackage ./stage-1/clang-unwrapped.nix { - inherit rocmUpdateScript llvm; - stdenv = gcc12Stdenv; - }; - lld = callPackage ./stage-1/lld.nix { - inherit rocmUpdateScript llvm; - stdenv = gcc12Stdenv; + llvmPackagesNoBintools = llvmPackages_18.override { + bootBintools = null; + bootBintoolsNoLibc = null; }; + useLibcxx = false; # whether rocm stdenv uses libcxx (clang c++ stdlib) instead of gcc stdlibc++ - # Runtimes - runtimes = callPackage ./stage-1/runtimes.nix { - inherit rocmUpdateScript llvm; - stdenv = gcc12Stdenv; - }; + llvmStdenv = overrideCC llvmPackagesNoBintools.libcxxStdenv llvmPackagesNoBintools.clangUseLLVM; + llvmLibstdcxxStdenv = overrideCC llvmPackagesNoBintools.stdenv ( + llvmPackagesNoBintools.libstdcxxClang.override { + inherit (llvmPackages_18) bintools; + } + ); + stdenvToBuildRocmLlvm = if useLibcxx then llvmStdenv else llvmLibstdcxxStdenv; + gcc-include = runCommand "gcc-include" { } '' + mkdir -p $out + ln -s ${gcc-unwrapped}/include/ $out/ + ln -s ${gcc-unwrapped}/lib/ $out/ + ''; - ## Stage 2 ## - # Helpers - bintools-unwrapped = callPackage ./stage-2/bintools-unwrapped.nix { inherit llvm lld; }; - bintools = wrapBintoolsWith { bintools = bintools-unwrapped; }; - rStdenv = callPackage ./stage-2/rstdenv.nix { - inherit - llvm - clang-unwrapped - lld - runtimes - bintools - ; - stdenv = gcc12Stdenv; + # A prefix for use as the GCC prefix when building rocmcxx + disallowedRefsForToolchain = [ + stdenv.cc + stdenv.cc.cc + stdenv.cc.bintools + gcc-unwrapped + stdenvToBuildRocmLlvm + ]; + gcc-prefix = + let + 
gccPrefixPaths = [ + gcc-unwrapped + gcc-unwrapped.lib + glibc.dev + ]; + in + symlinkJoin { + name = "gcc-prefix"; + paths = gccPrefixPaths ++ [ + glibc + ]; + disallowedRequisites = gccPrefixPaths; + postBuild = '' + rm -rf $out/{bin,libexec,nix-support,lib64,share,etc} + rm $out/lib/gcc/x86_64-unknown-linux-gnu/*/plugin/include/auto-host.h + + mkdir /build/tmpout + mv $out/* /build/tmpout + cp -Lr --no-preserve=mode /build/tmpout/* $out/ + set -x + versionedIncludePath="$(echo $out/include/c++/*/)" + mv $versionedIncludePath/* $out/include/c++/ + rm -rf $versionedIncludePath/ + + find $out/lib -type f -exec ${removeReferencesTo}/bin/remove-references-to -t ${gcc-unwrapped.lib} {} + + + ln -s $out $out/x86_64-unknown-linux-gnu + ''; + }; + version = "6.3.1"; + # major version of this should be the clang version ROCm forked from + rocmLlvmVersion = "18.0.0-${llvmSrc.rev}"; + usefulOutputs = + drv: + builtins.filter (x: x != null) [ + drv + (drv.lib or null) + (drv.dev or null) + ]; + listUsefulOutputs = builtins.concatMap usefulOutputs; + llvmSrc = fetchFromGitHub { + # Performance improvements cherry-picked on top of rocm-6.3.x + # most importantly, amdgpu-early-alwaysinline memory usage fix + owner = "LunNova"; + repo = "llvm-project-rocm"; + rev = "4182046534deb851753f0d962146e5176f648893"; + hash = "sha256-sPmYi1WiiAqnRnHVNba2nPUxGflBC01FWCTNLPlYF9c="; }; + llvmSrcFixed = llvmSrc; + llvmMajorVersion = lib.versions.major rocmLlvmVersion; + # An llvmPackages (pkgs/development/compilers/llvm/) built from ROCm LLVM's source tree + # optionally using LLVM libcxx + llvmPackagesRocm = llvmPackages_18.override (_old: { + stdenv = stdenvToBuildRocmLlvm; # old.stdenv #llvmPackagesNoBintools.libcxxStdenv; + + # not setting gitRelease = because that causes patch selection logic to use git patches + # ROCm LLVM is closer to 18 official + # gitRelease = {}; officialRelease = null; + officialRelease = { }; # Set but empty because we're overriding everything from it. 
+ version = rocmLlvmVersion; + src = llvmSrcFixed; + monorepoSrc = llvmSrcFixed; + doCheck = false; + }); + sysrootCompiler = + cc: name: paths: + let + linked = symlinkJoin { inherit name paths; }; + in + runCommand name { } '' + set -x + mkdir -p $out/ + cp --reflink=auto -rL ${linked}/* $out/ + chmod -R +rw $out + mkdir -p $out/usr + ln -s $out/ $out/usr/local + mkdir -p $out/nix-support/ + rm -rf $out/lib64 # we don't need mixed 32 bit + echo 'export CC=clang' >> $out/nix-support/setup-hook + echo 'export CXX=clang++' >> $out/nix-support/setup-hook + mkdir -p $out/lib/clang/${llvmMajorVersion}/lib/linux/ + ln -s $out/lib/linux/libclang_rt.* $out/lib/clang/${llvmMajorVersion}/lib/linux/ + + find $out -type f -exec sed -i "s|${cc.out}|$out|g" {} + + find $out -type f -exec sed -i "s|${cc.dev}|$out|g" {} + + + # our /include now has more than clang expects, so this specific dir still needs to point to cc.dev + # FIXME: could copy into a different subdir? + sed -i 's|set(CLANG_INCLUDE_DIRS.*$|set(CLANG_INCLUDE_DIRS "${cc.dev}/include")|g' $out/lib/cmake/clang/ClangConfig.cmake + ${lib.getExe rdfind} -makesymlinks true $out/ # create links *within* the sysroot to save space + ''; + findClangNostdlibincPatch = + x: + ( + (lib.strings.hasSuffix "add-nostdlibinc-flag.patch" (builtins.baseNameOf x)) + || (lib.strings.hasSuffix "clang-at-least-16-LLVMgold-path.patch" (builtins.baseNameOf x)) + ); + llvmTargetsFlag = "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${ + { + "x86_64" = "X86"; + "aarch64" = "AArch64"; + } + .${llvmStdenv.targetPlatform.parsed.cpu.name} + }"; + # -ffat-lto-objects = emit LTO object files that are compatible with non-LTO-supporting builds too + # FatLTO objects are a special type of fat object file that contain LTO compatible IR in addition to generated object code, + # instead of containing object code for multiple target architectures. 
This allows users to defer the choice of whether to + # use LTO or not to link-time, and has been a feature available in other compilers, like GCC, for some time. + + tablegenUsage = x: !(lib.strings.hasInfix "llvm-tblgen" x); + addGccLtoCmakeFlags = !llvmPackagesRocm.stdenv.cc.isClang; + llvmExtraCflags = + "-O3 -DNDEBUG -march=skylake -mtune=znver3" + + (lib.optionalString addGccLtoCmakeFlags " -D_GLIBCXX_USE_CXX11_ABI=0 -flto -ffat-lto-objects -flto-compression-level=19 -Wl,-flto") + + (lib.optionalString llvmPackagesRocm.stdenv.cc.isClang " -flto=thin -ffat-lto-objects") + + (lib.optionalString profilableStdenv " -fno-omit-frame-pointer -momit-leaf-frame-pointer -gz -g1"); in rec { - inherit - llvm - clang-unwrapped - lld - bintools - ; - - # Runtimes - libc = callPackage ./stage-2/libc.nix { - inherit rocmUpdateScript; - stdenv = rStdenv; + inherit (llvmPackagesRocm) libunwind; + inherit (llvmPackagesRocm) libcxx; + llvm-orig = llvmPackagesRocm.llvm; # nix why-depends --derivation .#rocmPackages.clr .#rocmPackages.llvm.llvm-orig + clang-orig = llvmPackagesRocm.clang; # nix why-depends --derivation .#rocmPackages.clr .#rocmPackages.llvm.llvm-orig + llvm = (llvmPackagesRocm.llvm.override { ninja = emptyDirectory; }).overrideAttrs (old: { + dontStrip = profilableStdenv; + nativeBuildInputs = old.nativeBuildInputs ++ [ removeReferencesTo ]; + buildInputs = old.buildInputs ++ [ + zstd + zlib + ]; + env.NIX_BUILD_ID_STYLE = "fast"; + postPatch = '' + ${old.postPatch or ""} + patchShebangs lib/OffloadArch/make_generated_offload_arch_h.sh + ''; + LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib"; + cmakeFlags = + (builtins.filter tablegenUsage old.cmakeFlags) + ++ [ + llvmTargetsFlag + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_ENABLE_ZSTD=FORCE_ON" + "-DLLVM_ENABLE_ZLIB=FORCE_ON" + "-DLLVM_ENABLE_THREADS=ON" + "-DLLVM_ENABLE_LTO=Thin" + "-DLLVM_USE_LINKER=lld" + (lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx) + "-DCLANG_DEFAULT_CXX_STDLIB=${if 
useLibcxx then "libc++" else "libstdc++"}" + ] + ++ lib.optionals addGccLtoCmakeFlags [ + "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar" + "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib" + "-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm" + ] + ++ lib.optionals useLibcxx [ + "-DLLVM_ENABLE_LTO=Thin" + "-DLLVM_USE_LINKER=lld" + "-DLLVM_ENABLE_LIBCXX=ON" + ]; + preConfigure = '' + ${old.preConfigure or ""} + cmakeFlagsArray+=( + '-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}' + '-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}' + ) + ''; + # Ensure we don't leak refs to compiler that was used to bootstrap this LLVM + disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain; + postFixup = '' + ${old.postFixup or ""} + remove-references-to -t "${stdenv.cc}" "$lib/lib/libLLVMSupport.a" + find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} + + find $lib -type f -exec remove-references-to -t ${stdenvToBuildRocmLlvm.cc} {} + + find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} + + ''; + }); + lld = + (llvmPackagesRocm.lld.override { + libllvm = llvm; + ninja = emptyDirectory; + }).overrideAttrs + (old: { + patches = builtins.filter ( + x: !(lib.strings.hasSuffix "more-openbsd-program-headers.patch" (builtins.baseNameOf x)) + ) old.patches; + dontStrip = profilableStdenv; + nativeBuildInputs = old.nativeBuildInputs ++ [ + llvmPackagesNoBintools.lld + removeReferencesTo + ]; + buildInputs = old.buildInputs ++ [ + zstd + zlib + ]; + env.NIX_BUILD_ID_STYLE = "fast"; + LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib"; + cmakeFlags = + (builtins.filter tablegenUsage old.cmakeFlags) + ++ [ + llvmTargetsFlag + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_ENABLE_ZSTD=FORCE_ON" + "-DLLVM_ENABLE_ZLIB=FORCE_ON" + "-DLLVM_ENABLE_THREADS=ON" + "-DLLVM_ENABLE_LTO=Thin" + "-DLLVM_USE_LINKER=lld" + (lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx) + "-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}" + ] + 
++ lib.optionals addGccLtoCmakeFlags [ + "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar" + "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib" + "-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm" + ] + ++ lib.optionals useLibcxx [ + "-DLLVM_ENABLE_LIBCXX=ON" + ]; + # Ensure we don't leak refs to compiler that was used to bootstrap this LLVM + disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain; + postFixup = '' + ${old.postFixup or ""} + find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} + + find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} + + ''; + preConfigure = '' + ${old.preConfigure or ""} + cmakeFlagsArray+=( + '-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}' + '-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}' + ) + ''; + }); + clang-unwrapped = + ( + (llvmPackagesRocm.clang-unwrapped.override { + libllvm = llvm; + ninja = emptyDirectory; + }).overrideAttrs + ( + old: + let + filteredPatches = builtins.filter (x: !(findClangNostdlibincPatch x)) old.patches; + in + { + meta.platforms = [ + "x86_64-linux" + ]; + pname = "${old.pname}-rocm"; + patches = filteredPatches ++ [ + ./clang-bodge-ignore-systemwide-incls.diff + ./clang-log-jobs.diff + (fetchpatch { + # [ClangOffloadBundler]: Add GetBundleIDsInFile to OffloadBundler + sha256 = "sha256-G/mzUdFfrJ2bLJgo4+mBcR6Ox7xGhWu5X+XxT4kH2c8="; + url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/6d296f879b0fed830c54b2a9d26240da86c8bb3a.patch"; + relative = "clang"; + }) + # FIXME: if llvm was overrideable properly this wouldn't be needed + (substituteAll { + src = ./clang-at-least-16-LLVMgold-path.patch; + libllvmLibdir = "${llvm.lib}/lib"; + }) + ]; + nativeBuildInputs = old.nativeBuildInputs ++ [ + llvmPackagesNoBintools.lld + removeReferencesTo + ]; + buildInputs = old.buildInputs ++ [ + zstd + zlib + ]; + dontStrip = profilableStdenv; + LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib"; + env = (old.env or { }) // { + 
NIX_BUILD_ID_STYLE = "fast"; + }; + # Ensure we don't leak refs to compiler that was used to bootstrap this LLVM + disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain; + requiredSystemFeatures = (old.requiredSystemFeatures or [ ]) ++ [ "big-parallel" ]; + # https://github.com/llvm/llvm-project/blob/6976deebafa8e7de993ce159aa6b82c0e7089313/clang/cmake/caches/DistributionExample-stage2.cmake#L9-L11 + cmakeFlags = + (builtins.filter tablegenUsage old.cmakeFlags) + ++ [ + llvmTargetsFlag + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_ENABLE_ZSTD=FORCE_ON" + "-DLLVM_ENABLE_ZLIB=FORCE_ON" + "-DLLVM_ENABLE_THREADS=ON" + "-DLLVM_ENABLE_LTO=Thin" + "-DLLVM_USE_LINKER=lld" + (lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx) + "-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}" + ] + ++ lib.optionals addGccLtoCmakeFlags [ + "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar" + "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib" + "-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm" + ] + ++ lib.optionals useLibcxx [ + "-DLLVM_ENABLE_LTO=Thin" + "-DLLVM_ENABLE_LIBCXX=ON" + "-DLLVM_USE_LINKER=lld" + "-DCLANG_DEFAULT_RTLIB=compiler-rt" + ] + ++ lib.optionals (!useLibcxx) [ + # FIXME: Config file in rocmcxx instead of GCC_INSTALL_PREFIX? 
+ "-DGCC_INSTALL_PREFIX=${gcc-prefix}" + ]; + postFixup = + (old.postFixup or "") + + '' + find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} + + find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} + + ''; + preConfigure = + (old.preConfigure or "") + + '' + cmakeFlagsArray+=( + '-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}' + '-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}' + ) + ''; + } + ) + ) + // { + libllvm = llvm; + }; + # A clang that understands standard include searching in a GNU sysroot and will put GPU libs in include path + # in the right order + # and expects its libc to be in the sysroot + rocmcxx = + (sysrootCompiler clang-unwrapped "rocmcxx" ( + listUsefulOutputs ( + [ + clang-unwrapped + bintools + compiler-rt + ] + ++ (lib.optionals useLibcxx [ + libcxx + ]) + ++ (lib.optionals (!useLibcxx) [ + gcc-include + glibc + glibc.dev + ]) + ) + )) + // { + version = llvmMajorVersion; + cc = rocmcxx; + libllvm = llvm; + isClang = true; + isGNU = false; + }; + clang-tools = llvmPackagesRocm.clang-tools.override { + inherit clang-unwrapped clang; }; - libunwind = callPackage ./stage-2/libunwind.nix { - inherit rocmUpdateScript; - stdenv = rStdenv; - }; - libcxxabi = callPackage ./stage-2/libcxxabi.nix { - inherit rocmUpdateScript; - stdenv = rStdenv; - }; - libcxx = callPackage ./stage-2/libcxx.nix { - inherit rocmUpdateScript; - stdenv = rStdenv; - }; - compiler-rt = callPackage ./stage-2/compiler-rt.nix { - inherit rocmUpdateScript llvm; - stdenv = rStdenv; + compiler-rt-libc = llvmPackagesRocm.compiler-rt-libc.overrideAttrs (old: { + patches = old.patches ++ [ + (fetchpatch { + name = "Fix-missing-main-function-in-float16-bfloat16-support-checks.patch"; + url = "https://github.com/ROCm/llvm-project/commit/68d8b3846ab1e6550910f2a9a685690eee558af2.patch"; + hash = "sha256-Db+L1HFMWVj4CrofsGbn5lnMoCzEcU+7q12KKFb17/g="; + relative = "compiler-rt"; + }) + ]; + }); + compiler-rt = compiler-rt-libc; + bintools = wrapBintoolsWith 
{ + bintools = llvmPackagesRocm.bintools-unwrapped.override { + inherit lld llvm; + }; }; - ## Stage 3 ## - # Helpers - clang = callPackage ./stage-3/clang.nix { - inherit - llvm - lld - clang-unwrapped - bintools - libc - libunwind - libcxxabi - libcxx - compiler-rt - ; - stdenv = gcc12Stdenv; + clang = rocmcxx; + + # Emulate a monolithic ROCm LLVM build to support building ROCm's in-tree LLVM projects + rocm-merged-llvm = symlinkJoin { + name = "rocm-llvm-merge"; + paths = + [ + llvm + llvm.dev + lld + lld.lib + lld.dev + libunwind + libunwind.dev + compiler-rt + compiler-rt.dev + rocmcxx + ] + ++ lib.optionals useLibcxx [ + libcxx + libcxx.out + libcxx.dev + ]; + postBuild = builtins.unsafeDiscardStringContext '' + found_files=$(find $out -name '*.cmake') + if [ -z "$found_files" ]; then + >&2 echo "Error: No CMake files found in $out" + exit 1 + fi + + for target in ${clang-unwrapped.out} ${clang-unwrapped.lib} ${clang-unwrapped.dev}; do + if grep "$target" $found_files; then + >&2 echo "Unexpected ref to $target (clang-unwrapped) found" + # exit 1 + # # FIXME: enable this to reduce closure size + fi + done + ''; + inherit version; + llvm-src = llvmSrc; }; - rocmClangStdenv = overrideCC gcc12Stdenv clang; + + rocmClangStdenv = overrideCC ( + if useLibcxx then llvmPackagesRocm.libcxxStdenv else llvmPackagesRocm.stdenv + ) clang; # Projects - clang-tools-extra = callPackage ./stage-3/clang-tools-extra.nix { - inherit rocmUpdateScript llvm clang-unwrapped; - stdenv = rocmClangStdenv; - }; - libclc = callPackage ./stage-3/libclc.nix { - inherit rocmUpdateScript llvm clang; - stdenv = rocmClangStdenv; - }; - lldb = callPackage ./stage-3/lldb.nix { - inherit rocmUpdateScript clang; - stdenv = rocmClangStdenv; - }; - mlir = callPackage ./stage-3/mlir.nix { - inherit rocmUpdateScript clr; - stdenv = rocmClangStdenv; - }; - polly = callPackage ./stage-3/polly.nix { - inherit rocmUpdateScript; - stdenv = rocmClangStdenv; - }; - flang = callPackage ./stage-3/flang.nix { - 
inherit rocmUpdateScript clang-unwrapped mlir; - stdenv = rocmClangStdenv; - }; - openmp = callPackage ./stage-3/openmp.nix { - inherit - rocmUpdateScript - llvm - clang-unwrapped - clang - rocm-device-libs - rocm-runtime - rocm-thunk - ; - stdenv = rocmClangStdenv; - }; - - # Runtimes - pstl = callPackage ./stage-3/pstl.nix { - inherit rocmUpdateScript; - stdenv = rocmClangStdenv; - }; + openmp = + (llvmPackagesRocm.openmp.override { + stdenv = rocmClangStdenv; + llvm = rocm-merged-llvm; + targetLlvm = rocm-merged-llvm; + clang-unwrapped = clang; + }).overrideAttrs + (old: { + disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain; + nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ removeReferencesTo ]; + cmakeFlags = + old.cmakeFlags + ++ [ + "-DDEVICELIBS_ROOT=${rocm-device-libs.src}" + # OMPD support is broken in ROCm 6.3. Haven't investigated why. + "-DLIBOMP_OMPD_SUPPORT:BOOL=FALSE" + "-DLIBOMP_OMPD_GDB_SUPPORT:BOOL=FALSE" + ] + ++ lib.optionals addGccLtoCmakeFlags [ + "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar" + "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib" + ]; + env.LLVM = "${rocm-merged-llvm}"; + env.LLVM_DIR = "${rocm-merged-llvm}"; + buildInputs = old.buildInputs ++ [ + rocm-device-libs + rocm-runtime + zlib + zstd + libxml2 + libffi + ]; + }); } diff --git a/pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix b/pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix deleted file mode 100644 index 7e44ecb0bff2..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-1/clang-unwrapped.nix +++ /dev/null @@ -1,48 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, - llvm, -}: - -callPackage ../base.nix { - inherit stdenv rocmUpdateScript; - targetName = "clang-unwrapped"; - targetDir = "clang"; - extraBuildInputs = [ llvm ]; - - extraCMakeFlags = [ - "-DCLANG_INCLUDE_DOCS=ON" - "-DCLANG_INCLUDE_TESTS=ON" - ]; - - extraPostPatch = '' - # Looks like they forgot to add finding libedit to 
the standalone build - ln -s ../cmake/Modules/FindLibEdit.cmake cmake/modules - - substituteInPlace CMakeLists.txt \ - --replace-fail "include(CheckIncludeFile)" "include(CheckIncludeFile)''\nfind_package(LibEdit)" - - # `No such file or directory: '/build/source/clang/tools/scan-build/bin/scan-build'` - rm test/Analysis/scan-build/*.test - rm test/Analysis/scan-build/rebuild_index/rebuild_index.test - - # `does not depend on a module exporting 'baz.h'` - rm test/Modules/header-attribs.cpp - - # We do not have HIP or the ROCm stack available yet - rm test/Driver/hip-options.hip - - # ???? `ld: cannot find crti.o: No such file or directory` linker issue? - rm test/Interpreter/dynamic-library.cpp - - # `fatal error: 'stdio.h' file not found` - rm test/OpenMP/amdgcn_emit_llvm.c - ''; - - extraPostInstall = '' - mv bin/clang-tblgen $out/bin - ''; - - requiredSystemFeatures = [ "big-parallel" ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-1/lld.nix b/pkgs/development/rocm-modules/6/llvm/stage-1/lld.nix deleted file mode 100644 index 6a6226a221e0..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-1/lld.nix +++ /dev/null @@ -1,15 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, - llvm, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildMan = false; # No man pages to build - targetName = "lld"; - targetDir = targetName; - extraBuildInputs = [ llvm ]; - checkTargets = [ "check-${targetName}" ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-1/llvm.nix b/pkgs/development/rocm-modules/6/llvm/stage-1/llvm.nix deleted file mode 100644 index a9464da16697..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-1/llvm.nix +++ /dev/null @@ -1,11 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, -}: - -callPackage ../base.nix { - inherit stdenv rocmUpdateScript; - requiredSystemFeatures = [ "big-parallel" ]; - isBroken = stdenv.hostPlatform.isAarch64; # 
https://github.com/ROCm/ROCm/issues/1831#issuecomment-1278205344 -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-1/runtimes.nix b/pkgs/development/rocm-modules/6/llvm/stage-1/runtimes.nix deleted file mode 100644 index 268ad973b913..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-1/runtimes.nix +++ /dev/null @@ -1,32 +0,0 @@ -{ - lib, - stdenv, - callPackage, - rocmUpdateScript, - llvm, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildDocs = false; - buildMan = false; - buildTests = false; - targetName = "runtimes"; - targetDir = targetName; - - targetRuntimes = [ - "libunwind" - "libcxxabi" - "libcxx" - "compiler-rt" - ]; - - extraBuildInputs = [ llvm ]; - - extraCMakeFlags = [ - "-DLIBCXX_INCLUDE_BENCHMARKS=OFF" - "-DLIBCXX_CXX_ABI=libcxxabi" - ]; - - extraLicenses = [ lib.licenses.mit ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-2/1000-libcxx-failing-tests.list b/pkgs/development/rocm-modules/6/llvm/stage-2/1000-libcxx-failing-tests.list deleted file mode 100644 index a70c98d4e473..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-2/1000-libcxx-failing-tests.list +++ /dev/null @@ -1,176 +0,0 @@ -../libcxx/test/libcxx/containers/gnu_cxx/hash_map.pass.cpp -../libcxx/test/libcxx/containers/gnu_cxx/hash_set.pass.cpp -../libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.cxx2a.pass.cpp -../libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/construct.cxx2a.pass.cpp -../libcxx/test/libcxx/input.output/filesystems/class.directory_entry/directory_entry.mods/last_write_time.pass.cpp -../libcxx/test/libcxx/input.output/filesystems/class.path/path.member/path.native.obs/string_alloc.pass.cpp -../libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp -../libcxx/test/libcxx/localization/locales/locale/locale.types/locale.facet/no_allocation.pass.cpp 
-../libcxx/test/libcxx/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_from_underaligned_buffer.pass.cpp -../libcxx/test/libcxx/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_in_geometric_progression.pass.cpp -../libcxx/test/std/containers/associative/map/map.access/index_key.pass.cpp -../libcxx/test/std/containers/associative/map/map.access/index_rv_key.pass.cpp -../libcxx/test/std/containers/associative/map/map.modifiers/insert_and_emplace_allocator_requirements.pass.cpp -../libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_allocator_requirements.pass.cpp -../libcxx/test/std/containers/associative/multiset/insert_emplace_allocator_requirements.pass.cpp -../libcxx/test/std/containers/associative/set/insert_and_emplace_allocator_requirements.pass.cpp -../libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_iter_iter.pass.cpp -../libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_size_value.pass.cpp -../libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_value.pass.cpp -../libcxx/test/std/containers/sequences/vector.bool/ctor_exceptions.pass.cpp -../libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp -../libcxx/test/std/containers/unord/unord.map/unord.map.elem/index.pass.cpp -../libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_and_emplace_allocator_requirements.pass.cpp -../libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_allocator_requirements.pass.cpp -../libcxx/test/std/containers/unord/unord.multiset/insert_emplace_allocator_requirements.pass.cpp -../libcxx/test/std/containers/unord/unord.set/insert_and_emplace_allocator_requirements.pass.cpp -../libcxx/test/std/experimental/memory/memory.resource.global/new_delete_resource.pass.cpp -../libcxx/test/std/experimental/memory/memory.resource.global/null_memory_resource.pass.cpp 
-../libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/pbackfail.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/copy_assign.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/copy.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/move_assign.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/move.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/assign.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp 
-../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp -../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.append.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/source.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.compare.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.concat.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.decompose/path.decompose.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.gen/lexically_normal.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.gen/lexically_relative_and_proximate.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.generic.obs/generic_string_alloc.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.generic.obs/named_overloads.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/clear.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/make_preferred.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/remove_filename.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/replace_extension.pass.cpp 
-../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/replace_filename.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/swap.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.member/path.native.obs/named_overloads.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.nonmember/path.factory.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.nonmember/path.io.pass.cpp -../libcxx/test/std/input.output/filesystems/class.path/path.nonmember/swap.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp -../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file_large.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file.pass.cpp 
-../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_symlink/copy_symlink.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory_symlink/create_directory_symlink.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_hard_link/create_hard_link.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_symlink/create_symlink.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp 
-../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.last_write_time/last_write_time.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.permissions/permissions.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.proximate/proximate.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove_all/remove_all.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove_all/toctou.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove/remove.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.temp_dir_path/temp_directory_path.pass.cpp -../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp -../libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.dtor/dtor.pass.cpp -../libcxx/test/std/localization/locale.stdcvt/codecvt_utf16.pass.cpp -../libcxx/test/std/localization/locale.stdcvt/codecvt_utf8.pass.cpp -../libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/ctor.pass.cpp 
-../libcxx/test/std/localization/locales/locale/locale.members/combine.pass.cpp -../libcxx/test/std/strings/basic.string/string.cons/substr_rvalue.pass.cpp -../libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp -../libcxx/test/std/utilities/any/any.class/any.assign/value.pass.cpp -../libcxx/test/std/utilities/any/any.class/any.cons/copy.pass.cpp -../libcxx/test/std/utilities/any/any.class/any.cons/default.pass.cpp -../libcxx/test/std/utilities/any/any.class/any.cons/in_place_type.pass.cpp -../libcxx/test/std/utilities/any/any.class/any.cons/move.pass.cpp -../libcxx/test/std/utilities/any/any.class/any.cons/value.pass.cpp -../libcxx/test/std/utilities/any/any.class/any.modifiers/emplace.pass.cpp -../libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_reference.pass.cpp -../libcxx/test/std/utilities/any/any.nonmembers/make_any.pass.cpp -../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.alg/swap.pass.cpp -../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_assign.pass.cpp -../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_move.pass.cpp -../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/F_assign.pass.cpp -../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/F.pass.cpp -../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/nullptr_t_assign.pass.cpp -../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.mod/swap.pass.cpp -../libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate_at_least.pass.cpp -../libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp -../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.enab/enable_shared_from_this.pass.cpp 
-../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_throw.pass.cpp -../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_throw.pass.cpp -../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_throw.pass.cpp -../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/unique_ptr.pass.cpp -../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.global/new_delete_resource.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.global/null_memory_resource.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.ctor/without_buffer.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_deallocate.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_exception_safety.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_from_initial_buffer.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_from_zero_sized_buffer.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_in_geometric_progression.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_overaligned_request.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_with_initial_size.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.ctor/ctor_does_not_allocate.pass.cpp 
-../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/equality.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate_overaligned_request.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate_reuse_blocks.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/unsync_allocate_overaligned_request.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/unsync_allocate_reuse_blocks.pass.cpp -../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/unsync_allocate.pass.cpp -../libcxx/test/std/language.support/support.dynamic/hardware_inference_size.compile.pass.cpp -../libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/sized_delete_array14.pass.cpp -../libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/sized_delete14.pass.cpp -../libcxx/test/libcxx/selftest/sh.cpp/empty.sh.cpp -../libcxx/test/libcxx/transitive_includes.sh.cpp diff --git a/pkgs/development/rocm-modules/6/llvm/stage-2/bintools-unwrapped.nix b/pkgs/development/rocm-modules/6/llvm/stage-2/bintools-unwrapped.nix deleted file mode 100644 index e17a913d4bb7..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-2/bintools-unwrapped.nix +++ /dev/null @@ -1,29 +0,0 @@ -{ - runCommand, - llvm, - lld, -}: - -runCommand "rocm-llvm-binutils-${llvm.version}" { preferLocalBuild = true; } '' - mkdir -p $out/bin - - for prog in ${lld}/bin/*; do - ln -s $prog $out/bin/$(basename $prog) - done - - for prog in ${llvm}/bin/*; do - ln -sf $prog $out/bin/$(basename $prog) - done - - ln -s ${llvm}/bin/llvm-ar $out/bin/ar - ln -s ${llvm}/bin/llvm-as $out/bin/as - ln -s ${llvm}/bin/llvm-dwp $out/bin/dwp - ln -s ${llvm}/bin/llvm-nm $out/bin/nm - ln -s ${llvm}/bin/llvm-objcopy 
$out/bin/objcopy - ln -s ${llvm}/bin/llvm-objdump $out/bin/objdump - ln -s ${llvm}/bin/llvm-ranlib $out/bin/ranlib - ln -s ${llvm}/bin/llvm-readelf $out/bin/readelf - ln -s ${llvm}/bin/llvm-size $out/bin/size - ln -s ${llvm}/bin/llvm-strip $out/bin/strip - ln -s ${lld}/bin/lld $out/bin/ld -'' diff --git a/pkgs/development/rocm-modules/6/llvm/stage-2/compiler-rt.nix b/pkgs/development/rocm-modules/6/llvm/stage-2/compiler-rt.nix deleted file mode 100644 index f3b8648e1104..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-2/compiler-rt.nix +++ /dev/null @@ -1,64 +0,0 @@ -{ - lib, - stdenv, - callPackage, - rocmUpdateScript, - llvm, - glibc, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildDocs = false; # No documentation to build - buildMan = false; # No man pages to build - targetName = "compiler-rt"; - targetDir = "runtimes"; - - targetRuntimes = [ - "libunwind" - "libcxxabi" - "libcxx" - targetName - ]; - - extraCMakeFlags = [ - "-DCOMPILER_RT_INCLUDE_TESTS=ON" - "-DCOMPILER_RT_USE_LLVM_UNWINDER=ON" - "-DCOMPILER_RT_CXX_LIBRARY=libcxx" - "-DCOMPILER_RT_CAN_EXECUTE_TESTS=OFF" # We can't run most of these - - # Workaround having to build combined - "-DLIBUNWIND_INCLUDE_DOCS=OFF" - "-DLIBUNWIND_INCLUDE_TESTS=OFF" - "-DLIBUNWIND_USE_COMPILER_RT=ON" - "-DLIBUNWIND_INSTALL_LIBRARY=OFF" - "-DLIBUNWIND_INSTALL_HEADERS=OFF" - "-DLIBCXXABI_INCLUDE_TESTS=OFF" - "-DLIBCXXABI_USE_LLVM_UNWINDER=ON" - "-DLIBCXXABI_USE_COMPILER_RT=ON" - "-DLIBCXXABI_INSTALL_LIBRARY=OFF" - "-DLIBCXXABI_INSTALL_HEADERS=OFF" - "-DLIBCXX_INCLUDE_DOCS=OFF" - "-DLIBCXX_INCLUDE_TESTS=OFF" - "-DLIBCXX_USE_COMPILER_RT=ON" - "-DLIBCXX_CXX_ABI=libcxxabi" - "-DLIBCXX_INSTALL_LIBRARY=OFF" - "-DLIBCXX_INSTALL_HEADERS=OFF" - ]; - - extraPostPatch = '' - # `No such file or directory: 'ldd'` - substituteInPlace ../compiler-rt/test/lit.common.cfg.py \ - --replace "'ldd'," "'${glibc.bin}/bin/ldd'," - - # We can run these - substituteInPlace 
../compiler-rt/test/CMakeLists.txt \ - --replace "endfunction()" "endfunction()''\nadd_subdirectory(builtins)''\nadd_subdirectory(shadowcallstack)" - - # Could not launch llvm-config in /build/source/runtimes/build/bin - mkdir -p build/bin - ln -s ${llvm}/bin/llvm-config build/bin - ''; - - extraLicenses = [ lib.licenses.mit ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-2/libc.nix b/pkgs/development/rocm-modules/6/llvm/stage-2/libc.nix deleted file mode 100644 index 26d33460d433..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-2/libc.nix +++ /dev/null @@ -1,27 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildMan = false; # No man pages to build - targetName = "libc"; - targetDir = "runtimes"; - targetRuntimes = [ targetName ]; - - extraPostPatch = '' - # `Failed to match ... against ...` `Match value not within tolerance value of MPFR result:` - # We need a better way, but I don't know enough sed magic and patching `CMakeLists.txt` isn't working... 
- substituteInPlace ../libc/test/src/math/log10_test.cpp \ - --replace-fail "i < N" "i < 0" \ - --replace-fail "test(mpfr::RoundingMode::Nearest);" "" \ - --replace-fail "test(mpfr::RoundingMode::Downward);" "" \ - --replace-fail "test(mpfr::RoundingMode::Upward);" "" \ - --replace-fail "test(mpfr::RoundingMode::TowardZero);" "" - ''; - - checkTargets = [ "check-${targetName}" ]; - hardeningDisable = [ "fortify" ]; # Prevent `error: "Assumed value of MB_LEN_MAX wrong"` -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-2/libcxx.nix b/pkgs/development/rocm-modules/6/llvm/stage-2/libcxx.nix deleted file mode 100644 index b9ed102d5408..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-2/libcxx.nix +++ /dev/null @@ -1,43 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildMan = false; # No man pages to build - targetName = "libcxx"; - targetDir = "runtimes"; - - targetRuntimes = [ - "libunwind" - "libcxxabi" - targetName - ]; - - extraCMakeFlags = [ - "-DLIBCXX_INCLUDE_DOCS=ON" - "-DLIBCXX_INCLUDE_TESTS=ON" - "-DLIBCXX_USE_COMPILER_RT=ON" - "-DLIBCXX_CXX_ABI=libcxxabi" - - # Workaround having to build combined - "-DLIBUNWIND_INCLUDE_DOCS=OFF" - "-DLIBUNWIND_INCLUDE_TESTS=OFF" - "-DLIBUNWIND_USE_COMPILER_RT=ON" - "-DLIBUNWIND_INSTALL_LIBRARY=OFF" - "-DLIBUNWIND_INSTALL_HEADERS=OFF" - "-DLIBCXXABI_INCLUDE_TESTS=OFF" - "-DLIBCXXABI_USE_LLVM_UNWINDER=ON" - "-DLIBCXXABI_USE_COMPILER_RT=ON" - "-DLIBCXXABI_INSTALL_LIBRARY=OFF" - "-DLIBCXXABI_INSTALL_HEADERS=OFF" - ]; - - # Most of these can't find `bash` or `mkdir`, might just be hard-coded paths, or PATH is altered - extraPostPatch = '' - chmod +w -R ../libcxx/test/{libcxx,std} - cat ${./1000-libcxx-failing-tests.list} | xargs -d \\n rm - ''; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-2/libcxxabi.nix b/pkgs/development/rocm-modules/6/llvm/stage-2/libcxxabi.nix deleted file mode 100644 index 
bc54e17be45f..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-2/libcxxabi.nix +++ /dev/null @@ -1,38 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildDocs = false; # No documentation to build - buildMan = false; # No man pages to build - targetName = "libcxxabi"; - targetDir = "runtimes"; - - targetRuntimes = [ - "libunwind" - targetName - "libcxx" - ]; - - extraCMakeFlags = [ - "-DLIBCXXABI_INCLUDE_TESTS=ON" - "-DLIBCXXABI_USE_LLVM_UNWINDER=ON" - "-DLIBCXXABI_USE_COMPILER_RT=ON" - - # Workaround having to build combined - "-DLIBUNWIND_INCLUDE_DOCS=OFF" - "-DLIBUNWIND_INCLUDE_TESTS=OFF" - "-DLIBUNWIND_USE_COMPILER_RT=ON" - "-DLIBUNWIND_INSTALL_LIBRARY=OFF" - "-DLIBUNWIND_INSTALL_HEADERS=OFF" - "-DLIBCXX_INCLUDE_DOCS=OFF" - "-DLIBCXX_INCLUDE_TESTS=OFF" - "-DLIBCXX_USE_COMPILER_RT=ON" - "-DLIBCXX_CXX_ABI=libcxxabi" - "-DLIBCXX_INSTALL_LIBRARY=OFF" - "-DLIBCXX_INSTALL_HEADERS=OFF" - ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-2/libunwind.nix b/pkgs/development/rocm-modules/6/llvm/stage-2/libunwind.nix deleted file mode 100644 index fb5e7cb3b68c..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-2/libunwind.nix +++ /dev/null @@ -1,27 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildMan = false; # No man pages to build - targetName = "libunwind"; - targetDir = "runtimes"; - targetRuntimes = [ targetName ]; - - extraCMakeFlags = [ - "-DLIBUNWIND_INCLUDE_DOCS=ON" - "-DLIBUNWIND_INCLUDE_TESTS=ON" - "-DLIBUNWIND_USE_COMPILER_RT=ON" - ]; - - extraPostPatch = '' - # `command had no output on stdout or stderr` (Says these unsupported tests) - chmod +w -R ../libunwind/test - rm ../libunwind/test/floatregister.pass.cpp - rm ../libunwind/test/unwind_leaffunction.pass.cpp - rm ../libunwind/test/libunwind_02.pass.cpp - ''; -} diff --git 
a/pkgs/development/rocm-modules/6/llvm/stage-2/rstdenv.nix b/pkgs/development/rocm-modules/6/llvm/stage-2/rstdenv.nix deleted file mode 100644 index f83abe36cc2e..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-2/rstdenv.nix +++ /dev/null @@ -1,37 +0,0 @@ -{ - stdenv, - overrideCC, - wrapCCWith, - llvm, - clang-unwrapped, - lld, - runtimes, - bintools, -}: - -overrideCC stdenv (wrapCCWith rec { - inherit bintools; - libcxx = runtimes; - cc = clang-unwrapped; - gccForLibs = stdenv.cc.cc; - - extraPackages = [ - llvm - lld - ]; - - nixSupport.cc-cflags = [ - "-resource-dir=$out/resource-root" - "-fuse-ld=lld" - "-rtlib=compiler-rt" - "-unwindlib=libunwind" - "-Wno-unused-command-line-argument" - ]; - - extraBuildCommands = '' - clang_version=`${cc}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"` - mkdir -p $out/resource-root - ln -s ${cc}/lib/clang/$clang_version/include $out/resource-root - ln -s ${runtimes}/lib $out/resource-root - ''; -}) diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/1000-openmp-failing-tests.list b/pkgs/development/rocm-modules/6/llvm/stage-3/1000-openmp-failing-tests.list deleted file mode 100644 index e53b21b3c535..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/1000-openmp-failing-tests.list +++ /dev/null @@ -1,122 +0,0 @@ -runtime/test/tasking/hidden_helper_task/gtid.cpp -runtime/test/ompt/parallel/parallel_if0.c -runtime/test/ompt/parallel/serialized.c -runtime/test/ompt/teams/parallel_team.c -runtime/test/ompt/teams/serial_teams.c -runtime/test/ompt/teams/serialized.c -runtime/test/ompt/teams/team.c -libomptarget/test/api/assert.c -libomptarget/test/api/omp_device_managed_memory.c -libomptarget/test/api/omp_device_memory.c -libomptarget/test/api/omp_get_device_num.c -libomptarget/test/api/omp_host_pinned_memory.c -libomptarget/test/api/omp_host_pinned_memory_alloc.c -libomptarget/test/api/omp_target_memcpy_async1.c -libomptarget/test/api/omp_target_memcpy_async2.c 
-libomptarget/test/api/omp_target_memcpy_rect_async1.c -libomptarget/test/api/omp_target_memcpy_rect_async2.c -libomptarget/test/mapping/array_section_implicit_capture.c -libomptarget/test/mapping/data_absent_at_exit.c -libomptarget/test/mapping/data_member_ref.cpp -libomptarget/test/mapping/declare_mapper_api.cpp -libomptarget/test/mapping/declare_mapper_target.cpp -libomptarget/test/mapping/declare_mapper_target_data.cpp -libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp -libomptarget/test/mapping/firstprivate_aligned.cpp -libomptarget/test/mapping/has_device_addr.cpp -libomptarget/test/mapping/implicit_device_ptr.c -libomptarget/test/mapping/is_device_ptr.cpp -libomptarget/test/mapping/lambda_mapping.cpp -libomptarget/test/mapping/low_alignment.c -libomptarget/test/mapping/map_back_race.cpp -libomptarget/test/mapping/power_of_two_alignment.c -libomptarget/test/mapping/pr38704.c -libomptarget/test/mapping/prelock.cpp -libomptarget/test/mapping/present/target_data_at_exit.c -libomptarget/test/mapping/private_mapping.c -libomptarget/test/mapping/ptr_and_obj_motion.c -libomptarget/test/mapping/reduction_implicit_map.cpp -libomptarget/test/mapping/target_derefence_array_pointrs.cpp -libomptarget/test/mapping/target_map_for_member_data.cpp -libomptarget/test/mapping/target_update_array_extension.c -libomptarget/test/mapping/target_use_device_addr.c -libomptarget/test/offloading/atomic-compare-signedness.c -libomptarget/test/offloading/bug47654.cpp -libomptarget/test/offloading/bug49021.cpp -libomptarget/test/offloading/bug49779.cpp -libomptarget/test/offloading/bug50022.cpp -libomptarget/test/offloading/bug51781.c -libomptarget/test/offloading/bug51982.c -libomptarget/test/offloading/bug53727.cpp -libomptarget/test/offloading/complex_reduction.cpp -libomptarget/test/offloading/cuda_no_devices.c -libomptarget/test/offloading/d2d_memcpy.c -libomptarget/test/offloading/dynamic_module.c -libomptarget/test/offloading/dynamic_module_load.c 
-libomptarget/test/offloading/global_constructor.cpp -libomptarget/test/offloading/lone_target_exit_data.c -libomptarget/test/offloading/memory_manager.cpp -libomptarget/test/offloading/parallel_offloading_map.cpp -libomptarget/test/offloading/static_linking.c -libomptarget/test/offloading/std_complex_arithmetic.cpp -libomptarget/test/offloading/target-teams-atomic.c -libomptarget/test/offloading/target_constexpr_mapping.cpp -libomptarget/test/offloading/target_critical_region.cpp -libomptarget/test/offloading/target_depend_nowait.cpp -libomptarget/test/offloading/target_nowait_target.cpp -libomptarget/test/offloading/taskloop_offload_nowait.cpp -libomptarget/test/offloading/test_libc.cpp -libomptarget/test/ompt/veccopy.c -libomptarget/test/ompt/veccopy_disallow_both.c -libomptarget/test/ompt/veccopy_emi.c -libomptarget/test/ompt/veccopy_emi_map.c -libomptarget/test/ompt/veccopy_map.c -libomptarget/test/ompt/veccopy_no_device_init.c -libomptarget/test/ompt/veccopy_wrong_return.c -libomptarget/test/api/is_initial_device.c -libomptarget/test/mapping/declare_mapper_nested_default_mappers_array_subscript.cpp -libomptarget/test/mapping/declare_mapper_nested_default_mappers_ptr_subscript.cpp -libomptarget/test/mapping/declare_mapper_nested_default_mappers_var.cpp -libomptarget/test/mapping/target_pointers_members_map.cpp -libomptarget/test/api/omp_dynamic_shared_memory_mixed.c -libomptarget/test/api/omp_env_vars.c -libomptarget/test/api/omp_get_mapped_ptr.c -libomptarget/test/api/omp_get_num_devices.c -libomptarget/test/api/omp_get_num_devices_with_empty_target.c -libomptarget/test/mapping/alloc_fail.c -libomptarget/test/mapping/array_section_use_device_ptr.c -libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp -libomptarget/test/mapping/declare_mapper_nested_mappers.cpp -libomptarget/test/mapping/declare_mapper_target_update.cpp -libomptarget/test/mapping/delete_inf_refcount.c -libomptarget/test/mapping/lambda_by_value.cpp 
-libomptarget/test/mapping/ompx_hold/omp_target_disassociate_ptr.c -libomptarget/test/mapping/ompx_hold/struct.c -libomptarget/test/mapping/ompx_hold/target-data.c -libomptarget/test/mapping/ompx_hold/target.c -libomptarget/test/mapping/present/target.c -libomptarget/test/mapping/present/target_array_extension.c -libomptarget/test/mapping/present/target_data.c -libomptarget/test/mapping/present/target_data_array_extension.c -libomptarget/test/mapping/present/target_enter_data.c -libomptarget/test/mapping/present/target_exit_data_delete.c -libomptarget/test/mapping/present/target_exit_data_release.c -libomptarget/test/mapping/present/target_update.c -libomptarget/test/mapping/present/target_update_array_extension.c -libomptarget/test/mapping/present/zero_length_array_section.c -libomptarget/test/mapping/present/zero_length_array_section_exit.c -libomptarget/test/mapping/target_data_array_extension_at_exit.c -libomptarget/test/mapping/target_has_device_addr.c -libomptarget/test/mapping/target_implicit_partial_map.c -libomptarget/test/mapping/target_wrong_use_device_addr.c -libomptarget/test/offloading/host_as_target.c -libomptarget/test/offloading/info.c -libomptarget/test/offloading/offloading_success.c -libomptarget/test/offloading/offloading_success.cpp -libomptarget/test/offloading/wtime.c -libomptarget/test/unified_shared_memory/api.c -libomptarget/test/unified_shared_memory/associate_ptr.c -libomptarget/test/unified_shared_memory/close_enter_exit.c -libomptarget/test/unified_shared_memory/close_manual.c -libomptarget/test/unified_shared_memory/close_member.c -libomptarget/test/unified_shared_memory/close_modifier.c diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/1001-mlir-failing-tests.list b/pkgs/development/rocm-modules/6/llvm/stage-3/1001-mlir-failing-tests.list deleted file mode 100644 index 0b3d2d22592d..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/1001-mlir-failing-tests.list +++ /dev/null @@ -1,11 +0,0 @@ 
-./test/Target/LLVMIR/openmp-llvm.mlir -./test/mlir-spirv-cpu-runner/double.mlir -./test/mlir-spirv-cpu-runner/simple_add.mlir -./test/mlir-vulkan-runner/addf.mlir -./test/mlir-vulkan-runner/addi.mlir -./test/mlir-vulkan-runner/addi8.mlir -./test/mlir-vulkan-runner/mulf.mlir -./test/mlir-vulkan-runner/smul_extended.mlir -./test/mlir-vulkan-runner/subf.mlir -./test/mlir-vulkan-runner/time.mlir -./test/mlir-vulkan-runner/umul_extended.mlir diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/clang-tools-extra.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/clang-tools-extra.nix deleted file mode 100644 index 314ce9806424..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/clang-tools-extra.nix +++ /dev/null @@ -1,43 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, - llvm, - clang-unwrapped, - gtest, -}: - -callPackage ../base.nix { - inherit stdenv rocmUpdateScript; - buildTests = false; # `invalid operands to binary expression ('std::basic_stringstream' and 'const llvm::StringRef')` - targetName = "clang-tools-extra"; - - targetProjects = [ - "clang" - "clang-tools-extra" - ]; - - extraBuildInputs = [ gtest ]; - - extraCMakeFlags = [ - "-DLLVM_INCLUDE_DOCS=OFF" - "-DLLVM_INCLUDE_TESTS=OFF" - "-DCLANG_INCLUDE_DOCS=OFF" - "-DCLANG_INCLUDE_TESTS=ON" - "-DCLANG_TOOLS_EXTRA_INCLUDE_DOCS=ON" - ]; - - extraPostInstall = '' - # Remove LLVM and Clang - for path in `find ${llvm} ${clang-unwrapped}`; do - if [ $path != ${llvm} ] && [ $path != ${clang-unwrapped} ]; then - rm -f $out''${path#${llvm}} $out''${path#${clang-unwrapped}} || true - fi - done - - # Cleanup empty directories - find $out -type d -empty -delete - ''; - - requiredSystemFeatures = [ "big-parallel" ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/clang.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/clang.nix deleted file mode 100644 index 4afaa726ad78..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/clang.nix +++ /dev/null @@ -1,77 +0,0 @@ -{ - 
stdenv, - wrapCCWith, - llvm, - lld, - clang-unwrapped, - bintools, - libc, - libunwind, - libcxxabi, - libcxx, - compiler-rt, -}: - -wrapCCWith rec { - inherit libcxx bintools; - - # We do this to avoid HIP pathing problems, and mimic a monolithic install - cc = stdenv.mkDerivation (finalAttrs: { - inherit (clang-unwrapped) version; - pname = "rocm-llvm-clang"; - dontUnpack = true; - - installPhase = '' - runHook preInstall - - clang_version=`${clang-unwrapped}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"` - mkdir -p $out/{bin,include/c++/v1,lib/{cmake,clang/$clang_version/{include,lib}},libexec,share} - - for path in ${llvm} ${clang-unwrapped} ${lld} ${libc} ${libunwind} ${libcxxabi} ${libcxx} ${compiler-rt}; do - cp -as $path/* $out - chmod +w $out/{*,include/c++/v1,lib/{clang/$clang_version/include,cmake}} - rm -f $out/lib/libc++.so - done - - ln -s $out/lib/* $out/lib/clang/$clang_version/lib - ln -sf $out/include/* $out/lib/clang/$clang_version/include - - runHook postInstall - ''; - - passthru.isClang = true; - passthru.isROCm = true; - }); - - gccForLibs = stdenv.cc.cc; - - extraPackages = [ - llvm - lld - libc - libunwind - libcxxabi - compiler-rt - ]; - - nixSupport.cc-cflags = [ - "-resource-dir=$out/resource-root" - "-fuse-ld=lld" - "-rtlib=compiler-rt" - "-unwindlib=libunwind" - "-Wno-unused-command-line-argument" - ]; - - extraBuildCommands = '' - clang_version=`${cc}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"` - mkdir -p $out/resource-root - ln -s ${cc}/lib/clang/$clang_version/{include,lib} $out/resource-root - - # Not sure why, but hardening seems to make things break - echo "" > $out/nix-support/add-hardening.sh - - # GPU compilation uses builtin `lld` - substituteInPlace $out/bin/{clang,clang++} \ - --replace-fail "-MM) dontLink=1 ;;" "-MM | --cuda-device-only) dontLink=1 ;;''\n--cuda-host-only | --cuda-compile-host-device) dontLink=0 ;;" - ''; -} diff --git 
a/pkgs/development/rocm-modules/6/llvm/stage-3/flang.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/flang.nix deleted file mode 100644 index c6e72d56ce56..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/flang.nix +++ /dev/null @@ -1,32 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, - clang-unwrapped, - mlir, - graphviz, - python3Packages, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - targetName = "flang"; - targetDir = targetName; - - extraNativeBuildInputs = [ - graphviz - python3Packages.sphinx-markdown-tables - ]; - - extraBuildInputs = [ mlir ]; - - extraCMakeFlags = [ - "-DCLANG_DIR=${clang-unwrapped}/lib/cmake/clang" - "-DMLIR_TABLEGEN_EXE=${mlir}/bin/mlir-tblgen" - "-DCLANG_TABLEGEN_EXE=${clang-unwrapped}/bin/clang-tblgen" - "-DFLANG_INCLUDE_TESTS=OFF" # `The dependency target "Bye" of target ...` - ]; - - # `flang/lib/Semantics/check-omp-structure.cpp:1905:1: error: no member named 'v' in 'Fortran::parser::OmpClause::OmpxDynCgroupMem'` - isBroken = true; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/libclc.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/libclc.nix deleted file mode 100644 index c8a6b98d9130..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/libclc.nix +++ /dev/null @@ -1,38 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, - llvm, - clang, - spirv-llvm-translator, -}: - -let - spirv = (spirv-llvm-translator.override { inherit llvm; }); -in -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildDocs = false; # No documentation to build - buildMan = false; # No man pages to build - targetName = "libclc"; - targetDir = targetName; - extraBuildInputs = [ spirv ]; - - # `spirv-mesa3d` isn't compiling with LLVM 15.0.0, it does with LLVM 14.0.0 - # Try removing the `spirv-mesa3d` and `clspv` patches next update - # `clspv` tests fail, unresolved calls - extraPostPatch = '' - substituteInPlace CMakeLists.txt \ - --replace-fail 
"find_program( LLVM_CLANG clang PATHS \''${LLVM_BINDIR} NO_DEFAULT_PATH )" \ - "find_program( LLVM_CLANG clang PATHS \"${clang}/bin\" NO_DEFAULT_PATH )" \ - --replace-fail "find_program( LLVM_SPIRV llvm-spirv PATHS \''${LLVM_BINDIR} NO_DEFAULT_PATH )" \ - "find_program( LLVM_SPIRV llvm-spirv PATHS \"${spirv}/bin\" NO_DEFAULT_PATH )" \ - --replace-fail " spirv-mesa3d-" "" \ - --replace-fail " spirv64-mesa3d-" "" \ - --replace-fail "NOT \''${t} MATCHES" \ - "NOT \''${ARCH} STREQUAL \"clspv\" AND NOT \''${ARCH} STREQUAL \"clspv64\" AND NOT \''${t} MATCHES" - ''; - - checkTargets = [ ]; - isBroken = true; # ROCm 5.7.0 doesn't have IR/AttributeMask.h yet...? -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/lldb.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/lldb.nix deleted file mode 100644 index 31694ce50113..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/lldb.nix +++ /dev/null @@ -1,40 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, - clang, - xz, - swig, - lua5_3, - graphviz, - gtest, - python3Packages, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildTests = false; # FIXME: Bad pathing for clang executable in tests, using relative path most likely - targetName = "lldb"; - targetDir = targetName; - extraNativeBuildInputs = [ python3Packages.sphinx-automodapi ]; - - extraBuildInputs = [ - xz - swig - lua5_3 - graphviz - gtest - ]; - - extraCMakeFlags = [ - "-DLLDB_EXTERNAL_CLANG_RESOURCE_DIR=${clang}/resource-root/lib/clang/$clang_version" - "-DLLDB_INCLUDE_TESTS=ON" - "-DLLDB_INCLUDE_UNITTESTS=ON" - ]; - - extraPostPatch = '' - export clang_version=`clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"` - ''; - - checkTargets = [ "check-${targetName}" ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/mlir.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/mlir.nix deleted file mode 100644 index 9b87769e3e8f..000000000000 --- 
a/pkgs/development/rocm-modules/6/llvm/stage-3/mlir.nix +++ /dev/null @@ -1,61 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, - clr, - vulkan-headers, - vulkan-loader, - glslang, - shaderc, - fetchpatch, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildDocs = false; # No decent way to hack this to work - buildMan = false; # No man pages to build - targetName = "mlir"; - targetDir = targetName; - - # Fix `DebugTranslation.cpp:139:10: error: no matching function for call to 'get'` - extraPatches = [ - (fetchpatch { - url = "https://github.com/ROCm/llvm-project/commit/f1d1e10ec7e1061bf0b90abbc1e298d9438a5e74.patch"; - hash = "sha256-3c91A9InMKxm+JcnWxoUeOU68y5I6w1AAXx6T9UByqI="; - }) - ]; - extraNativeBuildInputs = [ clr ]; - - extraBuildInputs = [ - vulkan-headers - vulkan-loader - glslang - shaderc - ]; - - extraCMakeFlags = [ - "-DMLIR_INCLUDE_DOCS=ON" - "-DMLIR_INCLUDE_TESTS=ON" - "-DMLIR_ENABLE_ROCM_RUNNER=ON" - "-DMLIR_ENABLE_SPIRV_CPU_RUNNER=ON" - "-DMLIR_ENABLE_VULKAN_RUNNER=ON" - "-DROCM_TEST_CHIPSET=gfx000" # CPU runner - ]; - - extraPostPatch = '' - # `add_library cannot create target "llvm_gtest" because an imported target with the same name already exists` - substituteInPlace CMakeLists.txt \ - --replace-fail "EXISTS \''${UNITTEST_DIR}/googletest/include/gtest/gtest.h" "FALSE" - - # Mainly `No such file or directory` - cat ${./1001-mlir-failing-tests.list} | xargs -d \\n rm - ''; - - extraPostInstall = '' - mkdir -p $out/bin - mv bin/mlir-tblgen $out/bin - ''; - - checkTargets = [ "check-${targetName}" ]; - requiredSystemFeatures = [ "big-parallel" ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/openmp.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/openmp.nix deleted file mode 100644 index 258166105780..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/openmp.nix +++ /dev/null @@ -1,55 +0,0 @@ -{ - lib, - stdenv, - callPackage, - rocmUpdateScript, - llvm, - clang, - clang-unwrapped, 
- rocm-device-libs, - rocm-runtime, - rocm-thunk, - perl, - elfutils, - libdrm, - numactl, - lit, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - targetName = "openmp"; - targetDir = targetName; - extraNativeBuildInputs = [ perl ]; - - extraBuildInputs = [ - rocm-device-libs - rocm-runtime - rocm-thunk - elfutils - libdrm - numactl - ]; - - extraCMakeFlags = [ - "-DCMAKE_MODULE_PATH=/build/source/llvm/cmake/modules" # For docs - "-DCLANG_TOOL=${clang}/bin/clang" - "-DCLANG_OFFLOAD_BUNDLER_TOOL=${clang-unwrapped}/bin/clang-offload-bundler" - "-DPACKAGER_TOOL=${clang-unwrapped}/bin/clang-offload-packager" - "-DOPENMP_LLVM_TOOLS_DIR=${llvm}/bin" - "-DOPENMP_LLVM_LIT_EXECUTABLE=${lit}/bin/.lit-wrapped" - "-DDEVICELIBS_ROOT=${rocm-device-libs.src}" - ]; - - extraPostPatch = '' - # We can't build this target at the moment - substituteInPlace libomptarget/DeviceRTL/CMakeLists.txt \ - --replace "gfx1010" "" - - # No idea what's going on here... - cat ${./1000-openmp-failing-tests.list} | xargs -d \\n rm - ''; - - checkTargets = [ "check-${targetName}" ]; - extraLicenses = [ lib.licenses.mit ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/polly.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/polly.nix deleted file mode 100644 index d70a353d3a8c..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/polly.nix +++ /dev/null @@ -1,19 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - targetName = "polly"; - targetDir = targetName; - - extraPostPatch = '' - # `add_library cannot create target "llvm_gtest" because an imported target with the same name already exists` - substituteInPlace CMakeLists.txt \ - --replace-fail "NOT TARGET gtest" "FALSE" - ''; - - checkTargets = [ "check-${targetName}" ]; -} diff --git a/pkgs/development/rocm-modules/6/llvm/stage-3/pstl.nix b/pkgs/development/rocm-modules/6/llvm/stage-3/pstl.nix deleted file mode 100644 
index 63fba911677c..000000000000 --- a/pkgs/development/rocm-modules/6/llvm/stage-3/pstl.nix +++ /dev/null @@ -1,16 +0,0 @@ -{ - stdenv, - callPackage, - rocmUpdateScript, -}: - -callPackage ../base.nix rec { - inherit stdenv rocmUpdateScript; - buildDocs = false; # No documentation to build - buildMan = false; # No man pages to build - buildTests = false; # Too many errors - targetName = "pstl"; - targetDir = "runtimes"; - targetRuntimes = [ targetName ]; - checkTargets = [ "check-${targetName}" ]; -} diff --git a/pkgs/development/rocm-modules/6/migraphx/default.nix b/pkgs/development/rocm-modules/6/migraphx/default.nix index 137f3a234708..3c6b7b68b39a 100644 --- a/pkgs/development/rocm-modules/6/migraphx/default.nix +++ b/pkgs/development/rocm-modules/6/migraphx/default.nix @@ -7,7 +7,6 @@ cmake, rocm-cmake, clr, - clang-tools-extra, openmp, rocblas, rocmlir, @@ -54,7 +53,7 @@ let in stdenv.mkDerivation (finalAttrs: { pname = "migraphx"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -71,7 +70,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "AMDMIGraphX"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-VDYUSpWYAdJ63SKVCO26DVAC3RtZM7otqN0sYUA6DBQ="; + hash = "sha256-h9cTbrMwHeRGVJS/uHQnCXplNcrBqxbhwz2AcAEso0M="; }; nativeBuildInputs = @@ -80,7 +79,6 @@ stdenv.mkDerivation (finalAttrs: { cmake rocm-cmake clr - clang-tools-extra python3Packages.python ] ++ lib.optionals buildDocs [ @@ -172,8 +170,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { diff --git a/pkgs/development/rocm-modules/6/miopen/default.nix b/pkgs/development/rocm-modules/6/miopen/default.nix index e815ca00b7f0..99a1ff9f01af 100644 --- a/pkgs/development/rocm-modules/6/miopen/default.nix +++ b/pkgs/development/rocm-modules/6/miopen/default.nix @@ -10,9 +10,13 @@ 
rocm-cmake, rocblas, rocmlir, + rocrand, + rocm-runtime, + rocm-merged-llvm, + hipblas-common, + hipblas, + hipblaslt, clr, - clang-tools-extra, - clang-ocl, composable_kernel, frugally-deep, rocm-docs-core, @@ -30,43 +34,53 @@ rocm-comgr, roctracer, python3Packages, + # FIXME: should be able to use all clr targets + gpuTargets ? [ + "gfx900" + "gfx906" + "gfx908" + "gfx90a" + "gfx942" + "gfx1030" + "gfx1100" + "gfx1101" + "gfx1102" + ], # clr.gpuTargets buildDocs ? false, # Needs internet because of rocm-docs-core buildTests ? false, + withComposableKernel ? composable_kernel.anyGfx9Target, }: let - version = "6.0.2"; + # FIXME: cmake files need patched to include this properly + cFlags = "-O3 -DNDEBUG -Wno-documentation-pedantic --offload-compress -I${hipblas-common}/include -I${hipblas}/include -I${roctracer}/include -I${nlohmann_json}/include -I${sqlite.dev}/include -I${rocrand}/include"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "MIOpen"; rev = "rocm-${version}"; - hash = "sha256-mbOdlSb0ESKi9hMkq3amv70Xkp/YKnZYre24d/y5TD0="; + hash = "sha256-KV+tJPD4HQayY8zD4AdOFxxYRnyI47suxX5OgZ7mpdU="; fetchLFS = true; + fetchSubmodules = true; + # WORKAROUND: .lfsconfig is incorrectly set to exclude everything upstream leaveDotGit = true; - - # If you're reading this, it's gonna take a bit of time. - # fetchSubModules doesn't work with postFetch??? - # fetchLFS isn't actually fetching the LFS files... 
postFetch = '' export HOME=$(mktemp -d) cd $out - - # We need more history to fetch LFS files + set -x git remote add origin $url - git fetch origin + git fetch origin +refs/tags/rocm-${version}:refs/tags/rocm-${version} git clean -fdx - git checkout rocm-${version} - - # We need to do this manually since using leaveDotGit and fetchSubmodules errors - git submodule update --init - - # Fetch the LFS files + git switch -c rocm-${version} refs/tags/rocm-${version} + git config lfs.fetchexclude "none" + rm .lfsconfig git lfs install - git lfs fetch --all + git lfs track "*.kdb.bz2" + GIT_TRACE=1 git lfs fetch --include="src/kernels/**" + GIT_TRACE=1 git lfs pull --include="src/kernels/**" git lfs checkout - # Remove the defunct .git folder rm -rf .git ''; }; @@ -112,8 +126,13 @@ stdenv.mkDerivation (finalAttrs: { inherit version src; pname = "miopen"; + env.CFLAGS = cFlags; + env.CXXFLAGS = cFlags; + # Find zstd and add to target. Mainly for torch. patches = [ + ./skip-preexisting-dbs.patch + ./fix-isnan.patch # https://github.com/ROCm/MIOpen/pull/3448 (fetchpatch { url = "https://github.com/ROCm/MIOpen/commit/e608b4325646afeabb5e52846997b926d2019d19.patch"; hash = "sha256-oxa3qlIC2bzbwGxrQOZXoY/S7CpLsMrnWRB7Og0tk0M="; @@ -122,11 +141,14 @@ stdenv.mkDerivation (finalAttrs: { url = "https://github.com/ROCm/MIOpen/commit/3413d2daaeb44b7d6eadcc03033a5954a118491e.patch"; hash = "sha256-ST4snUcTmmSI1Ogx815KEX9GdMnmubsavDzXCGJkiKs="; }) - (fetchpatch { - name = "Extend-MIOpen-ISA-compatibility.patch"; - url = "https://github.com/GZGavinZhao/MIOpen/commit/416088b534618bd669a765afce59cfc7197064c1.patch"; - hash = "sha256-OwONCA68y8s2GqtQj+OtotXwUXQ5jM8tpeM92iaD4MU="; - }) + # FIXME: We need to rebase or drop this arch compat patch + # https://github.com/ROCm/MIOpen/issues/3540 suggests that + # arch compat patching doesn't work correctly for gfx1031 + # (fetchpatch { + # name = "Extend-MIOpen-ISA-compatibility.patch"; + # url = 
"https://github.com/GZGavinZhao/MIOpen/commit/416088b534618bd669a765afce59cfc7197064c1.patch"; + # hash = "sha256-OwONCA68y8s2GqtQj+OtotXwUXQ5jM8tpeM92iaD4MU="; + # }) ]; outputs = @@ -139,21 +161,24 @@ stdenv.mkDerivation (finalAttrs: { ++ lib.optionals buildTests [ "test" ]; + enableParallelBuilding = true; + env.ROCM_PATH = clr; + env.LD_LIBRARY_PATH = lib.makeLibraryPath [ rocm-runtime ]; + env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin"; nativeBuildInputs = [ pkg-config cmake rocm-cmake clr - clang-tools-extra ]; buildInputs = [ + hipblas + hipblas-common rocblas rocmlir - clang-ocl - composable_kernel half boost sqlite @@ -161,6 +186,11 @@ stdenv.mkDerivation (finalAttrs: { nlohmann_json frugally-deep roctracer + rocrand + hipblaslt + ] + ++ lib.optionals withComposableKernel [ + composable_kernel ] ++ lib.optionals buildDocs [ latex @@ -178,15 +208,32 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_CXX_FLAGS=-Wno-#warnings" # -> - "-DUNZIPPER=${bzip2}/bin/bunzip2" + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + "-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}" + "-DMIOPEN_USE_SQLITE_PERFDB=ON" + "-DCMAKE_VERBOSE_MAKEFILE=ON" + "-DCMAKE_MODULE_PATH=${clr}/hip/cmake" + "-DCMAKE_BUILD_TYPE=Release" + + # needs to stream to stdout so bzcat rather than bunzip2 + "-DUNZIPPER=${bzip2}/bin/bzcat" + + "-DCMAKE_C_COMPILER=amdclang" + "-DCMAKE_CXX_COMPILER=amdclang++" + "-DROCM_PATH=${clr}" + "-DHIP_ROOT_DIR=${clr}" + (lib.cmakeBool "MIOPEN_USE_ROCBLAS" true) + (lib.cmakeBool "MIOPEN_USE_HIPBLASLT" true) + (lib.cmakeBool "MIOPEN_USE_COMPOSABLEKERNEL" withComposableKernel) + (lib.cmakeBool "MIOPEN_USE_HIPRTC" true) + (lib.cmakeBool "MIOPEN_USE_COMGR" true) + "-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" - 
"-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" "-DMIOPEN_BACKEND=HIP" ] ++ lib.optionals buildTests [ @@ -195,24 +242,29 @@ stdenv.mkDerivation (finalAttrs: { ]; postPatch = '' - patchShebangs test src/composable_kernel fin utils install_deps.cmake + substituteInPlace cmake/ClangTidy.cmake \ + --replace-fail 'macro(enable_clang_tidy)' 'macro(enable_clang_tidy) + endmacro() + macro(enable_clang_tidy_unused)' \ + --replace-fail 'function(clang_tidy_check TARGET)' 'function(clang_tidy_check TARGET) + return()' - substituteInPlace CMakeLists.txt \ - --replace "unpack_db(\"\''${CMAKE_SOURCE_DIR}/src/kernels/\''${FILE_NAME}.kdb.bz2\")" "" \ - --replace "MIOPEN_HIP_COMPILER MATCHES \".*clang\\\\+\\\\+$\"" "true" \ - --replace "set(MIOPEN_TIDY_ERRORS ALL)" "" # error: missing required key 'key' + patchShebangs test src/composable_kernel fin utils install_deps.cmake substituteInPlace test/gtest/CMakeLists.txt \ --replace "include(googletest)" "" - substituteInPlace test/gtest/CMakeLists.txt \ - --replace-fail " gtest_main " " ${gtest}/lib/libgtest.so ${gtest}/lib/libgtest_main.so " - ln -sf ${gfx900} src/kernels/gfx900.kdb ln -sf ${gfx906} src/kernels/gfx906.kdb ln -sf ${gfx908} src/kernels/gfx908.kdb ln -sf ${gfx90a} src/kernels/gfx90a.kdb ln -sf ${gfx1030} src/kernels/gfx1030.kdb + mkdir -p build/share/miopen/db/ + ln -sf ${gfx900} build/share/miopen/db/gfx900.kdb + ln -sf ${gfx906} build/share/miopen/db/gfx906.kdb + ln -sf ${gfx908} build/share/miopen/db/gfx908.kdb + ln -sf ${gfx90a} build/share/miopen/db/gfx90a.kdb + ln -sf ${gfx1030} build/share/miopen/db/gfx1030.kdb ''; # Unfortunately, it seems like we have to call make on these manually @@ -249,13 +301,14 @@ stdenv.mkDerivation (finalAttrs: { ) } $test/bin/* ''; + # doCheck = false; # FIXME: clang-tidy really slow :( requiredSystemFeatures = [ "big-parallel" ]; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit 
(finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -264,8 +317,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/miopen/fix-isnan.patch b/pkgs/development/rocm-modules/6/miopen/fix-isnan.patch new file mode 100644 index 000000000000..71c140d57c8e --- /dev/null +++ b/pkgs/development/rocm-modules/6/miopen/fix-isnan.patch @@ -0,0 +1,31 @@ +From 17f67e0aa31cd2f1c1cb012d3858abf6956acc72 Mon Sep 17 00:00:00 2001 +From: "Sv. Lockal" +Date: Tue, 24 Dec 2024 14:43:10 +0000 +Subject: [PATCH] Fix missing isnan definition on libstdc++ >=14 systems + +Closes #3441 +--- + driver/reducecalculation_driver.hpp | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/driver/reducecalculation_driver.hpp b/driver/reducecalculation_driver.hpp +index 8226b3c953..2001969509 100644 +--- a/driver/reducecalculation_driver.hpp ++++ b/driver/reducecalculation_driver.hpp +@@ -33,6 +33,7 @@ + #include "random.hpp" + #include + #include ++#include + #include + #include + #include +@@ -77,7 +78,7 @@ int32_t mloReduceCalculationForwardRunHost(miopenTensorDescriptor_t inputDesc, + for(size_t i = 0; i < reduce_size; ++i) + { + Tcheck val = static_cast(input[input_idx]); +- if(nanPropagation && isnan(val)) ++ if(nanPropagation && std::isnan(val)) + { + val = 0.0f; + } diff --git a/pkgs/development/rocm-modules/6/miopen/skip-preexisting-dbs.patch b/pkgs/development/rocm-modules/6/miopen/skip-preexisting-dbs.patch new file mode 100644 index 000000000000..89be2f4076cb --- /dev/null +++ b/pkgs/development/rocm-modules/6/miopen/skip-preexisting-dbs.patch @@ -0,0 +1,22 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index d0ffaf983..0b9ed0952 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt 
+@@ -554,7 +554,7 @@ endif() + function(unpack_db db_bzip2_file) + get_filename_component(__fname ${db_bzip2_file} NAME_WLE) + add_custom_command(OUTPUT ${KERNELS_BINARY_DIR}/${__fname} +- COMMAND ${UNZIPPER} -dc -k ${db_bzip2_file} > ${KERNELS_BINARY_DIR}/${__fname}) ++ COMMAND test -e ${KERNELS_BINARY_DIR}/${__fname} || ${UNZIPPER} -dc -k ${db_bzip2_file} > ${KERNELS_BINARY_DIR}/${__fname}) + string(REPLACE "." "_" __tname ${__fname}) + add_custom_target(generate_${__tname} ALL DEPENDS ${KERNELS_BINARY_DIR}/${__fname}) + +@@ -563,7 +563,7 @@ function(unpack_db db_bzip2_file) + if(NOT MIOPEN_USE_SQLITE_PERFDB AND __extension STREQUAL ".db") + add_custom_command(OUTPUT ${KERNELS_BINARY_DIR}/${__fname}.txt + DEPENDS sqlite2txt generate_${__tname} +- COMMAND $ ${KERNELS_BINARY_DIR}/${__fname} ${KERNELS_BINARY_DIR}/${__fname}.txt ++ COMMAND test -e ${KERNELS_BINARY_DIR}/${__fname}.txt || $ ${KERNELS_BINARY_DIR}/${__fname} ${KERNELS_BINARY_DIR}/${__fname}.txt + ) + add_custom_target(generate_${__tname}_txt ALL DEPENDS ${KERNELS_BINARY_DIR}/${__fname}.txt) + add_dependencies(generate_kernels generate_${__tname}_txt) diff --git a/pkgs/development/rocm-modules/6/mivisionx/default.nix b/pkgs/development/rocm-modules/6/mivisionx/default.nix index 1b117ad3fc4d..22670c5198b7 100644 --- a/pkgs/development/rocm-modules/6/mivisionx/default.nix +++ b/pkgs/development/rocm-modules/6/mivisionx/default.nix @@ -12,7 +12,6 @@ rocblas, miopen, migraphx, - clang, openmp, protobuf, qtcreator, @@ -43,13 +42,13 @@ stdenv.mkDerivation (finalAttrs: { "cpu" ); - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "MIVisionX"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-d32lcJq24MXeIWbNbo6putWaol5kF2io6cz4ZuL+DbE="; + hash = "sha256-SisCbUDCAiWQ1Ue7qrtoT6vO/1ztzqji+3cJD6MXUNw="; }; patches = [ @@ -98,6 +97,9 @@ stdenv.mkDerivation (finalAttrs: { "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" "-DCMAKE_INSTALL_PREFIX_PYTHON=lib" + 
"-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_omp_LIBRARY=${openmp}/lib" # "-DAMD_FP16_SUPPORT=ON" `error: typedef redefinition with different types ('__half' vs 'half_float::half')` ] ++ lib.optionals (gpuTargets != [ ]) [ @@ -115,37 +117,26 @@ stdenv.mkDerivation (finalAttrs: { postPatch = '' # We need to not use hipcc and define the CXXFLAGS manually due to `undefined hidden symbol: tensorflow:: ...` - export CXXFLAGS+="--rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode" - patchShebangs rocAL/rocAL_pybind/examples - - # Properly find miopen + export CXXFLAGS+=" --rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode" + # Properly find miopen, fix ffmpeg version detection substituteInPlace amd_openvx_extensions/CMakeLists.txt \ --replace "miopen PATHS \''${ROCM_PATH} QUIET" "miopen PATHS ${miopen} QUIET" \ --replace "\''${ROCM_PATH}/include/miopen/config.h" "${miopen}/include/miopen/config.h" # Properly find turbojpeg - substituteInPlace amd_openvx/cmake/FindTurboJpeg.cmake \ - --replace "\''${TURBO_JPEG_PATH}/include" "${libjpeg_turbo.dev}/include" \ - --replace "\''${TURBO_JPEG_PATH}/lib" "${libjpeg_turbo.out}/lib" - - # Fix bad paths - substituteInPlace rocAL/rocAL/rocAL_hip/CMakeLists.txt amd_openvx_extensions/amd_nn/nn_hip/CMakeLists.txt amd_openvx/openvx/hipvx/CMakeLists.txt \ - --replace "COMPILER_FOR_HIP \''${ROCM_PATH}/llvm/bin/clang++" "COMPILER_FOR_HIP ${clang}/bin/clang++" + substituteInPlace cmake/FindTurboJpeg.cmake \ + --replace-fail "\''${TURBO_JPEG_PATH}/include" "${libjpeg_turbo.dev}/include" \ + --replace-fail "\''${TURBO_JPEG_PATH}/lib" "${libjpeg_turbo.out}/lib" ''; postBuild = lib.optionalString buildDocs '' python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html ''; - postInstall = lib.optionalString (!useOpenCL && !useCPU) '' - patchelf $out/lib/rocal_pybind*.so --shrink-rpath --allowed-rpath-prefixes 
"$NIX_STORE" - chmod +x $out/lib/rocal_pybind*.so - ''; - passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -154,8 +145,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; + broken = useOpenCL; }; }) diff --git a/pkgs/development/rocm-modules/6/mscclpp/default.nix b/pkgs/development/rocm-modules/6/mscclpp/default.nix new file mode 100644 index 000000000000..b7fa373e80a7 --- /dev/null +++ b/pkgs/development/rocm-modules/6/mscclpp/default.nix @@ -0,0 +1,42 @@ +{ + fetchFromGitHub, + stdenv, + cmake, + clr, + numactl, + nlohmann_json, +}: +stdenv.mkDerivation { + pname = "mscclpp"; + version = "unstable-2024-12-13"; + src = fetchFromGitHub { + owner = "microsoft"; + repo = "mscclpp"; + rev = "ee75caf365a27b9ab7521cfdda220b55429e5c37"; + hash = "sha256-/mi9T9T6OIVtJWN3YoEe9az/86rz7BrX537lqaEh3ig="; + }; + nativeBuildInputs = [ + cmake + ]; + buildInputs = [ + clr + numactl + ]; + postPatch = '' + substituteInPlace CMakeLists.txt \ + --replace-fail "gfx90a gfx941 gfx942" "gfx908 gfx90a gfx942 gfx1030 gfx1100" + ''; + cmakeFlags = [ + "-DMSCCLPP_BYPASS_GPU_CHECK=ON" + "-DMSCCLPP_USE_ROCM=ON" + "-DMSCCLPP_BUILD_TESTS=OFF" + "-DGPU_TARGETS=gfx908;gfx90a;gfx942;gfx1030;gfx1100" + "-DAMDGPU_TARGETS=gfx908;gfx90a;gfx942;gfx1030;gfx1100" + "-DMSCCLPP_BUILD_APPS_NCCL=ON" + "-DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF" + "-DFETCHCONTENT_QUIET=OFF" + "-DFETCHCONTENT_TRY_FIND_PACKAGE_MODE=ALWAYS" + "-DFETCHCONTENT_SOURCE_DIR_JSON=${nlohmann_json.src}" + ]; + env.ROCM_PATH = clr; +} diff --git a/pkgs/development/rocm-modules/6/rccl/default.nix b/pkgs/development/rocm-modules/6/rccl/default.nix index 
dc5fd0534b06..dede1e3da4b1 100644 --- a/pkgs/development/rocm-modules/6/rccl/default.nix +++ b/pkgs/development/rocm-modules/6/rccl/default.nix @@ -6,18 +6,35 @@ cmake, rocm-cmake, rocm-smi, + rocm-core, clr, + mscclpp, perl, hipify, gtest, chrpath, + rocprofiler, + rocprofiler-register, + autoPatchelfHook, buildTests ? false, - gpuTargets ? [ ], + gpuTargets ? (clr.localGpuTargets or [ ]), }: +let + useAsan = buildTests; + useUbsan = buildTests; + san = lib.optionalString (useAsan || useUbsan) ( + "-fno-gpu-sanitize -fsanitize=undefined " + + (lib.optionalString useAsan "-fsanitize=address -shared-libsan ") + ); +in +# Note: we can't properly test or make use of multi-node collective ops +# https://github.com/NixOS/nixpkgs/issues/366242 tracks kernel support +# kfd_peerdirect support which is on out-of-tree amdkfd in ROCm/ROCK-Kernel-Driver +# infiniband ib_peer_mem support isn't in the mainline kernel but is carried by some distros stdenv.mkDerivation (finalAttrs: { - pname = "rccl"; - version = "6.0.2"; + pname = "rccl${clr.gpuArchSuffix}"; + version = "6.3.1"; outputs = [ @@ -27,11 +44,17 @@ stdenv.mkDerivation (finalAttrs: { "test" ]; + patches = [ + ./fix-mainline-support-and-ub.diff + ./enable-mscclpp-on-all-gfx9.diff + ./rccl-test-missing-iomanip.diff + ]; + src = fetchFromGitHub { owner = "ROCm"; repo = "rccl"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-Oyml47yGEB7fALxBcDjqFngS38cnI39sDj94/JV7wE0="; + hash = "sha256-61yvFqloOO6qtn0H6XsAPvJ6LKlOeXgTD/xbjCuB3zQ="; }; nativeBuildInputs = [ @@ -40,12 +63,16 @@ stdenv.mkDerivation (finalAttrs: { clr perl hipify + autoPatchelfHook # ASAN doesn't add rpath without this ]; buildInputs = [ rocm-smi gtest + rocprofiler + rocprofiler-register + mscclpp ] ++ lib.optionals buildTests [ chrpath @@ -53,8 +80,17 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_CXX_COMPILER=hipcc" + "-DHIP_CLANG_NUM_PARALLEL_JOBS=4" + "-DCMAKE_BUILD_TYPE=Release" + "-DROCM_PATH=${clr}" + 
"-DHIP_COMPILER=${clr}/bin/amdclang++" + "-DCMAKE_CXX_COMPILER=${clr}/bin/amdclang++" + "-DROCM_PATCH_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}" + "-DROCM_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}" "-DBUILD_BFD=OFF" # Can't get it to detect bfd.h + "-DENABLE_MSCCL_KERNEL=ON" + "-DENABLE_MSCCLPP=ON" + "-DMSCCLPP_ROOT=${mscclpp}" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" @@ -62,32 +98,37 @@ stdenv.mkDerivation (finalAttrs: { "-DCMAKE_INSTALL_INCLUDEDIR=include" ] ++ lib.optionals (gpuTargets != [ ]) [ + # AMD can't make up their minds and keep changing which one is used in different projects. "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" ] ++ lib.optionals buildTests [ "-DBUILD_TESTS=ON" ]; + # -O2 and -fno-strict-aliasing due to UB issues in RCCL :c + # Reported upstream + env.CFLAGS = "-I${clr}/include -O2 -fno-strict-aliasing ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer"; + env.CXXFLAGS = "-I${clr}/include -O2 -fno-strict-aliasing ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer"; + env.LDFLAGS = "${san}"; postPatch = '' patchShebangs src tools - - # Really strange behavior, `#!/usr/bin/env perl` should work... 
- substituteInPlace CMakeLists.txt \ - --replace "\''$ \''${hipify-perl_executable}" "${perl}/bin/perl ${hipify}/bin/hipify-perl" \ - --replace-warn "-parallel-jobs=12" "-parallel-jobs=1" \ - --replace-warn "-parallel-jobs=16" "-parallel-jobs=1" ''; - postInstall = lib.optionalString buildTests '' - mkdir -p $test/bin - mv $out/bin/* $test/bin - rmdir $out/bin - ''; + postInstall = + lib.optionalString useAsan '' + patchelf --add-needed ${clr}/llvm/lib/linux/libclang_rt.asan-${stdenv.hostPlatform.parsed.cpu.name}.so $out/lib/librccl.so + '' + + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/* $test/bin + rmdir $out/bin + ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -99,8 +140,5 @@ stdenv.mkDerivation (finalAttrs: { ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rccl/enable-mscclpp-on-all-gfx9.diff b/pkgs/development/rocm-modules/6/rccl/enable-mscclpp-on-all-gfx9.diff new file mode 100644 index 000000000000..6fc375921064 --- /dev/null +++ b/pkgs/development/rocm-modules/6/rccl/enable-mscclpp-on-all-gfx9.diff @@ -0,0 +1,13 @@ +diff --git a/src/init.cc b/src/init.cc +index 738f756..1b0e4fc 100644 +--- a/src/init.cc ++++ b/src/init.cc +@@ -2049,7 +2049,7 @@ static ncclResult_t ncclCommInitRankFunc(struct ncclAsyncJob* job_) { + if (mscclEnabled() && (comm->topo->mscclEnabled || mscclForceEnabled()) && mscclppCommCompatible(comm)) { + hipDeviceProp_t devProp; + CUDACHECK(hipGetDeviceProperties(&devProp, cudaDev)); +- comm->mscclppCompatible = IsArchMatch(devProp.gcnArchName, "gfx94"); ++ comm->mscclppCompatible = IsArchMatch(devProp.gcnArchName, "gfx9"); + if 
(comm->mscclppCompatible) { + bool mapContainsId = (mscclpp_uniqueIdMap.count(job->commId) > 0); + auto& mscclppUniqueId = mscclpp_uniqueIdMap[job->commId]; diff --git a/pkgs/development/rocm-modules/6/rccl/fix-mainline-support-and-ub.diff b/pkgs/development/rocm-modules/6/rccl/fix-mainline-support-and-ub.diff new file mode 100644 index 000000000000..15efac82c5ab --- /dev/null +++ b/pkgs/development/rocm-modules/6/rccl/fix-mainline-support-and-ub.diff @@ -0,0 +1,178 @@ +diff --git a/src/include/bootstrap.h b/src/include/bootstrap.h +index 8c5f081..9922b79 100644 +--- a/src/include/bootstrap.h ++++ b/src/include/bootstrap.h +@@ -10,11 +10,13 @@ + #include "nccl.h" + #include "comm.h" + ++// this is accessed through unaligned ptrs because ncclUniqueId is a typedef of char[128] + struct ncclBootstrapHandle { + uint64_t magic; + union ncclSocketAddress addr; + }; + static_assert(sizeof(struct ncclBootstrapHandle) <= sizeof(ncclUniqueId), "Bootstrap handle is too large to fit inside NCCL unique ID"); ++static_assert(alignof(struct ncclBootstrapHandle) == alignof(ncclUniqueId), "Bootstrap handle must have same alignment as NCCL unique ID to avoid UB"); + + ncclResult_t bootstrapNetInit(); + ncclResult_t bootstrapCreateRoot(struct ncclBootstrapHandle* handle, bool idFromEnv); +diff --git a/src/misc/rocmwrap.cc b/src/misc/rocmwrap.cc +index b3063d5..464b80d 100644 +--- a/src/misc/rocmwrap.cc ++++ b/src/misc/rocmwrap.cc +@@ -131,9 +131,12 @@ static void initOnceFunc() { + //format and store the kernel conf file location + snprintf(kernel_conf_file, sizeof(kernel_conf_file), "/boot/config-%s", utsname.release); + fp = fopen(kernel_conf_file, "r"); +- if (fp == NULL) INFO(NCCL_INIT,"Could not open kernel conf file"); ++ if (fp == NULL) { ++ INFO(NCCL_INIT,"Could not open kernel conf file, will assume CONFIG_DMABUF_MOVE_NOTIFY and CONFIG_PCI_P2PDMA are enabled"); ++ } + //look for kernel_opt1 and kernel_opt2 in the conf file and check +- while (fgets(buf, sizeof(buf), fp) != 
NULL) { ++ // FIXME: This check is broken, CONFIG_DMABUF_MOVE_NOTIFY could be across a buf boundary. ++ while (fp && fgets(buf, sizeof(buf), fp) != NULL) { + if (strstr(buf, kernel_opt1) != NULL) { + found_opt1 = 1; + INFO(NCCL_INIT,"CONFIG_DMABUF_MOVE_NOTIFY=y in /boot/config-%s", utsname.release); +@@ -143,11 +146,12 @@ static void initOnceFunc() { + INFO(NCCL_INIT,"CONFIG_PCI_P2PDMA=y in /boot/config-%s", utsname.release); + } + } +- if (!found_opt1 || !found_opt2) { ++ if (fp && (!found_opt1 || !found_opt2)) { + dmaBufSupport = 0; + INFO(NCCL_INIT, "CONFIG_DMABUF_MOVE_NOTIFY and CONFIG_PCI_P2PDMA should be set for DMA_BUF in /boot/config-%s", utsname.release); + INFO(NCCL_INIT, "DMA_BUF_SUPPORT Failed due to OS kernel support"); + } ++ if (fp) fclose(fp); + + if(dmaBufSupport) INFO(NCCL_INIT, "DMA_BUF Support Enabled"); + else goto error; +diff --git a/src/nccl.h.in b/src/nccl.h.in +index 1d127b0..6296073 100644 +--- a/src/nccl.h.in ++++ b/src/nccl.h.in +@@ -39,7 +39,7 @@ typedef struct ncclComm* ncclComm_t; + #define NCCL_UNIQUE_ID_BYTES 128 + /*! @brief Opaque unique id used to initialize communicators + @details The ncclUniqueId must be passed to all participating ranks */ +-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; /*!< Opaque array>*/} ncclUniqueId; ++typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; /*!< Opaque array>*/} ncclUniqueId; + + /*! 
@defgroup rccl_result_code Result Codes + @details The various result codes that RCCL API calls may return +diff --git a/src/proxy.cc b/src/proxy.cc +index 50e5437..51bb401 100644 +--- a/src/proxy.cc ++++ b/src/proxy.cc +@@ -965,7 +965,11 @@ struct ncclProxyConnectionPool { + + static ncclResult_t ncclProxyNewConnection(struct ncclProxyConnectionPool* pool, int* id) { + if (pool->offset == NCCL_PROXY_CONN_POOL_SIZE) { +- NCCLCHECK(ncclRealloc(&pool->pools, pool->banks, pool->banks+1)); ++ if (pool->pools) { ++ NCCLCHECK(ncclRealloc(&pool->pools, pool->banks, pool->banks+1)); ++ } else { ++ NCCLCHECK(ncclCalloc(&pool->pools, pool->banks+1)); ++ } + NCCLCHECK(ncclCalloc(pool->pools+pool->banks, NCCL_PROXY_CONN_POOL_SIZE)); + pool->banks++; + pool->offset = 0; +diff --git a/src/transport/net_ib.cc b/src/transport/net_ib.cc +index 6d77784..49762d3 100644 +--- a/src/transport/net_ib.cc ++++ b/src/transport/net_ib.cc +@@ -573,7 +573,7 @@ ncclResult_t ncclIbGdrSupport() { + // Requires support from NIC driver modules + // Use ONLY for debugging! 
+ moduleLoaded = 1; +- INFO(NCCL_INIT, "RCCL_FORCE_ENABLE_GDRDMA = 1, so explicitly setting moduleLoaded = 1"); ++ INFO(NCCL_INIT, "ncclIbGdrSupport: RCCL_FORCE_ENABLE_GDRDMA = 1, so explicitly setting moduleLoaded = 1"); + } + + if (moduleLoaded == -1) { +@@ -586,13 +586,14 @@ ncclResult_t ncclIbGdrSupport() { + // or created under a different path like `/sys/kernel/` or `/sys/` (depending on your ib_peer_mem module) + const char* memory_peers_paths[] = {"/sys/kernel/mm/memory_peers/amdkfd/version", + "/sys/kernel/memory_peers/amdkfd/version", +- "/sys/memory_peers/amdkfd/version"}; ++ "/sys/memory_peers/amdkfd/version", ++ NULL}; + int i = 0; + + while (memory_peers_paths[i]) { + if (access(memory_peers_paths[i], F_OK) == 0) { + moduleLoaded = 1; +- INFO(NCCL_INIT,"Found %s", memory_peers_paths[i]); ++ INFO(NCCL_INIT,"ncclIbGdrSupport: Found %s", memory_peers_paths[i]); + break; + } else { + moduleLoaded = 0; +@@ -612,22 +613,23 @@ ncclResult_t ncclIbGdrSupport() { + if (moduleLoaded == 0) { + // Check for `ib_register_peer_memory_client` symbol in `/proc/kallsyms` + // if your system uses native OS ib_peer module +- char buf[256]; +- FILE *fp = NULL; +- fp = fopen("/proc/kallsyms", "r"); ++ FILE *fp = fopen("/proc/kallsyms", "r"); ++ char *line = NULL; ++ size_t len = 0; + + if (fp == NULL) { +- INFO(NCCL_INIT,"Could not open /proc/kallsyms"); ++ INFO(NCCL_INIT,"ncclIbGdrSupport: Could not open /proc/kallsyms to check for ib_register_peer_memory_client"); + } else { +- while (fgets(buf, sizeof(buf), fp) != NULL) { +- if (strstr(buf, "t ib_register_peer_memory_client") != NULL || +- strstr(buf, "T ib_register_peer_memory_client") != NULL) { ++ while (getline(&line, &len, fp) > 0) { ++ if (line && strstr(line, "ib_register_peer_memory_client") != NULL) { + moduleLoaded = 1; +- INFO(NCCL_INIT,"Found ib_register_peer_memory_client in /proc/kallsyms"); ++ INFO(NCCL_INIT,"ncclIbGdrSupport: Found ib_register_peer_memory_client in /proc/kallsyms"); + break; + } + } + } 
++ if (line) free(line); ++ if (fp) fclose(fp); + } + #else + // Check for the nv_peer_mem module being loaded +@@ -637,7 +639,7 @@ ncclResult_t ncclIbGdrSupport() { + #endif + } + if (moduleLoaded == 0) { +- INFO(NCCL_INIT,"GDRDMA not enabled. Could not find memory_peers directory or peer_memory symbol"); ++ INFO(NCCL_INIT,"ncclIbGdrSupport: GDRDMA not enabled. Could not find memory_peers directory or peer_memory symbol"); + return ncclSystemError; + } + return ncclSuccess; +diff --git a/tools/ib-test/include/nccl.h b/tools/ib-test/include/nccl.h +index 2c86c33..5801c61 100755 +--- a/tools/ib-test/include/nccl.h ++++ b/tools/ib-test/include/nccl.h +@@ -31,7 +31,7 @@ extern "C" { + typedef struct ncclComm* ncclComm_t; + + #define NCCL_UNIQUE_ID_BYTES 128 +-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId; ++typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId; + + /* Error type */ + typedef enum { ncclSuccess = 0, +diff --git a/tools/topo_expl/include/nccl.h b/tools/topo_expl/include/nccl.h +index 729561b..4e4bdd9 100644 +--- a/tools/topo_expl/include/nccl.h ++++ b/tools/topo_expl/include/nccl.h +@@ -35,7 +35,7 @@ typedef struct ncclComm* ncclComm_t; + #define NCCL_COMM_NULL NULL + + #define NCCL_UNIQUE_ID_BYTES 128 +-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId; ++typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId; + + /*! 
@brief Error type */ + typedef enum { ncclSuccess = 0, diff --git a/pkgs/development/rocm-modules/6/rccl/rccl-test-missing-iomanip.diff b/pkgs/development/rocm-modules/6/rccl/rccl-test-missing-iomanip.diff new file mode 100644 index 000000000000..3cc54a83c142 --- /dev/null +++ b/pkgs/development/rocm-modules/6/rccl/rccl-test-missing-iomanip.diff @@ -0,0 +1,10 @@ +--- a/test/common/TestBed.cpp ++++ b/test/common/TestBed.cpp +@@ -4,6 +4,7 @@ + * See LICENSE.txt for license information + ************************************************************************/ + #include ++#include + #include "TestBed.hpp" + #include + diff --git a/pkgs/development/rocm-modules/6/rdc/default.nix b/pkgs/development/rocm-modules/6/rdc/default.nix index 2422ef71579c..fc28202a11f1 100644 --- a/pkgs/development/rocm-modules/6/rdc/default.nix +++ b/pkgs/development/rocm-modules/6/rdc/default.nix @@ -4,6 +4,7 @@ fetchFromGitHub, rocmUpdateScript, cmake, + amdsmi, rocm-smi, rocm-runtime, libcap, @@ -46,7 +47,7 @@ let in stdenv.mkDerivation (finalAttrs: { pname = "rdc"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -63,7 +64,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "rdc"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-QugcajxILmDeQiWG5uAUO41Wut45irg2Ynufgn1bmps="; + hash = "sha256-sKsti7LeWsxvOmc9h/srsl0OmHkJIRNRiV+8mFVG3/M="; }; nativeBuildInputs = @@ -79,6 +80,7 @@ stdenv.mkDerivation (finalAttrs: { buildInputs = [ + amdsmi rocm-smi rocm-runtime libcap @@ -126,8 +128,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -136,7 +138,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - # broken = versions.minor finalAttrs.version != versions.minor rocm-smi.version || 
versionAtLeast finalAttrs.version "7.0.0"; - broken = true; # Too many errors, unsure how to fix }; }) diff --git a/pkgs/development/rocm-modules/6/rocalution/default.nix b/pkgs/development/rocm-modules/6/rocalution/default.nix index 48cc9ab3f8cc..fc50702cabb2 100644 --- a/pkgs/development/rocm-modules/6/rocalution/default.nix +++ b/pkgs/development/rocm-modules/6/rocalution/default.nix @@ -11,6 +11,7 @@ rocrand, clr, git, + pkg-config, openmp, openmpi, gtest, @@ -22,7 +23,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocalution"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -42,7 +43,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "rocALUTION"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-mrN+CI2mqaMi8oKxui7HAIE2qSn50aNaFipkWwYMtbc="; + hash = "sha256-xdZ3HUiRGsreHfJH8RgL/s3jGyC5ABmBKcEfgtqWg8Y="; }; nativeBuildInputs = [ @@ -50,6 +51,7 @@ stdenv.mkDerivation (finalAttrs: { rocm-cmake clr git + pkg-config ]; buildInputs = @@ -65,9 +67,12 @@ stdenv.mkDerivation (finalAttrs: { gtest ]; + CXXFLAGS = "-I${openmp.dev}/include"; cmakeFlags = [ - "-DCMAKE_CXX_COMPILER=hipcc" + "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_omp_LIBRARY=${openmp}/lib" "-DROCM_PATH=${clr}" "-DHIP_ROOT_DIR=${clr}" "-DSUPPORT_HIP=ON" @@ -82,6 +87,7 @@ stdenv.mkDerivation (finalAttrs: { ] ++ lib.optionals (gpuTargets != [ ]) [ "-DAMDGPU_TARGETS=${lib.strings.concatStringsSep ";" gpuTargets}" + "-DGPU_TARGETS=${lib.strings.concatStringsSep ";" gpuTargets}" ] ++ lib.optionals buildTests [ "-DBUILD_CLIENTS_TESTS=ON" @@ -115,8 +121,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -125,8 +131,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = 
teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocblas/default.nix b/pkgs/development/rocm-modules/6/rocblas/default.nix index 7c5fbe801b19..9cb3ff03d7f5 100644 --- a/pkgs/development/rocm-modules/6/rocblas/default.nix +++ b/pkgs/development/rocm-modules/6/rocblas/default.nix @@ -14,21 +14,24 @@ gtest, gfortran, openmp, + git, amd-blis, + zstd, + hipblas-common, + hipblaslt, python3Packages, + rocm-smi, buildTensile ? true, - buildTests ? false, - buildBenchmarks ? false, - tensileLogic ? "asm_full", - tensileCOVersion ? "default", + buildTests ? true, + buildBenchmarks ? true, # https://github.com/ROCm/Tensile/issues/1757 # Allows gfx101* users to use rocBLAS normally. # Turn the below two values to `true` after the fix has been cherry-picked # into a release. Just backporting that single fix is not enough because it # depends on some previous commits. - tensileSepArch ? false, - tensileLazyLib ? false, - tensileLibFormat ? "msgpack", + tensileSepArch ? true, + tensileLazyLib ? true, + withHipBlasLt ? true, # `gfx940`, `gfx941` are not present in this list because they are early # engineering samples, and all final MI300 hardware are `gfx942`: # https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2032791130 @@ -37,38 +40,47 @@ # would force all `gfx101*` GPUs to run as `gfx1010`, so `gfx101*` GPUs will # always try to use `gfx1010` code objects, hence building for `gfx1012` is # useless: https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152 - gpuTargets ? [ - "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102" - ], + gpuTargets ? 
( + clr.localGpuTargets or [ + "gfx900" + "gfx906" + "gfx908" + "gfx90a" + "gfx942" + "gfx1010" + "gfx1030" + "gfx1100" + "gfx1101" + "gfx1102" + ] + ), }: +let + gpuTargets' = lib.concatStringsSep ";" gpuTargets; +in stdenv.mkDerivation (finalAttrs: { - pname = "rocblas"; - version = "6.0.2"; + pname = "rocblas${clr.gpuArchSuffix}"; + version = "6.3.1"; - outputs = - [ - "out" - ] - ++ lib.optionals buildTests [ - "test" - ] - ++ lib.optionals buildBenchmarks [ - "benchmark" - ]; + outputs = [ + "out" + ]; src = fetchFromGitHub { owner = "ROCm"; repo = "rocBLAS"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-G68d/gvBbTdNx8xR3xY+OkBm5Yxq1NFjxby9BbpOcUk="; + hash = "sha256-IYcrVcGH4yZDkFZeNOJPfG0qsPS/WiH0fTSUSdo1BH4="; }; nativeBuildInputs = [ cmake + # no ninja, it buffers console output and nix times out long periods of no output rocm-cmake clr + git ] ++ lib.optionals buildTensile [ tensile @@ -77,12 +89,17 @@ stdenv.mkDerivation (finalAttrs: { buildInputs = [ python3 + hipblas-common + ] + ++ lib.optionals withHipBlasLt [ + hipblaslt ] ++ lib.optionals buildTensile [ + zstd msgpack libxml2 python3Packages.msgpack - python3Packages.joblib + python3Packages.zstandard ] ++ lib.optionals buildTests [ gtest @@ -91,38 +108,61 @@ stdenv.mkDerivation (finalAttrs: { gfortran openmp amd-blis + rocm-smi ] ++ lib.optionals (buildTensile || buildTests || buildBenchmarks) [ python3Packages.pyyaml ]; + dontStrip = true; + env.CXXFLAGS = + "-O3 -DNDEBUG -I${hipblas-common}/include" + + lib.optionalString (buildTests || buildBenchmarks) " -I${amd-blis}/include/blis"; + # Fails to link tests if we don't add amd-blis libs + env.LDFLAGS = lib.optionalString ( + buildTests || buildBenchmarks + ) "-Wl,--as-needed -L${amd-blis}/lib -lblis-mt -lcblas"; + env.TENSILE_ROCM_ASSEMBLER_PATH = "${stdenv.cc}/bin/clang++"; + cmakeFlags = [ - (lib.cmakeFeature "CMAKE_C_COMPILER" "hipcc") - (lib.cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") + (lib.cmakeFeature "CMAKE_BUILD_TYPE" "Release") + 
(lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true) + (lib.cmakeFeature "CMAKE_EXECUTE_PROCESS_COMMAND_ECHO" "STDERR") + (lib.cmakeFeature "CMAKE_Fortran_COMPILER" "${lib.getBin gfortran}/bin/gfortran") + (lib.cmakeFeature "CMAKE_Fortran_COMPILER_AR" "${lib.getBin gfortran}/bin/ar") + (lib.cmakeFeature "CMAKE_Fortran_COMPILER_RANLIB" "${lib.getBin gfortran}/bin/ranlib") (lib.cmakeFeature "python" "python3") - (lib.cmakeFeature "AMDGPU_TARGETS" (lib.concatStringsSep ";" gpuTargets)) + (lib.cmakeFeature "SUPPORTED_TARGETS" gpuTargets') + (lib.cmakeFeature "AMDGPU_TARGETS" gpuTargets') + (lib.cmakeFeature "GPU_TARGETS" gpuTargets') (lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile) (lib.cmakeBool "ROCM_SYMLINK_LIBS" false) (lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas") + (lib.cmakeBool "BUILD_WITH_HIPBLASLT" withHipBlasLt) (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests) (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks) - # rocblas header files are not installed unless we set this - (lib.cmakeFeature "CMAKE_INSTALL_INCLUDEDIR" "include") + (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildBenchmarks) + (lib.cmakeBool "BUILD_OFFLOAD_COMPRESS" true) + # Temporarily set variables to work around upstream CMakeLists issue + # Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DCMAKE_INSTALL_LIBDIR=lib" ] ++ lib.optionals buildTensile [ + "-DCPACK_SET_DESTDIR=OFF" + "-DLINK_BLIS=ON" + "-DTensile_CODE_OBJECT_VERSION=default" + "-DTensile_LOGIC=asm_full" + "-DTensile_LIBRARY_FORMAT=msgpack" (lib.cmakeBool "BUILD_WITH_PIP" false) - (lib.cmakeFeature "Tensile_LOGIC" tensileLogic) - (lib.cmakeFeature "Tensile_CODE_OBJECT_VERSION" tensileCOVersion) (lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch) (lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib) - (lib.cmakeFeature "Tensile_LIBRARY_FORMAT" tensileLibFormat) - (lib.cmakeBool "Tensile_PRINT_DEBUG" 
true) - ] - ++ lib.optionals (buildTests || buildBenchmarks) [ - (lib.cmakeFeature "CMAKE_CXX_FLAGS" "-I${amd-blis}/include/blis") ]; + passthru.amdgpu_targets = gpuTargets'; + patches = [ (fetchpatch { name = "Extend-rocBLAS-HIP-ISA-compatibility.patch"; @@ -135,14 +175,17 @@ stdenv.mkDerivation (finalAttrs: { postPatch = '' substituteInPlace cmake/build-options.cmake \ --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"' + substituteInPlace CMakeLists.txt \ + --replace-fail "4.42.0" "4.43.0" ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; + enableParallelBuilding = true; requiredSystemFeatures = [ "big-parallel" ]; meta = with lib; { @@ -151,8 +194,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocdbgapi/default.nix b/pkgs/development/rocm-modules/6/rocdbgapi/default.nix index 9d1006c32945..6ca3e5b57558 100644 --- a/pkgs/development/rocm-modules/6/rocdbgapi/default.nix +++ b/pkgs/development/rocm-modules/6/rocdbgapi/default.nix @@ -1,66 +1,76 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, cmake -, rocm-cmake -, git -, rocm-comgr -, rocm-runtime -, hwdata -, texliveSmall -, doxygen -, graphviz -, buildDocs ? true +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + git, + rocm-comgr, + rocm-runtime, + hwdata, + texliveSmall, + doxygen, + graphviz, + buildDocs ? 
true, }: let - latex = lib.optionalAttrs buildDocs (texliveSmall.withPackages (ps: with ps; [ - changepage - latexmk - varwidth - multirow - hanging - adjustbox - collectbox - stackengine - enumitem - alphalph - wasysym - sectsty - tocloft - newunicodechar - etoc - helvetic - wasy - courier - ])); -in stdenv.mkDerivation (finalAttrs: { + latex = lib.optionalAttrs buildDocs ( + texliveSmall.withPackages ( + ps: with ps; [ + changepage + latexmk + varwidth + multirow + hanging + adjustbox + collectbox + stackengine + enumitem + alphalph + wasysym + sectsty + tocloft + newunicodechar + etoc + helvetic + wasy + courier + ] + ) + ); +in +stdenv.mkDerivation (finalAttrs: { pname = "rocdbgapi"; - version = "6.0.2"; + version = "6.3.1"; - outputs = [ - "out" - ] ++ lib.optionals buildDocs [ - "doc" - ]; + outputs = + [ + "out" + ] + ++ lib.optionals buildDocs [ + "doc" + ]; src = fetchFromGitHub { owner = "ROCm"; repo = "ROCdbgapi"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-+CxaTmxRt/RicqQddqIEHs8vvAPCMKXkWg7kbZvnUsQ="; + hash = "sha256-6itfBrWVspobU47aiJAOQoxT8chwrq9scRn0or3bXto="; }; - nativeBuildInputs = [ - cmake - rocm-cmake - git - ] ++ lib.optionals buildDocs [ - latex - doxygen - graphviz - ]; + nativeBuildInputs = + [ + cmake + rocm-cmake + git + ] + ++ lib.optionals buildDocs [ + latex + doxygen + graphviz + ]; buildInputs = [ rocm-comgr @@ -83,21 +93,15 @@ in stdenv.mkDerivation (finalAttrs: { make -j$NIX_BUILD_CORES doc ''; - postInstall = '' - substituteInPlace $out/lib/cmake/amd-dbgapi/amd-dbgapi-config.cmake \ - --replace "/build/source/build/" "" - - substituteInPlace $out/lib/cmake/amd-dbgapi/amd-dbgapi-targets.cmake \ - --replace "/build/source/build" "$out" - '' + lib.optionalString buildDocs '' + postInstall = lib.optionalString buildDocs '' mv $out/share/html/amd-dbgapi $doc/share/doc/amd-dbgapi/html rmdir $out/share/html ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = 
finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -106,6 +110,5 @@ in stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocfft/default.nix b/pkgs/development/rocm-modules/6/rocfft/default.nix index c8af11ad4d51..53cd7523b52a 100644 --- a/pkgs/development/rocm-modules/6/rocfft/default.nix +++ b/pkgs/development/rocm-modules/6/rocfft/default.nix @@ -1,5 +1,4 @@ { - rocfft, lib, stdenv, fetchFromGitHub, @@ -15,18 +14,18 @@ gtest, openmp, rocrand, - gpuTargets ? [ ], + gpuTargets ? clr.localGpuTargets or clr.gpuTargets, }: stdenv.mkDerivation (finalAttrs: { - pname = "rocfft"; - version = "6.0.2"; + pname = "rocfft${clr.gpuArchSuffix}"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "rocFFT"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-6Gjsy14GeR08VqnNmFhu8EyYDnQ+VZRlg+u9MAAWfHc="; + hash = "sha256-RrxdwZ64uC7lQzyJI1eGHX2dmRnW8TfNThnuvuz5XWo="; }; nativeBuildInputs = [ @@ -36,6 +35,8 @@ stdenv.mkDerivation (finalAttrs: { rocm-cmake ]; + # FIXME: rocfft_aot_helper runs at the end of the build and has a risk of timing it out + # due to a long period with no terminal output buildInputs = [ sqlite ]; cmakeFlags = @@ -156,8 +157,8 @@ stdenv.mkDerivation (finalAttrs: { updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; }; @@ -169,8 +170,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version 
"7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocgdb/default.nix b/pkgs/development/rocm-modules/6/rocgdb/default.nix index 93484b42c40f..5e246c6a7feb 100644 --- a/pkgs/development/rocm-modules/6/rocgdb/default.nix +++ b/pkgs/development/rocm-modules/6/rocgdb/default.nix @@ -21,13 +21,13 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocgdb"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "ROCgdb"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-XeX/k8gfo9HgcUSIjs35C7IqCmFhvBOqQJSOoPF6HK4="; + hash = "sha256-P9NbYMrCs0UpnaEIP+bJEM6yPiRHzl0lI0J4+A7/ePc="; }; nativeBuildInputs = [ @@ -91,8 +91,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -101,6 +101,5 @@ stdenv.mkDerivation (finalAttrs: { license = licenses.gpl3Plus; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocm-cmake/default.nix b/pkgs/development/rocm-modules/6/rocm-cmake/default.nix index cd3fd9c035da..16969b9cf9e5 100644 --- a/pkgs/development/rocm-modules/6/rocm-cmake/default.nix +++ b/pkgs/development/rocm-modules/6/rocm-cmake/default.nix @@ -1,27 +1,31 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, cmake +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + rocm-core, + cmake, }: stdenv.mkDerivation (finalAttrs: { pname = "rocm-cmake"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "rocm-cmake"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-qSjWT0KOQ5oDV06tfnKN+H/JzdoOnR9KY0c+SjvDepM="; + hash = "sha256-8kEcwqHJF584AteuddP7Ai7n6ltVZJ8a6RsYIWGMs0U="; }; nativeBuildInputs = [ cmake ]; + buildInputs = [ rocm-core ]; + passthru.updateScript = 
rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -30,6 +34,5 @@ stdenv.mkDerivation (finalAttrs: { license = licenses.mit; maintainers = teams.rocm.members; platforms = platforms.unix; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocm-comgr/default.nix b/pkgs/development/rocm-modules/6/rocm-comgr/default.nix index fb85e6dd2a1b..64f4f8505311 100644 --- a/pkgs/development/rocm-modules/6/rocm-comgr/default.nix +++ b/pkgs/development/rocm-modules/6/rocm-comgr/default.nix @@ -1,11 +1,13 @@ { lib, stdenv, - fetchFromGitHub, - rocmUpdateScript, + fetchpatch, cmake, - rocm-cmake, + python3, + rocm-merged-llvm, rocm-device-libs, + zlib, + zstd, libxml2, }: @@ -20,34 +22,45 @@ let in stdenv.mkDerivation (finalAttrs: { pname = "rocm-comgr"; - version = "6.0.2"; + # In-tree with ROCm LLVM + inherit (rocm-merged-llvm) version; + src = rocm-merged-llvm.llvm-src; - src = fetchFromGitHub { - owner = "ROCm"; - repo = "ROCm-CompilerSupport"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-9HuNU/k+kPJMlzqOTM20gm6SAOWJe9tpAZXEj4erdmI="; - }; + sourceRoot = "${finalAttrs.src.name}/amd/comgr"; - sourceRoot = "${finalAttrs.src.name}/lib/comgr"; + patches = [ + # [Comgr] Extend ISA compatibility + (fetchpatch { + sha256 = "sha256-dgow0kwSWM1TnkqWOZDRQrh5nuF8p5jbYyOLCpQsH4k="; + url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/a439e4f37ce71de48d4a979594276e3be0e6278f.patch"; + relative = "amd/comgr"; + }) + # [Comgr] Extend ISA compatibility for CCOB + (fetchpatch { + sha256 = "sha256-6Rwz12Lk4R2JK3olii3cr2Zd0ZLYe7VSpK1YRCOsJWY="; + url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/2d8c459a4d4c0567a7a275b4b54560d88e5c6919.patch"; + relative = "amd/comgr"; + }) + ]; 
nativeBuildInputs = [ cmake - rocm-cmake + python3 ]; buildInputs = [ rocm-device-libs libxml2 + zlib + zstd + rocm-merged-llvm ]; - cmakeFlags = [ "-DLLVM_TARGETS_TO_BUILD=AMDGPU;X86" ]; - - passthru.updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - }; + cmakeFlags = [ + "-DCMAKE_VERBOSE_MAKEFILE=ON" + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}" + ]; meta = with lib; { description = "APIs for compiling and inspecting AMDGPU code objects"; @@ -55,8 +68,5 @@ stdenv.mkDerivation (finalAttrs: { license = licenses.ncsa; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocm-core/default.nix b/pkgs/development/rocm-modules/6/rocm-core/default.nix index 31f47fb26b97..e71205db3abb 100644 --- a/pkgs/development/rocm-modules/6/rocm-core/default.nix +++ b/pkgs/development/rocm-modules/6/rocm-core/default.nix @@ -4,28 +4,43 @@ fetchFromGitHub, rocmUpdateScript, cmake, + writeText, }: stdenv.mkDerivation (finalAttrs: { pname = "rocm-core"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "rocm-core"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-sgL1UMt3o01zA8v41dyCG1fAsK/PkTRsHQJOvlNatZ4="; + hash = "sha256-UDnPGvgwzwv49CzF+Kt0v95CsxS33BZeqNcKw1K6jRI="; }; nativeBuildInputs = [ cmake ]; - cmakeFlags = [ "-DROCM_VERSION=${finalAttrs.version}" ]; + # FIXME: What's the correct way to set this? 
+ env.ROCM_LIBPATCH_VERSION = "${lib.versions.major finalAttrs.version}0${lib.versions.minor finalAttrs.version}0${lib.versions.patch finalAttrs.version}"; + env.BUILD_ID = "nixos-${finalAttrs.env.ROCM_LIBPATCH_VERSION}"; + env.ROCM_BUILD_ID = "release-${finalAttrs.env.BUILD_ID}"; + cmakeFlags = [ + "-DROCM_LIBPATCH_VERSION=${finalAttrs.env.ROCM_LIBPATCH_VERSION}" + "-DROCM_VERSION=${finalAttrs.version}" + "-DBUILD_ID=${finalAttrs.env.BUILD_ID}" + ]; + setupHook = writeText "setupHook.sh" '' + export ROCM_LIBPATCH_VERSION="${finalAttrs.env.ROCM_LIBPATCH_VERSION}" + export BUILD_ID="${finalAttrs.env.BUILD_ID}" + export ROCM_BUILD_ID="${finalAttrs.env.ROCM_BUILD_ID}" + ''; + + passthru.ROCM_LIBPATCH_VERSION = finalAttrs.env.ROCM_LIBPATCH_VERSION; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - page = "tags?per_page=1"; - filter = ".[0].name | split(\"-\") | .[1]"; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + page = "tags?per_page=4"; }; meta = with lib; { @@ -34,8 +49,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocm-device-libs/cmake.patch b/pkgs/development/rocm-modules/6/rocm-device-libs/cmake.patch index 500ff37a9905..de46bdb87258 100644 --- a/pkgs/development/rocm-modules/6/rocm-device-libs/cmake.patch +++ b/pkgs/development/rocm-modules/6/rocm-device-libs/cmake.patch @@ -1,7 +1,7 @@ diff --git a/cmake/Packages.cmake b/cmake/Packages.cmake index 07c60eb..c736b3e 100644 ---- a/cmake/Packages.cmake -+++ b/cmake/Packages.cmake +--- a/amd/device-libs/cmake/Packages.cmake ++++ b/amd/device-libs/cmake/Packages.cmake @@ -12,24 +12,29 @@ set_target_properties(${target} PROPERTIES 
IMPORTED_LOCATION \"${target_path}\")") endforeach() diff --git a/pkgs/development/rocm-modules/6/rocm-device-libs/default.nix b/pkgs/development/rocm-modules/6/rocm-device-libs/default.nix index a16eee74c3f1..81d2ab54e069 100644 --- a/pkgs/development/rocm-modules/6/rocm-device-libs/default.nix +++ b/pkgs/development/rocm-modules/6/rocm-device-libs/default.nix @@ -1,11 +1,14 @@ { lib, stdenv, - fetchFromGitHub, - rocmUpdateScript, cmake, - rocm-cmake, + ninja, libxml2, + zlib, + zstd, + ncurses, + rocm-merged-llvm, + python3, }: let @@ -19,30 +22,34 @@ let in stdenv.mkDerivation (finalAttrs: { pname = "rocm-device-libs"; - version = "6.0.2"; + # In-tree with ROCm LLVM + inherit (rocm-merged-llvm) version; + src = rocm-merged-llvm.llvm-src; - src = fetchFromGitHub { - owner = "ROCm"; - repo = "ROCm-Device-Libs"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-7XG7oSkJ3EPWTYGea0I50eB1/DPMD5agmjctxZYTbLQ="; - }; + postPatch = '' + cd amd/device-libs + ''; patches = [ ./cmake.patch ]; nativeBuildInputs = [ cmake - rocm-cmake + ninja + python3 ]; - buildInputs = [ libxml2 ]; - cmakeFlags = [ "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}" ]; + buildInputs = [ + libxml2 + zlib + zstd + ncurses + rocm-merged-llvm + ]; - passthru.updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - }; + cmakeFlags = [ + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}" + ]; meta = with lib; { description = "Set of AMD-specific device-side language runtime libraries"; @@ -50,8 +57,5 @@ stdenv.mkDerivation (finalAttrs: { license = licenses.ncsa; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocm-docs-core/default.nix
b/pkgs/development/rocm-modules/6/rocm-docs-core/default.nix index 06232b1575e3..5302f165e5d9 100644 --- a/pkgs/development/rocm-modules/6/rocm-docs-core/default.nix +++ b/pkgs/development/rocm-modules/6/rocm-docs-core/default.nix @@ -1,23 +1,23 @@ -{ lib -, stdenv -, fetchFromGitHub -, gitUpdater -, buildPythonPackage -, setuptools -, beautifulsoup4 -, gitpython -, pydata-sphinx-theme -, pygithub -, sphinx -, breathe -, myst-parser -, sphinx-book-theme -, sphinx-copybutton -, sphinx-design -, sphinx-external-toc -, sphinx-notfound-page -, pyyaml -, fastjsonschema +{ + lib, + fetchFromGitHub, + gitUpdater, + buildPythonPackage, + setuptools, + beautifulsoup4, + gitpython, + pydata-sphinx-theme, + pygithub, + sphinx, + breathe, + myst-parser, + sphinx-book-theme, + sphinx-copybutton, + sphinx-design, + sphinx-external-toc, + sphinx-notfound-page, + pyyaml, + fastjsonschema, }: # FIXME: Move to rocmPackages_common @@ -59,7 +59,10 @@ buildPythonPackage rec { meta = with lib; { description = "ROCm Documentation Python package for ReadTheDocs build standardization"; homepage = "https://github.com/ROCm/rocm-docs-core"; - license = with licenses; [ mit cc-by-40 ]; + license = with licenses; [ + mit + cc-by-40 + ]; maintainers = teams.rocm.members; platforms = platforms.linux; }; diff --git a/pkgs/development/rocm-modules/6/rocm-path/default.nix b/pkgs/development/rocm-modules/6/rocm-path/default.nix new file mode 100644 index 000000000000..57970ffa4406 --- /dev/null +++ b/pkgs/development/rocm-modules/6/rocm-path/default.nix @@ -0,0 +1,27 @@ +{ + symlinkJoin, + linkFarm, + clr, + hipblas, + hipblas-common, + rocblas, + rocsolver, + rocsparse, + rocm-device-libs, + rocm-smi, + llvm, +}: +symlinkJoin { + name = "rocm-path-${clr.version}"; + paths = [ + clr + hipblas-common + hipblas + rocblas + rocsolver + rocsparse + rocm-device-libs + rocm-smi + (linkFarm "rocm-llvm-subdir" { llvm = llvm.clang; }) + ]; +} diff --git 
a/pkgs/development/rocm-modules/6/rocm-runtime/default.nix b/pkgs/development/rocm-modules/6/rocm-runtime/default.nix index a24c751a44ef..1ef12e9a9474 100644 --- a/pkgs/development/rocm-modules/6/rocm-runtime/default.nix +++ b/pkgs/development/rocm-modules/6/rocm-runtime/default.nix @@ -6,37 +6,40 @@ rocmUpdateScript, pkg-config, cmake, + ninja, xxd, rocm-device-libs, - rocm-thunk, elfutils, libdrm, numactl, valgrind, libxml2, + rocm-merged-llvm, }: stdenv.mkDerivation (finalAttrs: { pname = "rocm-runtime"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "ROCR-Runtime"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-xNMG954HI9SOfvYYB/62fhmm9mmR4I10uHP2nqn9EgI="; + hash = "sha256-btpiIPV9REMvrmRSUzBIpBO6ehVIMmEmG+H8hqHDxdE="; }; - sourceRoot = "${finalAttrs.src.name}/src"; + env.CFLAGS = "-I${numactl.dev}/include -I${elfutils.dev}/include -w"; + env.CXXFLAGS = "-I${numactl.dev}/include -I${elfutils.dev}/include -w"; nativeBuildInputs = [ pkg-config cmake + ninja xxd + rocm-merged-llvm ]; buildInputs = [ - rocm-thunk elfutils libdrm numactl @@ -44,34 +47,56 @@ stdenv.mkDerivation (finalAttrs: { libxml2 ]; + cmakeFlags = [ + "-DBUILD_SHARED_LIBS=ON" + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ]; + patches = [ + # Patches for UB at runtime https://github.com/ROCm/ROCR-Runtime/issues/272 (fetchpatch { - name = "extend-isa-compatibility-check.patch"; - url = "https://salsa.debian.org/rocm-team/rocr-runtime/-/raw/076026d43bbee7f816b81fea72f984213a9ff961/debian/patches/0004-extend-isa-compatibility-check.patch"; - hash = "sha256-cC030zVGS4kNXwaztv5cwfXfVwOldpLGV9iYgEfPEnY="; - stripLen = 1; + # [PATCH] hsa-runtime: set underlying type of hsa_region_info_t and hsa_amd_region_info_t to int + url = "https://github.com/ROCm/ROCR-Runtime/commit/39a6a168fa07e289a10f6e20e6ead4e303e99ba0.patch"; + hash = "sha256-CshJJDvII1nNyNmt+YjwMwfBHUTlrdsxkhwfgBwO+WE="; }) + (fetchpatch { 
+ # [PATCH] rocr: refactor of runtime.cpp based on Coverity + url = "https://github.com/ROCm/ROCR-Runtime/commit/441bd9fe6c7bdb5c4c31f71524ed642786bc923e.patch"; + hash = "sha256-7bQXxGkipzgT2aXRxCuh3Sfmo/zc/IOmA0x1zB+fMb0="; + }) + (fetchpatch { + # [PATCH] queues: fix UB due to 1 << 31 + url = "https://github.com/ROCm/ROCR-Runtime/commit/9b8a0f5dbee1903fa990a7d8accc1c5fbc549636.patch"; + hash = "sha256-KlZWjfngH8yKly08iwC+Bzpvp/4dkaTpRIKdFYwRI+U="; + }) + (fetchpatch { + # [PATCH] topology: fix UB due to 1 << 31 + url = "https://github.com/ROCm/ROCR-Runtime/commit/d1d00bfee386d263e13c2b64fb6ffd1156deda7c.patch"; + hash = "sha256-u70WEZaphQ7qTfgQPFATwdKWtHytu7CFH7Pzv1rOM8w="; + }) + (fetchpatch { + # [PATCH] kfd_ioctl: fix UB due to 1 << 31 + url = "https://github.com/ROCm/ROCR-Runtime/commit/41bfc66aef437a5b349f71105fa4b907cc7e17d5.patch"; + hash = "sha256-A7VhPR3eSsmjq2cTBSjBIz9i//WiNjoXm0EsRKtF+ns="; + }) + ./remove-hsa-aqlprofile-dep.patch ]; postPatch = '' - patchShebangs image/blit_src/create_hsaco_ascii_file.sh - patchShebangs core/runtime/trap_handler/create_trap_handler_header.sh - patchShebangs core/runtime/blit_shaders/create_blit_shader_header.sh + patchShebangs --host image core runtime substituteInPlace CMakeLists.txt \ --replace 'hsa/include/hsa' 'include/hsa' - # We compile clang before rocm-device-libs, so patch it in afterwards - # Replace object version: https://github.com/ROCm/ROCR-Runtime/issues/166 (TODO: Remove on LLVM update?) 
- substituteInPlace image/blit_src/CMakeLists.txt \ - --replace '-cl-denorms-are-zero' '-cl-denorms-are-zero --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode' \ - --replace '-mcode-object-version=4' '-mcode-object-version=5' + export HIP_DEVICE_LIB_PATH="${rocm-device-libs}/amdgcn/bitcode" ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -80,8 +105,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ ncsa ]; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocm-runtime/remove-hsa-aqlprofile-dep.patch b/pkgs/development/rocm-modules/6/rocm-runtime/remove-hsa-aqlprofile-dep.patch new file mode 100644 index 000000000000..b8ed57d049bc --- /dev/null +++ b/pkgs/development/rocm-modules/6/rocm-runtime/remove-hsa-aqlprofile-dep.patch @@ -0,0 +1,27 @@ +libhsa-amd-aqlprofile64 library is unfree +Bug: https://github.com/ROCm/ROCm/issues/1781 +--- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp ++++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +@@ -1333,11 +1333,6 @@ hsa_status_t GpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const { + setFlag(HSA_EXTENSION_AMD_PC_SAMPLING); + } + +- if (os::LibHandle lib = os::LoadLib(kAqlProfileLib)) { +- os::CloseLib(lib); +- setFlag(HSA_EXTENSION_AMD_AQLPROFILE); +- } +- + setFlag(HSA_EXTENSION_AMD_PROFILER); + + break; +--- a/runtime/hsa-runtime/core/runtime/hsa.cpp ++++ b/runtime/hsa-runtime/core/runtime/hsa.cpp +@@ -490,7 +490,7 @@ hsa_status_t hsa_system_get_major_extension_table(uint16_t extension, uint16_t v + return HSA_STATUS_SUCCESS; + } + +- if (extension == 
HSA_EXTENSION_AMD_AQLPROFILE) { ++ if (0) { + if (version_major != hsa_ven_amd_aqlprofile_VERSION_MAJOR) { + debug_print("aqlprofile API incompatible ver %d, current ver %d\n", + version_major, hsa_ven_amd_aqlprofile_VERSION_MAJOR); diff --git a/pkgs/development/rocm-modules/6/rocm-smi/default.nix b/pkgs/development/rocm-modules/6/rocm-smi/default.nix index aeb38e4ad8de..c909764bc8e3 100644 --- a/pkgs/development/rocm-modules/6/rocm-smi/default.nix +++ b/pkgs/development/rocm-modules/6/rocm-smi/default.nix @@ -1,20 +1,21 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, cmake -, wrapPython +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + wrapPython, }: stdenv.mkDerivation (finalAttrs: { pname = "rocm-smi"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "rocm_smi_lib"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-fS52hpTv1WEycwkGZLXjz383WJWzyk8RvJRshEQSG/A="; + hash = "sha256-j9pkyUt+p6IkhawIhiTymqDBydxXZunxmdyCyRN0RxE="; }; patches = [ ./cmake.patch ]; @@ -34,13 +35,14 @@ stdenv.mkDerivation (finalAttrs: { postInstall = '' wrapPythonProgramsIn $out + mv $out/libexec/rocm_smi/.rsmiBindingsInit.py-wrapped $out/libexec/rocm_smi/rsmiBindingsInit.py mv $out/libexec/rocm_smi/.rsmiBindings.py-wrapped $out/libexec/rocm_smi/rsmiBindings.py ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -49,6 +51,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; platforms = [ "x86_64-linux" ]; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocm-tests/default.nix 
b/pkgs/development/rocm-modules/6/rocm-tests/default.nix new file mode 100644 index 000000000000..84762ff70e73 --- /dev/null +++ b/pkgs/development/rocm-modules/6/rocm-tests/default.nix @@ -0,0 +1,32 @@ +{ + clr, + ollama, + python3Packages, + rocmPackages, + magma-hip, + emptyDirectory, + stdenv, +}: +# This package exists purely to have a bunch of passthru.tests attrs +stdenv.mkDerivation { + name = "rocm-tests"; + nativeBuildInputs = [ + clr + ]; + src = emptyDirectory; + postInstall = "mkdir -p $out"; + passthru.tests = { + ollama = ollama.override { + inherit rocmPackages; + acceleration = "rocm"; + }; + torch = python3Packages.torch.override { + inherit rocmPackages; + rocmSupport = true; + cudaSupport = false; + magma-hip = magma-hip.override { + inherit rocmPackages; + }; + }; + }; +} diff --git a/pkgs/development/rocm-modules/6/rocm-thunk/default.nix b/pkgs/development/rocm-modules/6/rocm-thunk/default.nix deleted file mode 100644 index 99a1d3c542d1..000000000000 --- a/pkgs/development/rocm-modules/6/rocm-thunk/default.nix +++ /dev/null @@ -1,54 +0,0 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, pkg-config -, cmake -, libdrm -, numactl -}: - -stdenv.mkDerivation (finalAttrs: { - pname = "rocm-thunk"; - version = "6.0.2"; - - src = fetchFromGitHub { - owner = "ROCm"; - repo = "ROCT-Thunk-Interface"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-F6Qi+A9DuSx2e4WSfp4cnniKr0CkCZcZqsKwQmmZHhk="; - }; - - nativeBuildInputs = [ - pkg-config - cmake - ]; - - buildInputs = [ - libdrm - numactl - ]; - - cmakeFlags = [ - # Manually define CMAKE_INSTALL_ - # See: https://github.com/NixOS/nixpkgs/pull/197838 - "-DCMAKE_INSTALL_BINDIR=bin" - "-DCMAKE_INSTALL_LIBDIR=lib" - "-DCMAKE_INSTALL_INCLUDEDIR=include" - ]; - - passthru.updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - }; - - meta = with lib; { - description = "Radeon open compute thunk interface"; - homepage = 
"https://github.com/ROCm/ROCT-Thunk-Interface"; - license = with licenses; [ bsd2 mit ]; - maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; - platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; - }; -}) diff --git a/pkgs/development/rocm-modules/6/rocminfo/default.nix b/pkgs/development/rocm-modules/6/rocminfo/default.nix index 9c88274d722f..79780012a405 100644 --- a/pkgs/development/rocm-modules/6/rocminfo/default.nix +++ b/pkgs/development/rocm-modules/6/rocminfo/default.nix @@ -9,24 +9,25 @@ busybox, python3, gnugrep, + clr, # Only for localGpuTargets # rocminfo requires that the calling user have a password and be in # the video group. If we let rocm_agent_enumerator rely upon # rocminfo's output, then it, too, has those requirements. Instead, # we can specify the GPU targets for this system (e.g. "gfx803" for # Polaris) such that no system call is needed for downstream # compilers to determine the desired target. - defaultTargets ? [ ], + defaultTargets ? 
(clr.localGpuTargets or [ ]), }: stdenv.mkDerivation (finalAttrs: { - version = "6.0.2"; + version = "6.3.1"; pname = "rocminfo"; src = fetchFromGitHub { owner = "ROCm"; repo = "rocminfo"; rev = "rocm-${finalAttrs.version}"; - sha256 = "sha256-k0QeCyQcarGbAh4ft8Y7JBK6l2nWxDUc20XoYmtrMMs="; + sha256 = "sha256-TL57Mznq5qPorDON0EaINBCoEFMN4dcAmRfRgS//nok="; }; nativeBuildInputs = [ @@ -49,8 +50,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -59,9 +60,5 @@ stdenv.mkDerivation (finalAttrs: { license = licenses.ncsa; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; platforms = platforms.linux; - broken = - stdenv.hostPlatform.isAarch64 - || versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocmlir/default.nix b/pkgs/development/rocm-modules/6/rocmlir/default.nix index 4fe3bba2bd38..a403dadb882f 100644 --- a/pkgs/development/rocm-modules/6/rocmlir/default.nix +++ b/pkgs/development/rocm-modules/6/rocmlir/default.nix @@ -7,7 +7,6 @@ cmake, rocm-cmake, rocminfo, - ninja, clr, git, libxml2, @@ -20,6 +19,13 @@ buildTests ? false, # `argument of type 'NoneType' is not iterable` }: +# FIXME: rocmlir has an entire separate LLVM build in a subdirectory, which is silly +# It seems to be forked from AMD's own LLVM +# If possible reusing the rocmPackages.llvm build would be better +# Would have to confirm it is compatible with ROCm's tagged LLVM.
+# Fairly likely it's not given AMD's track record with forking their own software in incompatible ways +# in subdirs + # Theoretically, we could have our MLIR have an output # with the source and built objects so that we can just # use it as the external LLVM repo for this @@ -36,7 +42,7 @@ let in stdenv.mkDerivation (finalAttrs: { pname = "rocmlir${suffix}"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -50,13 +56,12 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "rocMLIR"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-AypY0vL8Ij1zLycwpG2EPWWl4utp4ejXpAK0Jj/UvrA="; + hash = "sha256-0SQ6uLDRfVfdCX+8a7D6pu6dYlFvX0HFzCDEvlKYfak="; }; nativeBuildInputs = [ cmake rocm-cmake - ninja clr python3Packages.python python3Packages.tomli @@ -75,23 +80,17 @@ stdenv.mkDerivation (finalAttrs: { ]; patches = [ - (fetchpatch { - name = "fix-TosaToRock-missing-includes.patch"; - url = "https://github.com/ROCm/rocMLIR/commit/80b8c94a5dd6ab832733116fe0339c1d6011ab57.patch"; - hash = "sha256-przg1AQZTiVbVd/4wA+KlGXu/RISO5n11FBkmUFKRSA="; - }) - (fetchpatch { - name = "fix-cmake-depedency-on-transforms.patch"; - url = "https://github.com/ROCm/rocMLIR/commit/b85ca4855e0f0214c2fd695e493c884cf08a3472.patch"; - hash = "sha256-m108PnwvDAN3xWko+gZMgvCNFl4LXTvC67JHXhFHeBc="; - }) + ./initparamdata-sort-const.patch ]; cmakeFlags = [ "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}" - "-DLLVM_ENABLE_ZSTD=ON" - "-DLLVM_ENABLE_ZLIB=ON" + "-DCMAKE_BUILD_TYPE=Release" + "-DLLVM_USE_LINKER=lld" + "-DLLVM_ENABLE_ZSTD=FORCE_ON" + "-DLLVM_ENABLE_ZLIB=FORCE_ON" + "-DLLVM_ENABLE_LIBCXX=ON" "-DLLVM_ENABLE_TERMINFO=ON" "-DROCM_PATH=${clr}" # Manually define CMAKE_INSTALL_ @@ -99,9 +98,7 @@ stdenv.mkDerivation (finalAttrs: { "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" - ] - ++ lib.optionals buildRockCompiler [ - "-DBUILD_FAT_LIBROCKCOMPILER=ON" + (lib.cmakeBool "BUILD_FAT_LIBROCKCOMPILER" buildRockCompiler) ] ++ 
lib.optionals (!buildRockCompiler) [ "-DROCM_TEST_CHIPSET=gfx000" @@ -111,6 +108,10 @@ stdenv.mkDerivation (finalAttrs: { patchShebangs mlir patchShebangs external/llvm-project/mlir/lib/Dialect/GPU/AmdDeviceLibsIncGen.py + # Fixes mlir/lib/Analysis/BufferDependencyAnalysis.cpp:41:19: error: redefinition of 'read' + substituteInPlace mlir/lib/Analysis/BufferDependencyAnalysis.cpp \ + --replace-fail "enum EffectType { read, write, unknown };" "enum class EffectType { read, write, unknown };" + # remove when no longer required substituteInPlace mlir/test/{e2e/generateE2ETest.py,fusion/e2e/generate-fusion-tests.py} \ --replace-fail "\"/opt/rocm/bin" "\"${rocminfo}/bin" @@ -150,10 +151,9 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - page = "tags?per_page=2"; - filter = ".[1].name | split(\"-\") | .[1]"; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + page = "tags?per_page=4"; }; meta = with lib; { @@ -162,8 +162,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ asl20 ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocmlir/initparamdata-sort-const.patch b/pkgs/development/rocm-modules/6/rocmlir/initparamdata-sort-const.patch new file mode 100644 index 000000000000..37f521f6e02d --- /dev/null +++ b/pkgs/development/rocm-modules/6/rocmlir/initparamdata-sort-const.patch @@ -0,0 +1,13 @@ +diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h b/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h +index 3f5ee596819a..590d53788822 100644 +--- a/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h ++++ b/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h +@@ -209,7 +209,7 @@ private: + 
size_t original_pos; + int64_t padding_amount; + +- bool operator<(const InitParamData &rhs) { ++ bool operator<(const InitParamData &rhs) const { + if (this->padding_amount < rhs.padding_amount) { + return true; + } else if (this->padding_amount == rhs.padding_amount) { diff --git a/pkgs/development/rocm-modules/6/rocprim/default.nix b/pkgs/development/rocm-modules/6/rocprim/default.nix index 3e8525655141..5b13e5b5e3cc 100644 --- a/pkgs/development/rocm-modules/6/rocprim/default.nix +++ b/pkgs/development/rocm-modules/6/rocprim/default.nix @@ -1,87 +1,91 @@ -{ lib -, fetchpatch -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, cmake -, rocm-cmake -, clr -, gtest -, gbenchmark -, buildTests ? false -, buildBenchmarks ? false -, gpuTargets ? [ ] +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + clr, + gtest, + gbenchmark, + buildTests ? false, + buildBenchmarks ? false, + gpuTargets ? [ ], }: stdenv.mkDerivation (finalAttrs: { pname = "rocprim"; - version = "6.0.2"; + version = "6.3.1"; - outputs = [ - "out" - ] ++ lib.optionals buildTests [ - "test" - ] ++ lib.optionals buildBenchmarks [ - "benchmark" - ]; + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ]; src = fetchFromGitHub { owner = "ROCm"; repo = "rocPRIM"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-nWvq26qRPZ6Au1rc5cR74TKArcdUFg7O9djFi8SvMeM="; + hash = "sha256-0aHxpBuYIYhI2UER45YhHHL5YcxA+XeXoihcUs2AmCo="; }; - patches = [ - (fetchpatch { - name = "arch-conversion-marco.patch"; - url = "https://salsa.debian.org/rocm-team/rocprim/-/raw/70c8aaee3cf545d92685f4ed9bf8f41e3d4d570c/debian/patches/arch-conversion-macro.patch"; - hash = "sha256-oXdmbCArOB5bKE8ozDFrSh4opbO+c4VI6PNhljeUSms="; - }) - ]; - nativeBuildInputs = [ cmake rocm-cmake clr ]; - buildInputs = lib.optionals buildTests [ - gtest - ] ++ lib.optionals buildBenchmarks [ - gbenchmark - ]; + buildInputs = + lib.optionals 
buildTests [ + gtest + ] + ++ lib.optionals buildBenchmarks [ + gbenchmark + ]; - cmakeFlags = [ - "-DCMAKE_CXX_COMPILER=hipcc" - # Manually define CMAKE_INSTALL_ - # See: https://github.com/NixOS/nixpkgs/pull/197838 - "-DCMAKE_INSTALL_BINDIR=bin" - "-DCMAKE_INSTALL_LIBDIR=lib" - "-DCMAKE_INSTALL_INCLUDEDIR=include" - ] ++ lib.optionals (gpuTargets != [ ]) [ - "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" - ] ++ lib.optionals buildTests [ - "-DBUILD_TEST=ON" - ] ++ lib.optionals buildBenchmarks [ - "-DBUILD_BENCHMARK=ON" - ]; + cmakeFlags = + [ + "-DCMAKE_BUILD_TYPE=Release" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_TEST=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_BENCHMARK=ON" + ]; - postInstall = lib.optionalString buildTests '' - mkdir -p $test/bin - mv $out/bin/test_* $test/bin - mv $out/bin/rocprim $test/bin - '' + lib.optionalString buildBenchmarks '' - mkdir -p $benchmark/bin - mv $out/bin/benchmark_* $benchmark/bin - '' + lib.optionalString (buildTests || buildBenchmarks) '' - rmdir $out/bin - ''; + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/test_* $test/bin + mv $out/bin/rocprim $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/benchmark_* $benchmark/bin + '' + + lib.optionalString (buildTests || buildBenchmarks) '' + rmdir $out/bin + ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -90,6 +94,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = 
teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocprofiler-register/default.nix b/pkgs/development/rocm-modules/6/rocprofiler-register/default.nix new file mode 100644 index 000000000000..68e0955d4302 --- /dev/null +++ b/pkgs/development/rocm-modules/6/rocprofiler-register/default.nix @@ -0,0 +1,79 @@ +{ + lib, + stdenv, + rocm-runtime, + rocprofiler, + numactl, + libpciaccess, + libxml2, + elfutils, + fetchFromGitHub, + rocmUpdateScript, + cmake, + clang, + clr, + python3Packages, + gpuTargets ? clr.gpuTargets, +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "rocprofiler-register"; + version = "6.3.1"; + + src = fetchFromGitHub { + owner = "ROCm"; + repo = "rocprofiler-register"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-UZsCiGnudsbL1v5lKBx7Vz3/HRnGn4f86Pd+qu3ryh0="; + fetchSubmodules = true; + }; + + nativeBuildInputs = [ + cmake + clang + clr + ]; + + buildInputs = [ + numactl + libpciaccess + libxml2 + elfutils + rocm-runtime + + rocprofiler.rocmtoolkit-merged + + python3Packages.lxml + python3Packages.cppheaderparser + python3Packages.pyyaml + python3Packages.barectf + python3Packages.pandas + ]; + cmakeFlags = [ + "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip" + "-DHIP_ROOT_DIR=${clr}" + "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + "-DBUILD_TEST=OFF" + "-DROCPROFILER_BUILD_TESTS=0" + "-DROCPROFILER_BUILD_SAMPLES=0" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ]; + + passthru.updateScript = rocmUpdateScript { + name = "rocprofiler-register"; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; + }; + + meta = with lib; { + description = "Helper library that coordinates interception of the HSA/HIP/ROCTx API tables by rocprofiler"; + homepage =
"https://github.com/ROCm/rocprofiler-register"; + license = with licenses; [ mit ]; # mitx11 + maintainers = teams.rocm.members; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/development/rocm-modules/6/rocprofiler/default.nix b/pkgs/development/rocm-modules/6/rocprofiler/default.nix index a80c6e90f132..ed613b726a55 100644 --- a/pkgs/development/rocm-modules/6/rocprofiler/default.nix +++ b/pkgs/development/rocm-modules/6/rocprofiler/default.nix @@ -4,17 +4,14 @@ fetchFromGitHub, rocmUpdateScript, symlinkJoin, - replaceVars, cmake, clang, clr, rocm-core, - rocm-thunk, + rocm-runtime, rocm-device-libs, roctracer, rocdbgapi, - rocm-smi, - hsa-amd-aqlprofile-bin, numactl, libpciaccess, libxml2, @@ -22,6 +19,7 @@ mpi, systemd, gtest, + git, python3Packages, gpuTargets ? clr.gpuTargets, }: @@ -32,12 +30,10 @@ let paths = [ rocm-core - rocm-thunk + rocm-runtime rocm-device-libs roctracer rocdbgapi - rocm-smi - hsa-amd-aqlprofile-bin clr ]; @@ -48,32 +44,27 @@ let in stdenv.mkDerivation (finalAttrs: { pname = "rocprofiler"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "rocprofiler"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-yzgw9g5cHAZpdbU44+1ScZyUcZ2I4GGfjbm9GSqCClk="; + hash = "sha256-kLiqKwxpeAkziBq4FRWhJ3IAvxVRcxi3AEEWgcVOfw4="; + fetchSubmodules = true; }; patches = [ # These just simply won't build ./0000-dont-install-tests-hsaco.patch - - # Fix bad paths - (replaceVars ./0001-fix-shell-scripts.patch { - rocmtoolkit_merged = rocmtoolkit-merged; - }) - - # Fix for missing uint32_t not defined - ./0002-include-stdint-in-version.patch + ./optional-aql-in-cmake.patch ]; nativeBuildInputs = [ cmake clang clr + git python3Packages.lxml python3Packages.cppheaderparser python3Packages.pyyaml @@ -93,12 +84,20 @@ stdenv.mkDerivation (finalAttrs: { propagatedBuildInputs = [ rocmtoolkit-merged ]; + # HACK: allow building without aqlprofile; this probably explodes at runtime if profiling is used + env.LDFLAGS = "-z nodefs
-Wl,-undefined,dynamic_lookup,--unresolved-symbols=ignore-all"; + #HACK: rocprofiler's cmake doesn't add these deps properly + env.CXXFLAGS = "-I${libpciaccess}/include -I${numactl.dev}/include -I${rocmtoolkit-merged}/include -I${elfutils.dev}/include -w"; + cmakeFlags = [ "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip" "-DHIP_ROOT_DIR=${clr}" "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DBUILD_TEST=OFF" + "-DROCPROFILER_BUILD_TESTS=0" + "-DROCPROFILER_BUILD_SAMPLES=0" "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" @@ -107,6 +106,13 @@ stdenv.mkDerivation (finalAttrs: { postPatch = '' patchShebangs . + substituteInPlace cmake_modules/rocprofiler_utils.cmake \ + --replace-fail 'function(ROCPROFILER_CHECKOUT_GIT_SUBMODULE)' 'function(ROCPROFILER_CHECKOUT_GIT_SUBMODULE) + return()' + + substituteInPlace CMakeLists.txt \ + --replace-fail 'set(ROCPROFILER_BUILD_TESTS ON)' "" + substituteInPlace tests-v2/featuretests/profiler/CMakeLists.txt \ --replace "--build-id=sha1" "--build-id=sha1 --rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode" @@ -115,20 +121,16 @@ stdenv.mkDerivation (finalAttrs: { ''; postInstall = '' - # Why do these not already have the executable bit set? - chmod +x $out/lib/rocprofiler/librocprof-tool.so - chmod +x $out/share/rocprofiler/tests-v1/test/ocl/SimpleConvolution - # Why do these have the executable bit set? 
- chmod -x $out/libexec/rocprofiler/counters/basic_counters.xml - chmod -x $out/libexec/rocprofiler/counters/derived_counters.xml + chmod -x $out/libexec/rocprofiler/counters/*.xml ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; + passthru.rocmtoolkit-merged = rocmtoolkit-merged; meta = with lib; { description = "Profiling with perf-counters and derived metrics"; @@ -136,8 +138,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; # mitx11 maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor clr.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocprofiler/optional-aql-in-cmake.patch b/pkgs/development/rocm-modules/6/rocprofiler/optional-aql-in-cmake.patch new file mode 100644 index 000000000000..83bf88fb1e7e --- /dev/null +++ b/pkgs/development/rocm-modules/6/rocprofiler/optional-aql-in-cmake.patch @@ -0,0 +1,147 @@ +From https://raw.githubusercontent.com/AphidGit/rocm_compile/refs/heads/main/rocprofiler.patch +diff --git a/cmake_modules/rocprofiler_env.cmake b/cmake_modules/rocprofiler_env.cmake +index 7b7c472..0aba3ed 100644 +--- a/cmake_modules/rocprofiler_env.cmake ++++ b/cmake_modules/rocprofiler_env.cmake +@@ -36,6 +36,7 @@ if(ROCPROFILER_DEBUG_TRACE) + target_compile_definitions(rocprofiler-build-flags INTERFACE DEBUG_TRACE=1) + endif() + ++set(ROCPROFILER_LD_AQLPROFILE false) + # Enable direct loading of AQL-profile HSA extension + if(ROCPROFILER_LD_AQLPROFILE) + target_compile_definitions(rocprofiler-build-flags INTERFACE ROCP_LD_AQLPROFILE=1) +@@ -80,9 +81,3 @@ if("${ROCM_ROOT_DIR}" STREQUAL "") + message(FATAL_ERROR "ROCM_ROOT_DIR is not found.") + endif() + +-find_library( +- HSA_AMD_AQLPROFILE_LIBRARY +- NAMES hsa-amd-aqlprofile64 +- HINTS 
${CMAKE_PREFIX_PATH} +- PATHS ${ROCM_ROOT_DIR} +- PATH_SUFFIXES lib REQUIRED) +diff --git a/src/api/CMakeLists.txt b/src/api/CMakeLists.txt +index 61782f0..16c83bf 100644 +--- a/src/api/CMakeLists.txt ++++ b/src/api/CMakeLists.txt +@@ -51,15 +51,6 @@ find_file( + NO_DEFAULT_PATH REQUIRED) + get_filename_component(HSA_RUNTIME_INC_PATH ${HSA_H} DIRECTORY) + +-find_library( +- AQLPROFILE_LIB "libhsa-amd-aqlprofile64.so" +- HINTS ${CMAKE_PREFIX_PATH} +- PATHS ${ROCM_PATH} +- PATH_SUFFIXES lib) +- +-if(NOT AQLPROFILE_LIB) +- message(FATAL_ERROR "AQL_PROFILE not installed. Please install hsa-amd-aqlprofile!") +-endif() + + # ######################################################################################## + # Adding Old Library Files +@@ -247,7 +238,7 @@ target_include_directories( + PUBLIC $ + PRIVATE ${LIB_DIR} ${ROOT_DIR} ${PROJECT_SOURCE_DIR}/include/rocprofiler) + target_link_libraries( +- ${ROCPROFILER_TARGET} PRIVATE ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64 c stdc++ ++ ${ROCPROFILER_TARGET} PRIVATE hsa-runtime64::hsa-runtime64 c stdc++ + dl rocprofiler::build-flags rocprofiler::memcheck) + + get_target_property(ROCPROFILER_LIBRARY_V1_NAME ${ROCPROFILER_TARGET} NAME) +@@ -325,8 +316,7 @@ target_link_options( + -Wl,--no-undefined) + target_link_libraries( + rocprofiler-v2 +- PRIVATE ${AQLPROFILE_LIB} +- hsa-runtime64::hsa-runtime64 ++ PRIVATE hsa-runtime64::hsa-runtime64 + Threads::Threads + atomic + numa +diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp +index 2c47186..6b39634 100644 +--- a/src/util/hsa_rsrc_factory.cpp ++++ b/src/util/hsa_rsrc_factory.cpp +@@ -155,17 +155,6 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize + if (kern_arg_pool_ == nullptr) + CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR); + +- // Get AqlProfile API table +- aqlprofile_api_ = {}; +-#ifdef ROCP_LD_AQLPROFILE +- status = LoadAqlProfileLib(&aqlprofile_api_); +-#else +- status = 
hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, +- hsa_ven_amd_aqlprofile_VERSION_MAJOR, +- sizeof(aqlprofile_api_), &aqlprofile_api_); +-#endif +- CHECK_STATUS("aqlprofile API table load failed", status); +- + // Get Loader API table + loader_api_ = {}; + status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, +diff --git a/test/util/hsa_rsrc_factory.cpp b/test/util/hsa_rsrc_factory.cpp +index 0a44d18..fab5b75 100644 +--- a/test/util/hsa_rsrc_factory.cpp ++++ b/test/util/hsa_rsrc_factory.cpp +@@ -137,17 +137,6 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize + if (cpu_pool_ == NULL) CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR); + if (kern_arg_pool_ == NULL) CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR); + +- // Get AqlProfile API table +- aqlprofile_api_ = {0}; +-#ifdef ROCP_LD_AQLPROFILE +- status = LoadAqlProfileLib(&aqlprofile_api_); +-#else +- status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, +- hsa_ven_amd_aqlprofile_VERSION_MAJOR, +- sizeof(aqlprofile_api_), &aqlprofile_api_); +-#endif +- CHECK_STATUS("aqlprofile API table load failed", status); +- + // Get Loader API table + loader_api_ = {0}; + status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, +diff --git a/tests-v2/unittests/core/CMakeLists.txt b/tests-v2/unittests/core/CMakeLists.txt +index 107cb51..0f6d4bf 100644 +--- a/tests-v2/unittests/core/CMakeLists.txt ++++ b/tests-v2/unittests/core/CMakeLists.txt +@@ -235,8 +235,7 @@ set_target_properties(runCoreUnitTests PROPERTIES + INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests") + target_link_libraries( + runCoreUnitTests +- PRIVATE ${AQLPROFILE_LIB} +- test_hsatool_library ++ PRIVATE test_hsatool_library + hsa-runtime64::hsa-runtime64 + Threads::Threads + GTest::gtest GTest::gtest_main +@@ -285,4 +284,4 @@ endif() + # 
for the *_FilePlugin tests + if(NOT EXISTS "${PROJECT_BINARY_DIR}/test-output") + file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/test-output") +-endif() +\ No newline at end of file ++endif() +diff --git a/tests-v2/unittests/profiler/CMakeLists.txt b/tests-v2/unittests/profiler/CMakeLists.txt +index 53180d5..0c4d4a7 100644 +--- a/tests-v2/unittests/profiler/CMakeLists.txt ++++ b/tests-v2/unittests/profiler/CMakeLists.txt +@@ -122,7 +122,7 @@ target_compile_definitions( + PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1) + + target_link_libraries( +- runUnitTests PRIVATE rocprofiler-v2 ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64 ++ runUnitTests PRIVATE rocprofiler-v2 hsa-runtime64::hsa-runtime64 + GTest::gtest GTest::gtest_main stdc++fs ${PCIACCESS_LIBRARIES} dw elf c dl) + + add_dependencies(tests runUnitTests) +@@ -158,4 +158,4 @@ endif() + # for the *_FilePlugin tests + if(NOT EXISTS "${PROJECT_BINARY_DIR}/test-output") + file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/test-output") +-endif() +\ No newline at end of file ++endif() diff --git a/pkgs/development/rocm-modules/6/rocr-debug-agent/default.nix b/pkgs/development/rocm-modules/6/rocr-debug-agent/default.nix index 6d9a91be00ef..cc1a5f973d9f 100644 --- a/pkgs/development/rocm-modules/6/rocr-debug-agent/default.nix +++ b/pkgs/development/rocm-modules/6/rocr-debug-agent/default.nix @@ -12,13 +12,13 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocr-debug-agent"; - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "rocr_debug_agent"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-8Q800T7mwBy8/rujVNyCQ0ZpZ9uPKKk+Sv9ibpWou/8="; + hash = "sha256-HYag5/E72hopDhS9EVcdyGgSvzbCMzKqLC+SIS28Y9M="; }; nativeBuildInputs = [ @@ -45,8 +45,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit 
(finalAttrs.src) repo; }; meta = with lib; { @@ -55,8 +55,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ ncsa ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocrand/default.nix b/pkgs/development/rocm-modules/6/rocrand/default.nix index 7d0e80b5af4e..02c96b5a8c18 100644 --- a/pkgs/development/rocm-modules/6/rocrand/default.nix +++ b/pkgs/development/rocm-modules/6/rocrand/default.nix @@ -1,34 +1,38 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, cmake -, rocm-cmake -, clr -, gtest -, gbenchmark -, buildTests ? false -, buildBenchmarks ? false -, gpuTargets ? [ ] +{ + lib, + stdenv, + fetchFromGitHub, + rocmUpdateScript, + cmake, + rocm-cmake, + clr, + gtest, + gbenchmark, + buildTests ? false, + buildBenchmarks ? false, + gpuTargets ? clr.localGpuTargets or [ ], }: stdenv.mkDerivation (finalAttrs: { - pname = "rocrand"; - version = "6.0.2"; + pname = "rocrand${clr.gpuArchSuffix}"; + version = "6.3.1"; - outputs = [ - "out" - ] ++ lib.optionals buildTests [ - "test" - ] ++ lib.optionals buildBenchmarks [ - "benchmark" - ]; + outputs = + [ + "out" + ] + ++ lib.optionals buildTests [ + "test" + ] + ++ lib.optionals buildBenchmarks [ + "benchmark" + ]; src = fetchFromGitHub { owner = "ROCm"; repo = "rocRAND"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-BBkcYOP+zh3OQTxuSkeiJizwnE9Gr5Jbhx0e8SU/mmU="; + hash = "sha256-rrRLPqEw39M+6dtPW8DcnQiSZNwxWNINJ1wjU098Vkk="; }; nativeBuildInputs = [ @@ -37,45 +41,52 @@ stdenv.mkDerivation (finalAttrs: { clr ]; - buildInputs = lib.optionals buildTests [ - gtest - ] ++ lib.optionals buildBenchmarks [ - gbenchmark - ]; + buildInputs = + lib.optionals buildTests [ + gtest + ] + ++ lib.optionals buildBenchmarks [ + gbenchmark + ]; - cmakeFlags = [ - "-DCMAKE_C_COMPILER=hipcc" - 
"-DCMAKE_CXX_COMPILER=hipcc" - "-DHIP_ROOT_DIR=${clr}" - # Manually define CMAKE_INSTALL_ - # See: https://github.com/NixOS/nixpkgs/pull/197838 - "-DCMAKE_INSTALL_BINDIR=bin" - "-DCMAKE_INSTALL_LIBDIR=lib" - "-DCMAKE_INSTALL_INCLUDEDIR=include" - ] ++ lib.optionals (gpuTargets != [ ]) [ - "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" - ] ++ lib.optionals buildTests [ - "-DBUILD_TEST=ON" - ] ++ lib.optionals buildBenchmarks [ - "-DBUILD_BENCHMARK=ON" - ]; + cmakeFlags = + [ + "-DHIP_ROOT_DIR=${clr}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] + ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ] + ++ lib.optionals buildTests [ + "-DBUILD_TEST=ON" + ] + ++ lib.optionals buildBenchmarks [ + "-DBUILD_BENCHMARK=ON" + ]; - postInstall = lib.optionalString buildTests '' - mkdir -p $test/bin - mv $out/bin/test_* $test/bin - '' + lib.optionalString buildBenchmarks '' - mkdir -p $benchmark/bin - mv $out/bin/benchmark_* $benchmark/bin - '' + lib.optionalString (buildTests || buildBenchmarks) '' - rm -r $out/bin/rocRAND - # Fail if bin/ isn't actually empty - rmdir $out/bin - ''; + postInstall = + lib.optionalString buildTests '' + mkdir -p $test/bin + mv $out/bin/test_* $test/bin + '' + + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + mv $out/bin/benchmark_* $benchmark/bin + '' + + lib.optionalString (buildTests || buildBenchmarks) '' + rm -r $out/bin/rocRAND + # Fail if bin/ isn't actually empty + rmdir $out/bin + ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -84,6 +95,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = 
teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocsolver/default.nix b/pkgs/development/rocm-modules/6/rocsolver/default.nix index 577ef73d2534..c7ddc190d3cc 100644 --- a/pkgs/development/rocm-modules/6/rocsolver/default.nix +++ b/pkgs/development/rocm-modules/6/rocsolver/default.nix @@ -6,6 +6,7 @@ cmake, rocm-cmake, rocblas, + rocprim, rocsparse, clr, fmt, @@ -14,12 +15,25 @@ lapack-reference, buildTests ? false, buildBenchmarks ? false, - gpuTargets ? [ ], # gpuTargets = [ "gfx803" "gfx900" "gfx906:xnack-" ] + gpuTargets ? ( + clr.localGpuTargets or [ + "gfx900" + "gfx906" + "gfx908" + "gfx90a" + "gfx942" + "gfx1010" + "gfx1030" + "gfx1100" + "gfx1101" + "gfx1102" + ] + ), }: stdenv.mkDerivation (finalAttrs: { - pname = "rocsolver"; - version = "6.0.2"; + pname = "rocsolver${clr.gpuArchSuffix}"; + version = "6.3.1"; outputs = [ @@ -36,12 +50,13 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "rocSOLVER"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-tglQpwCSFABRuEDiJrzQVFIdx9p85E2MiUYN0aoTAXo="; + hash = "sha256-+sGU+0CB48iolJSyYo+xH36q5LCUp+nKtOYbguzMuhg="; }; nativeBuildInputs = [ cmake + # no ninja, it buffers console output and nix times out long periods of no output rocm-cmake clr ] @@ -51,7 +66,11 @@ stdenv.mkDerivation (finalAttrs: { buildInputs = [ + # FIXME: rocblas and rocsolver can't build in parallel + # but rocsolver doesn't need rocblas' offload builds at build time + # could we build against a rocblas-minimal? 
rocblas + rocprim rocsparse fmt ] @@ -64,8 +83,9 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_CXX_COMPILER=hipcc" - "-DCMAKE_CXX_FLAGS=-Wno-switch" # Way too many warnings + "-DHIP_CLANG_NUM_PARALLEL_JOBS=4" + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_VERBOSE_MAKEFILE=ON" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" @@ -96,11 +116,11 @@ stdenv.mkDerivation (finalAttrs: { ''; passthru.updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + name = "rocsolver"; + inherit (finalAttrs.src) owner repo; }; + enableParallelBuilding = true; requiredSystemFeatures = [ "big-parallel" ]; meta = with lib; { @@ -111,8 +131,5 @@ stdenv.mkDerivation (finalAttrs: { platforms = platforms.linux; timeout = 14400; # 4 hours maxSilent = 14400; # 4 hours - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocsparse/default.nix b/pkgs/development/rocm-modules/6/rocsparse/default.nix index a4c29bc980c9..46498cf24688 100644 --- a/pkgs/development/rocm-modules/6/rocsparse/default.nix +++ b/pkgs/development/rocm-modules/6/rocsparse/default.nix @@ -15,12 +15,12 @@ python3Packages, buildTests ? false, buildBenchmarks ? false, # Seems to depend on tests - gpuTargets ? [ ], + gpuTargets ? 
clr.localGpuTargets or clr.gpuTargets, }: stdenv.mkDerivation (finalAttrs: { pname = "rocsparse"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -37,11 +37,12 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "rocSPARSE"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-nTYnEHkTtq0jBeMj4HXpqkJu8LQc+Z6mpjhMP7tJAHQ="; + hash = "sha256-vyLfXbnxPZlR6mfbLh1E7S7HdOSHjuhGQcfihAlvvwY="; }; nativeBuildInputs = [ cmake + # no ninja, it buffers console output and nix times out long periods of no output rocm-cmake clr gfortran @@ -61,7 +62,7 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_CXX_COMPILER=hipcc" + "-DCMAKE_BUILD_TYPE=Release" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" @@ -145,8 +146,8 @@ stdenv.mkDerivation (finalAttrs: { updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; }; @@ -156,8 +157,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocthrust/default.nix b/pkgs/development/rocm-modules/6/rocthrust/default.nix index 2044968441ff..05c12afcd437 100644 --- a/pkgs/development/rocm-modules/6/rocthrust/default.nix +++ b/pkgs/development/rocm-modules/6/rocthrust/default.nix @@ -15,7 +15,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocthrust"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -32,7 +32,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "rocThrust"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-Zk7FxcedaDUbx9RCX8aWN0xZO/B5cOs/l5MDqZKQpJo="; + hash = "sha256-c1+hqP/LipaQ2/lPJo79YBd9H0n0Y7yHkxe0/INE14s="; }; 
nativeBuildInputs = [ @@ -48,7 +48,6 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_CXX_COMPILER=hipcc" "-DHIP_ROOT_DIR=${clr}" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 @@ -64,9 +63,6 @@ stdenv.mkDerivation (finalAttrs: { ] ++ lib.optionals buildBenchmarks [ "-DBUILD_BENCHMARKS=ON" - ] - ++ lib.optionals (buildTests || buildBenchmarks) [ - "-DCMAKE_CXX_FLAGS=-Wno-deprecated-builtins" # Too much spam ]; postInstall = @@ -84,8 +80,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -94,8 +90,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ asl20 ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/roctracer/default.nix b/pkgs/development/rocm-modules/6/roctracer/default.nix index b5e411dd059d..5ddd3ced37fc 100644 --- a/pkgs/development/rocm-modules/6/roctracer/default.nix +++ b/pkgs/development/rocm-modules/6/roctracer/default.nix @@ -19,7 +19,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "roctracer"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -36,7 +36,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "roctracer"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-a6/N6W3JXVI0VZRGxlS3cVENC3VTP1w9UFnd0+EWAuo="; + hash = "sha256-GhnF7rqNLQLLB7nzIp0xNqyqBOwj9ZJ+hzzj1EAaXWU="; }; nativeBuildInputs = @@ -106,8 +106,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with 
lib; { @@ -116,8 +116,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; # mitx11 maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor clr.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rocwmma/default.nix b/pkgs/development/rocm-modules/6/rocwmma/default.nix index 7f6669312fd1..d529bb8d241d 100644 --- a/pkgs/development/rocm-modules/6/rocwmma/default.nix +++ b/pkgs/development/rocm-modules/6/rocwmma/default.nix @@ -14,12 +14,12 @@ buildExtendedTests ? false, buildBenchmarks ? false, buildSamples ? false, - gpuTargets ? [ ], # gpuTargets = [ "gfx908:xnack-" "gfx90a:xnack-" "gfx90a:xnack+" ... ] + gpuTargets ? [ ], }: stdenv.mkDerivation (finalAttrs: { pname = "rocwmma"; - version = "6.0.2"; + version = "6.3.1"; outputs = [ @@ -39,7 +39,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm"; repo = "rocWMMA"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-vbC4OuCmEpD38lVq0uXNw86iS4KkL6isOVq6vmlu1oM="; + hash = "sha256-kih3hn6QhcMmyj9n8f8eO+RIgKQgWKIuzg8fb0eoRPE="; }; patches = lib.optionals (buildTests || buildBenchmarks) [ @@ -64,7 +64,9 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ - "-DCMAKE_CXX_COMPILER=hipcc" + "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_omp_LIBRARY=${openmp}/lib" "-DROCWMMA_BUILD_TESTS=${if buildTests || buildBenchmarks then "ON" else "OFF"}" "-DROCWMMA_BUILD_SAMPLES=${if buildSamples then "ON" else "OFF"}" # Manually define CMAKE_INSTALL_ @@ -105,8 +107,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -115,8 +117,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = 
teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/rpp/default.nix b/pkgs/development/rocm-modules/6/rpp/default.nix index 31b67cc86057..cfde91f7d523 100644 --- a/pkgs/development/rocm-modules/6/rpp/default.nix +++ b/pkgs/development/rocm-modules/6/rpp/default.nix @@ -29,13 +29,13 @@ stdenv.mkDerivation (finalAttrs: { "cpu" ); - version = "6.0.2"; + version = "6.3.1"; src = fetchFromGitHub { owner = "ROCm"; repo = "rpp"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-AquAVoEqlsBVxd41hG2sVo9UoSS+255eCQzIfGkC/Tk="; + hash = "sha256-METwagek17/DdZGaOTQqvyU6xGt7OBMLHk4YM4KmgtA="; }; nativeBuildInputs = @@ -55,16 +55,19 @@ stdenv.mkDerivation (finalAttrs: { boost ]; + CFLAGS = "-I${openmp.dev}/include"; + CXXFLAGS = "-I${openmp.dev}/include"; cmakeFlags = [ + "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include" + "-DOpenMP_omp_LIBRARY=${openmp}/lib" "-DROCM_PATH=${clr}" ] ++ lib.optionals (gpuTargets != [ ]) [ "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" ] ++ lib.optionals (!useOpenCL && !useCPU) [ - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" "-DBACKEND=HIP" ] ++ lib.optionals (useOpenCL && !useCPU) [ @@ -86,8 +89,8 @@ stdenv.mkDerivation (finalAttrs: { passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; meta = with lib; { @@ -96,8 +99,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) diff --git a/pkgs/development/rocm-modules/6/tensile/default.nix 
b/pkgs/development/rocm-modules/6/tensile/default.nix index e2a892b34308..7e77523d7d08 100644 --- a/pkgs/development/rocm-modules/6/tensile/default.nix +++ b/pkgs/development/rocm-modules/6/tensile/default.nix @@ -1,6 +1,5 @@ { lib, - stdenv, fetchFromGitHub, fetchpatch, rocmUpdateScript, @@ -9,71 +8,99 @@ setuptools, pyyaml, msgpack, + simplejson, + ujson, + orjson, pandas, joblib, filelock, - rocminfo, - writeText, + clr, + rich, + isTensileLite ? false, }: buildPythonPackage rec { - pname = "tensile"; - version = "6.0.2"; + pname = if isTensileLite then "tensilelite" else "tensile"; + # Using a specific commit which has code object compression support from after the 6.3 release + # Without compression packages are too large for hydra + version = "6.3-unstable-2024-12-10"; format = "pyproject"; src = fetchFromGitHub { owner = "ROCm"; repo = "Tensile"; - rev = "rocm-${version}"; - hash = "sha256-B9/2Iw1chwDL6it1CKC8W8v4Qac/J2z9nwlpwjnllDc="; + rev = "1752af518190500891a865379a4569b8abf6ba01"; + hash = "sha256-Wvz4PVs//3Ox7ykZHpjPzOVwlyATyc+MmVVenfTzWK4="; }; + # TODO: It should be possible to run asm caps test ONCE for all supported arches + # We currently disable the test because it's slow and runs each time tensile launches + + postPatch = + lib.optionalString (!isTensileLite) '' + if grep -F .SafeLoader Tensile/LibraryIO.py; then + substituteInPlace Tensile/LibraryIO.py \ + --replace-fail "yaml.SafeLoader" "yaml.CSafeLoader" + fi + # See TODO above about asm caps test + substituteInPlace Tensile/Common.py \ + --replace-fail 'if globalParameters["AssemblerPath"] is not None:' "if False:" + '' + + '' + # Add an assert that the fallback 9,0,0 is supported before setting the kernel to it + # If it's not detected as supported we have an issue with compiler paths or the compiler is broken + # and it's better to stop immediately + substituteInPlace Tensile/KernelWriter.py \ + --replace-fail '= (9,0,0)' '= 
(9,0,0);assert(globalParameters["AsmCaps"][(9,0,0)]["SupportedISA"])' + find . -type f -iname "*.sh" -exec chmod +x {} \; + patchShebangs Tensile + ''; + buildInputs = [ setuptools ]; - propagatedBuildInputs = [ - pyyaml - msgpack - pandas - joblib - ]; + propagatedBuildInputs = + [ + pyyaml + msgpack + pandas + joblib + ] + ++ lib.optionals (!isTensileLite) [ + rich + ] + ++ lib.optionals isTensileLite [ + simplejson + ujson + orjson + ]; - patches = [ - (fetchpatch { - name = "Extend-Tensile-HIP-ISA-compatibility.patch"; + patches = + lib.optional (!isTensileLite) ./tensile-solutionstructs-perf-fix.diff + ++ lib.optional (!isTensileLite) ./tensile-create-library-dont-copy-twice.diff + ++ lib.optional (!isTensileLite) (fetchpatch { + # [PATCH] Extend Tensile HIP ISA compatibility + sha256 = "sha256-d+fVf/vz+sxGqJ96vuxe0jRMgbC5K6j5FQ5SJ1e3Sl8="; url = "https://github.com/GZGavinZhao/Tensile/commit/855cb15839849addb0816a6dde45772034a3e41f.patch"; - hash = "sha256-d+fVf/vz+sxGqJ96vuxe0jRMgbC5K6j5FQ5SJ1e3Sl8="; }) - (fetchpatch { - name = "Don-t-copy-file-twice-in-copyStaticFiles.patch"; - url = "https://github.com/GZGavinZhao/Tensile/commit/9e14d5a00a096bddac605910a0e4dfb4c35bb0d5.patch"; - hash = "sha256-gOzjJyD1K056OFQ+hK5nbUeBhxLTIgQLoT+0K12SypI="; - }) - ]; + ++ lib.optional isTensileLite ./tensilelite-create-library-dont-copy-twice.diff + ++ lib.optional isTensileLite ./tensilelite-gen_assembly-venv-err-handling.diff + ++ lib.optional isTensileLite ./tensilelite-compression.diff; doCheck = false; # Too many errors, not sure how to set this up properly nativeCheckInputs = [ pytestCheckHook filelock - rocminfo + clr ]; - env = { - ROCM_PATH = rocminfo; - }; - - # TODO: remove this workaround once https://github.com/NixOS/nixpkgs/pull/323869 - # does not cause issues anymore, or at least replace it with a better workaround - setupHook = writeText "setup-hook" '' - export TENSILE_ROCM_ASSEMBLER_PATH="${stdenv.cc.cc}/bin/clang++"; - ''; + env.ROCM_PATH = "${clr}";
pythonImportsCheck = [ "Tensile" ]; passthru.updateScript = rocmUpdateScript { name = pname; - owner = src.owner; - repo = src.repo; + inherit (src) owner repo; }; meta = with lib; { @@ -82,7 +109,5 @@ buildPythonPackage rec { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor version != versions.minor stdenv.cc.version || versionAtLeast version "7.0.0"; }; } diff --git a/pkgs/development/rocm-modules/6/tensile/tensile-create-library-dont-copy-twice.diff b/pkgs/development/rocm-modules/6/tensile/tensile-create-library-dont-copy-twice.diff new file mode 100644 index 000000000000..2b680241d9e4 --- /dev/null +++ b/pkgs/development/rocm-modules/6/tensile/tensile-create-library-dont-copy-twice.diff @@ -0,0 +1,20 @@ +diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py +index a1644606..c6ca2882 100644 +--- a/Tensile/TensileCreateLibrary.py ++++ b/Tensile/TensileCreateLibrary.py +@@ -852,9 +852,14 @@ def copyStaticFiles(outputPath=None): + "KernelHeader.h", + ] + ++ import filecmp + for fileName in libraryStaticFiles: + # copy file +- shutil.copy(os.path.join(globalParameters["SourcePath"], fileName), outputPath) ++ # no need to copy twice if it has already been copied ++ src = os.path.join(globalParameters["SourcePath"], fileName) ++ dst = os.path.join(outputPath, os.path.basename(src)) ++ if not os.path.isfile(dst) or not filecmp.cmp(src, dst): ++ shutil.copyfile(src, dst) + + return libraryStaticFiles + diff --git a/pkgs/development/rocm-modules/6/tensile/tensile-solutionstructs-perf-fix.diff b/pkgs/development/rocm-modules/6/tensile/tensile-solutionstructs-perf-fix.diff new file mode 100644 index 000000000000..7157238042ec --- /dev/null +++ b/pkgs/development/rocm-modules/6/tensile/tensile-solutionstructs-perf-fix.diff @@ -0,0 +1,48 @@ +diff --git a/Tensile/SolutionStructs.py b/Tensile/SolutionStructs.py +index f663c6f1..17bcf897 100644 +--- a/Tensile/SolutionStructs.py 
++++ b/Tensile/SolutionStructs.py +@@ -4828,24 +4828,26 @@ class Solution(collections.abc.Mapping): + # create a dictionary of lists of parameter values + @staticmethod + def getSerialNaming(objs): ++ valid_params = sorted(validParameters.keys()) + data = {} +- for objIdx in range(0, len(objs)): +- obj = objs[objIdx] +- for paramName in sorted(obj.keys()): +- if paramName in list(validParameters.keys()): +- paramValue = obj[paramName] +- if paramName in data: +- if paramValue not in data[paramName]: +- data[paramName].append(paramValue) +- else: +- data[paramName] = [ paramValue ] +- maxObjs = 1 +- for paramName in data: +- if not isinstance(data[paramName][0],dict): +- data[paramName] = sorted(data[paramName]) +- maxObjs *= len(data[paramName]) +- numDigits = len(str(maxObjs)) +- return [ data, numDigits ] ++ ++ objs = [getattr(obj, "_state", obj) for obj in objs] ++ ++ for param in valid_params: ++ d = [] ++ for obj in objs: ++ if param in obj: ++ v = obj[param] ++ if v not in d: ++ d.append(v) ++ if len(d): ++ if not isinstance(d[0], dict): d.sort() ++ data[param] = d ++ ++ # Calculate max objects using prod() from math module ++ max_objs = math.prod(len(values) for values in data.values()) ++ num_digits = len(str(max_objs)) ++ return data, num_digits + + ######################################## + # Get Name Serial \ No newline at end of file diff --git a/pkgs/development/rocm-modules/6/tensile/tensilelite-compression.diff b/pkgs/development/rocm-modules/6/tensile/tensilelite-compression.diff new file mode 100644 index 000000000000..bfc0146efe3d --- /dev/null +++ b/pkgs/development/rocm-modules/6/tensile/tensilelite-compression.diff @@ -0,0 +1,345 @@ +diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py +index b8cea84558..1bc24bd1dd 100644 +--- a/Tensile/TensileCreateLibrary.py ++++ b/Tensile/TensileCreateLibrary.py +@@ -41,6 +41,7 @@ + from .SolutionLibrary import MasterSolutionLibrary + from .SolutionStructs import Solution + from 
.CustomYamlLoader import load_logic_gfx_arch ++from .Utilities.Profile import profile + + import argparse + import collections +@@ -1233,7 +1234,7 @@ def validateLibrary(masterLibraries: MasterSolutionLibrary, + ################################################################################ + # Tensile Create Library + ################################################################################ +-@timing ++@profile + def TensileCreateLibrary(): + print1("") + print1(HR) +@@ -1558,7 +1559,6 @@ def param(key, value): + + print1("# Check if generated files exists.") + +- @timing + def checkFileExistence(files): + for filePath in files: + if not os.path.exists(filePath): +diff --git a/Tensile/Utilities/Profile.py b/Tensile/Utilities/Profile.py +new file mode 100644 +index 0000000000..cc3c7eb44c +--- /dev/null ++++ b/Tensile/Utilities/Profile.py +@@ -0,0 +1,77 @@ ++################################################################################ ++# ++# Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved. ++# ++# Permission is hereby granted, free of charge, to any person obtaining a copy ++# of this software and associated documentation files (the "Software"), to deal ++# in the Software without restriction, including without limitation the rights ++# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++# copies of the Software, and to permit persons to whom the Software is ++# furnished to do so, subject to the following conditions: ++# ++# The above copyright notice and this permission notice shall be included in ++# all copies or substantial portions of the Software. ++# ++# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE ++# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++# SOFTWARE. ++# ++################################################################################ ++ ++import cProfile ++import pstats ++import os ++ ++from pathlib import Path ++from datetime import datetime, timezone ++from typing import Callable, Tuple ++ ++PROFILE_ENV_VAR = "TENSILE_PROFILE" ++ ++def profile(func: Callable) -> Callable: ++ """Profiling decorator. ++ ++ Add ``@profile`` to mark a function for profiling; set the environment variable ++ TENSILE_PROFILE=ON to enable profiling decorated functions. ++ """ ++ if not envVariableIsSet(PROFILE_ENV_VAR): ++ return func ++ def wrapper(*args, **kwargs): ++ path, filename = initProfileArtifacts(func.__name__) ++ ++ prof = cProfile.Profile() ++ output = prof.runcall(func, *args, **kwargs) ++ result = pstats.Stats(prof) ++ result.sort_stats(pstats.SortKey.TIME) ++ result.dump_stats(path/filename) ++ ++ return output ++ return wrapper ++ ++def envVariableIsSet(varName: str) -> bool: ++ """Checks if the provided environment variable is set to "ON", "TRUE", or "1" ++ Args: ++ varName: Environment variable name. ++ Returns: ++ True if the environment variable is set, otherwise False. ++ """ ++ value = os.environ.get(varName, "").upper() ++ return True if value in ["ON", "TRUE", "1"] else False ++ ++def initProfileArtifacts(funcName: str) -> Tuple[Path, str]: ++ """Initializes filenames and paths for profiling artifacts based on the current datetime ++ Args: ++ funcName: The name of the function being profiled, nominally passed via func.__name__ ++ Returns: ++ A tuple (path, filename) where the path is the artifact directory and filename is ++ a .prof file with the profiling results. 
++ """ ++ dt = datetime.now(timezone.utc) ++ filename = f"{funcName}-{dt.strftime('%Y-%m-%dT%H-%M-%SZ')}.prof" ++ path = Path().cwd()/f"profiling-results-{dt.strftime('%Y-%m-%d')}" ++ path.mkdir(exist_ok=True) ++ return path, filename + +diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py +index e62b0072df..2c843ba936 100644 +--- a/Tensile/TensileCreateLibrary.py ++++ b/Tensile/TensileCreateLibrary.py +@@ -56,7 +56,7 @@ + import sys + from timeit import default_timer as timer + from pathlib import Path +-from typing import Sequence, List ++from typing import Sequence, List, Union + + def timing(func): + def wrapper(*args, **kwargs): +@@ -90,87 +90,142 @@ def processKernelSource(kernel, kernelWriterAssembly, ti): + + return (err, src, header, kernelName, filename) + +-def getAssemblyCodeObjectFiles(kernels, kernelWriterAssembly, outputPath): +- destDir = ensurePath(os.path.join(outputPath, 'library')) +- asmDir = kernelWriterAssembly.getAssemblyDirectory() +- archs = collections.defaultdict(list) ++def linkIntoCodeObject( ++ objFiles: List[str], coPathDest: Union[Path, str], kernelWriterAssembly: KernelWriterAssembly ++): ++ """Links object files into a code object file. + +- for k in filter(lambda k: k['KernelLanguage'] == 'Assembly', kernels): +- archs[tuple(k['ISA'])].append(k) ++ Args: ++ objectFiles: A list of object files to be linked. ++ coPathDest: The destination path for the code object file. ++ kernelWriterAssembly: An instance of KernelWriterAssembly to get link arguments. + +- coFiles = [] ++ Raises: ++ RuntimeError: If linker invocation fails. 
++ """ ++ if os.name == "nt": ++ # On Windows, the objectFiles list command line (including spaces) ++ # exceeds the limit of 8191 characters, so using response file ++ ++ responseFile = os.path.join('/tmp', 'clangArgs.txt') ++ with open(responseFile, 'wt') as file: ++ file.write(" ".join(objFiles)) ++ file.flush() ++ ++ args = [globalParameters['AssemblerPath'], '-target', 'amdgcn-amd-amdhsa', '-o', str(coPathDest), '@' + responseFile] ++ subprocess.check_call(args) ++ else: ++ numObjFiles = len(objFiles) ++ maxObjFiles = 10000 ++ ++ if numObjFiles > maxObjFiles: ++ batchedObjFiles = [objFiles[i:i+maxObjFiles] for i in range(0, numObjFiles, maxObjFiles)] ++ batchSize = int(math.ceil(numObjFiles / maxObjFiles)) ++ ++ newObjFiles = [str(coPathDest) + "." + str(i) for i in range(0, batchSize)] ++ newObjFilesOutput = [] ++ ++ for batch, filename in zip(batchedObjFiles, newObjFiles): ++ if len(batch) > 1: ++ args = [globalParameters["ROCmLdPath"], "-r"] + batch + [ "-o", filename] ++ print2(f"Linking object files into fewer object files: {' '.join(args)}") ++ subprocess.check_call(args) ++ newObjFilesOutput.append(filename) ++ else: ++ newObjFilesOutput.append(batch[0]) ++ ++ args = kernelWriterAssembly.getLinkCodeObjectArgs(newObjFilesOutput, str(coPathDest)) ++ print2(f"Linking object files into code object: {' '.join(args)}") ++ subprocess.check_call(args) ++ else: ++ args = kernelWriterAssembly.getLinkCodeObjectArgs(objFiles, str(coPathDest)) ++ print2(f"Linking object files into code object: {' '.join(args)}") ++ subprocess.check_call(args) ++ ++ ++def compressCodeObject( ++ coPathSrc: Union[Path, str], coPathDest: Union[Path, str], gfx: str, bundler: str ++): ++ """Compresses a code object file using the provided bundler. ++ ++ Args: ++ coPathSrc: The source path of the code object file to be compressed. ++ coPathDest: The destination path for the compressed code object file. ++ gfx: The target GPU architecture.
++ bundler: The path to the Clang Offload Bundler executable. ++ ++ Raises: ++ RuntimeError: If compressing the code object file fails. ++ """ ++ args = [ ++ bundler, ++ "--compress", ++ "--type=o", ++ "--bundle-align=4096", ++ f"--targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--{gfx}", ++ "--input=/dev/null", ++ f"--input={str(coPathSrc)}", ++ f"--output={str(coPathDest)}", ++ ] ++ ++ print2(f"Bundling/compressing code objects: {' '.join(args)}") ++ try: ++ out = subprocess.check_output(args, stderr=subprocess.STDOUT) ++ print2(f"Output: {out}") ++ except subprocess.CalledProcessError as err: ++ raise RuntimeError( ++ f"Error compressing code object via bundling: {err.output}\nFailed command: {' '.join(args)}" ++ ) ++ ++def buildAssemblyCodeObjectFiles(kernels, kernelWriterAssembly, outputPath): ++ ++ isAsm = lambda k: k["KernelLanguage"] == "Assembly" ++ ++ extObj = ".o" ++ extCo = ".co" ++ extCoRaw = ".co.raw" + +- for arch, archKernels in archs.items(): ++ destDir = Path(ensurePath(os.path.join(outputPath, 'library'))) ++ asmDir = Path(kernelWriterAssembly.getAssemblyDirectory()) ++ ++ archKernelMap = collections.defaultdict(list) ++ for k in filter(isAsm, kernels): ++ archKernelMap[tuple(k['ISA'])].append(k) ++ ++ coFiles = [] ++ for arch, archKernels in archKernelMap.items(): + if len(archKernels) == 0: + continue + +- archName = getGfxName(arch) ++ gfx = getGfxName(arch) + + if globalParameters["MergeFiles"] or globalParameters["NumMergedFiles"] > 1 or globalParameters["LazyLibraryLoading"]: +- objectFiles = [kernelWriterAssembly.getKernelFileBase(k) + '.o' for k in archKernels if 'codeObjectFile' not in k] ++ objectFiles = [str(asmDir / (kernelWriterAssembly.getKernelFileBase(k) + extObj)) for k in archKernels if 'codeObjectFile' not in k] + +- #Group kernels from placeholder libraries + coFileMap = collections.defaultdict(list) ++ + if len(objectFiles): +- coFileMap[os.path.join(destDir, "TensileLibrary_"+archName+".co")] = objectFiles ++ 
coFileMap[asmDir / ("TensileLibrary_"+ gfx + extCoRaw)] = objectFiles + + for kernel in archKernels: + coName = kernel.get("codeObjectFile", None) + if coName: +- coFileMap[os.path.join(destDir, coName+".co")] += [kernelWriterAssembly.getKernelFileBase(kernel) + '.o'] ++ coFileMap[asmDir / (coName + extCoRaw)].append(str(asmDir / (kernelWriterAssembly.getKernelFileBase(kernel) + extObj))) + +- for coFile, objectFiles in coFileMap.items(): +- if os.name == "nt": +- # On Windows, the objectFiles list command line (including spaces) +- # exceeds the limit of 8191 characters, so using response file ++ for coFileRaw, objFiles in coFileMap.items(): + +- responseArgs = objectFiles +- responseFile = os.path.join(asmDir, 'clangArgs.txt') +- with open(responseFile, 'wt') as file: +- file.write( " ".join(responseArgs) ) +- file.flush() +- +- args = [globalParameters['AssemblerPath'], '-target', 'amdgcn-amd-amdhsa', '-o', coFile, '@clangArgs.txt'] +- subprocess.check_call(args, cwd=asmDir) +- else: +- numOfObjectFiles = len(objectFiles) +- splitFiles = 10000 +- if numOfObjectFiles > splitFiles: +- slicedObjectFilesList = [objectFiles[x:x+splitFiles] for x in range(0, numOfObjectFiles, splitFiles)] +- objectFileBasename = os.path.split(coFile)[-1].split('.')[0] +- numOfOneSliceOfObjectFiles = int(math.ceil(numOfObjectFiles / splitFiles)) +- newObjectFiles = [ objectFileBasename + "_" + str(i) + ".o" for i in range(0, numOfOneSliceOfObjectFiles)] +- newObjectFilesOutput = [] +- for slicedObjectFiles, objectFile in zip(slicedObjectFilesList, newObjectFiles): +- if len(slicedObjectFiles) > 1: +- args = [globalParameters["ROCmLdPath"], "-r"] + slicedObjectFiles + [ "-o", objectFile ] +- if globalParameters["PrintCodeCommands"]: +- print(asmDir) +- print(' '.join(args)) +- subprocess.check_call(args, cwd=asmDir) +- newObjectFilesOutput.append(objectFile) +- else: +- newObjectFilesOutput.append(slicedObjectFiles[0]) +- args = 
kernelWriterAssembly.getLinkCodeObjectArgs(newObjectFilesOutput, coFile) +- if globalParameters["PrintCodeCommands"]: +- print(asmDir) +- print(' '.join(args)) +- subprocess.check_call(args, cwd=asmDir) +- else: +- args = kernelWriterAssembly.getLinkCodeObjectArgs(objectFiles, coFile) +- if globalParameters["PrintCodeCommands"]: +- print(asmDir) +- print(' '.join(args)) +- subprocess.check_call(args, cwd=asmDir) ++ linkIntoCodeObject(objFiles, coFileRaw, kernelWriterAssembly) ++ coFile = destDir / coFileRaw.name.replace(extCoRaw, extCo) ++ compressCodeObject(coFileRaw, coFile, gfx, globalParameters["ClangOffloadBundlerPath"]) + + coFiles.append(coFile) + else: + # no mergefiles + def newCoFileName(kName): + if globalParameters["PackageLibrary"]: +- return os.path.join(destDir, archName, kName + '.co') ++ return os.path.join(destDir, gfx, kName + '.co') + else: +- return os.path.join(destDir, kName + '_' + archName + '.co') ++ return os.path.join(destDir, kName + '_' + gfx + '.co') + + def orgCoFileName(kName): + return os.path.join(asmDir, kName + '.co') +@@ -179,6 +234,8 @@ def orgCoFileName(kName): + map(lambda k: kernelWriterAssembly.getKernelFileBase(k), archKernels)), "Copying code objects"): + shutil.copyfile(src, dst) + coFiles.append(dst) ++ printWarning("Code object files are not compressed in `--no-merge-files` build mode.") ++ + return coFiles + + def which(p): +@@ -645,7 +702,7 @@ def success(kernel): + + if not globalParameters["GenerateSourcesAndExit"]: + codeObjectFiles += buildSourceCodeObjectFiles(CxxCompiler, kernelFiles, outputPath) +- codeObjectFiles += getAssemblyCodeObjectFiles(kernelsToBuild, kernelWriterAssembly, outputPath) ++ codeObjectFiles += buildAssemblyCodeObjectFiles(kernelsToBuild, kernelWriterAssembly, outputPath) + + Common.popWorkingPath() # build_tmp + Common.popWorkingPath() # workingDir + diff --git a/pkgs/development/rocm-modules/6/tensile/tensilelite-create-library-dont-copy-twice.diff 
b/pkgs/development/rocm-modules/6/tensile/tensilelite-create-library-dont-copy-twice.diff new file mode 100644 index 000000000000..c630803c191f --- /dev/null +++ b/pkgs/development/rocm-modules/6/tensile/tensilelite-create-library-dont-copy-twice.diff @@ -0,0 +1,37 @@ +diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py +index 2b9da394..b001fa7c 100644 +--- a/Tensile/TensileCreateLibrary.py ++++ b/Tensile/TensileCreateLibrary.py +@@ -808,10 +808,13 @@ def copyStaticFiles(outputPath=None): + "ReductionTemplate.h", + "memory_gfx.h" ] + ++ import filecmp + for fileName in libraryStaticFiles: +- # copy file +- shutil.copy( os.path.join(globalParameters["SourcePath"], fileName), \ +- outputPath ) ++ src = os.path.join(globalParameters["SourcePath"], fileName) ++ dst = os.path.join(outputPath, os.path.basename(src)) ++ # no need to copy twice if it has already been copied ++ if not os.path.isfile(dst) or not filecmp.cmp(src, dst): ++ shutil.copyfile(src, dst) + + return libraryStaticFiles + +@@ -1417,9 +1420,13 @@ def TensileCreateLibrary(): + writeCMake(outputPath, solutionFiles, sourceKernelFiles, staticFiles, masterLibraries) + + # Make sure to copy the library static files. 
++ import filecmp + for fileName in staticFiles: +- shutil.copy( os.path.join(globalParameters["SourcePath"], fileName), \ +- outputPath ) ++ src = os.path.join(globalParameters["SourcePath"], fileName) ++ dst = os.path.join(outputPath, os.path.basename(src)) ++ # no need to copy twice if it has already been copied ++ if not os.path.isfile(dst) or not filecmp.cmp(src, dst): ++ shutil.copyfile(src, dst) + + # write solutions and kernels + codeObjectFiles = writeSolutionsAndKernels(outputPath, CxxCompiler, None, solutions, diff --git a/pkgs/development/rocm-modules/6/tensile/tensilelite-gen_assembly-venv-err-handling.diff b/pkgs/development/rocm-modules/6/tensile/tensilelite-gen_assembly-venv-err-handling.diff new file mode 100644 index 000000000000..0667599a5839 --- /dev/null +++ b/pkgs/development/rocm-modules/6/tensile/tensilelite-gen_assembly-venv-err-handling.diff @@ -0,0 +1,36 @@ +diff --git a/Tensile/Ops/gen_assembly.sh b/Tensile/Ops/gen_assembly.sh +index 0b21b6c6..609f1dd1 100755 +--- a/Tensile/Ops/gen_assembly.sh ++++ b/Tensile/Ops/gen_assembly.sh +@@ -23,6 +23,8 @@ + # + ################################################################################ + ++set -x ++ + archStr=$1 + dst=$2 + venv=$3 +@@ -35,7 +37,13 @@ fi + + toolchain=${rocm_path}/llvm/bin/clang++ + +-. ${venv}/bin/activate ++if ! [ -z ${TENSILE_GEN_ASSEMBLY_TOOLCHAIN+x} ]; then ++ toolchain="${TENSILE_GEN_ASSEMBLY_TOOLCHAIN}" ++fi ++ ++if [ -f ${venv}/bin/activate ]; then ++ . 
${venv}/bin/activate ++fi + + IFS=';' read -r -a archs <<< "$archStr" + +@@ -77,4 +85,6 @@ for arch in "${archs[@]}"; do + python3 ./ExtOpCreateLibrary.py --src=$dst --co=$dst/extop_$arch.co --output=$dst --arch=$arch + done + +-deactivate ++if [ -f ${venv}/bin/activate ]; then ++ deactivate ++fi diff --git a/pkgs/development/rocm-modules/6/triton/default.nix b/pkgs/development/rocm-modules/6/triton/default.nix new file mode 100644 index 000000000000..8dc20629a219 --- /dev/null +++ b/pkgs/development/rocm-modules/6/triton/default.nix @@ -0,0 +1,56 @@ +{ + triton-no-cuda, + rocmPackages, + fetchFromGitHub, +}: +(triton-no-cuda.override (_old: { + inherit rocmPackages; + rocmSupport = true; + stdenv = rocmPackages.llvm.rocmClangStdenv; + llvm = rocmPackages.triton-llvm; +})).overridePythonAttrs + (old: { + doCheck = false; + stdenv = rocmPackages.llvm.rocmClangStdenv; + version = "3.2.0"; + src = fetchFromGitHub { + owner = "triton-lang"; + repo = "triton"; + rev = "9641643da6c52000c807b5eeed05edaec4402a67"; # "release/3.2.x"; + hash = "sha256-V1lpARwOLn28ZHfjiWR/JJWGw3MB34c+gz6Tq1GOVfo="; + }; + buildInputs = old.buildInputs ++ [ + rocmPackages.clr + ]; + dontStrip = true; + env = old.env // { + CXXFLAGS = "-O3 -I${rocmPackages.clr}/include -I/build/source/third_party/triton/third_party/nvidia/backend/include"; + TRITON_OFFLINE_BUILD = 1; + }; + patches = [ ]; + postPatch = '' + # Remove nvidia backend so we don't depend on unfree nvidia headers + # when we only want to target ROCm + rm -rf third_party/nvidia + substituteInPlace CMakeLists.txt \ + --replace-fail "add_subdirectory(test)" "" + sed -i '/nvidia\|NVGPU\|registerConvertTritonGPUToLLVMPass\|mlir::test::/Id' bin/RegisterTritonDialects.h + sed -i '/TritonTestAnalysis/Id' bin/CMakeLists.txt + substituteInPlace python/setup.py \ + --replace-fail 'backends = [*BackendInstaller.copy(["nvidia", "amd"]), *BackendInstaller.copy_externals()]' \ + 'backends = [*BackendInstaller.copy(["amd"]), 
*BackendInstaller.copy_externals()]' + find . -type f -exec sed -i 's|[<]cupti.h[>]|"cupti.h"|g' {} + + find . -type f -exec sed -i 's|[<]cuda.h[>]|"cuda.h"|g' {} + + # remove any downloads + substituteInPlace python/setup.py \ + --replace-fail "[get_json_package_info()]" "[]"\ + --replace-fail "[get_llvm_package_info()]" "[]"\ + --replace-fail "curr_version != version" "False" + # Don't fetch googletest + substituteInPlace cmake/AddTritonUnitTest.cmake \ + --replace-fail 'include(''${PROJECT_SOURCE_DIR}/unittest/googletest.cmake)' "" \ + --replace-fail "include(GoogleTest)" "find_package(GTest REQUIRED)" + substituteInPlace third_party/amd/backend/compiler.py \ + --replace-fail '"/opt/rocm/llvm/bin/ld.lld"' "os.environ['ROCM_PATH']"' + "/llvm/bin/ld.lld"' + ''; + }) diff --git a/pkgs/development/rocm-modules/6/update.nix b/pkgs/development/rocm-modules/6/update.nix index ee638dc58c2f..69a45f9d4cd4 100644 --- a/pkgs/development/rocm-modules/6/update.nix +++ b/pkgs/development/rocm-modules/6/update.nix @@ -7,8 +7,10 @@ name ? "", owner ? "", repo ? "", - page ? "releases/latest", - filter ? ".tag_name | split(\"-\") | .[1]", + page ? "releases", + # input: array of [ { tag_name: "rocm-6.x.x", }, ... ]. some entries may have bad names like rocm-test-date we want to skip + # output: first tag_name/name that's a proper version if any + filter ? "map(.tag_name // .name) | map(select(test(\"^rocm-[0-9]+\\\\.[0-9]+(\\\\.[0-9]+)?$\"))) | first | ltrimstr(\"rocm-\")", }: let @@ -18,13 +20,39 @@ let updateScript = writeScript "update.sh" '' #!/usr/bin/env nix-shell #!nix-shell -i bash -p curl jq common-updater-scripts - version="$(curl ''${GITHUB_TOKEN:+-u ":$GITHUB_TOKEN"} \ - -sL "https://api.github.com/repos/${owner}/${repo}/${page}" | jq '${filter}' --raw-output)" + set -euo pipefail - IFS='.' 
read -a version_arr <<< "$version" + fetch_releases() { + local api_url="https://api.github.com/repos/${owner}/${repo}/${page}" + if [ "${page}" = "releases" ]; then + api_url="$api_url?per_page=4" + fi + >&2 echo $api_url + curl ''${GITHUB_TOKEN:+-u ":$GITHUB_TOKEN"} -sL "$api_url" + } + + find_valid_version() { + local releases="$1" + >&2 echo "$releases" + # Wrap in array if not already an array to make handling specific release or tags page the same + >&2 echo jq -r 'if type == "array" then . else [.] end | ${filter}' + echo "$releases" | jq -r 'if type == "array" then . else [.] end | ${filter}' + } + + releases="$(fetch_releases)" + version="$(find_valid_version "$releases")" + + if [ -z "$version" ]; then + echo "No valid version found in the fetched release(s)." >&2 + exit 1 + fi + + IFS='.' read -ra version_arr <<< "$version" + + >&2 echo parsed version "$version_arr" from "$version" if (( ''${version_arr[0]} > 6 )); then - echo "'rocmPackages_6.${pname}' is already at it's maximum allowed version.''\nAny further upgrades should go into 'rocmPackages_X.${pname}'." 1>&2 + echo "'rocmPackages_6.${pname}' is already at its maximum allowed version.''\nAny further upgrades should go into 'rocmPackages_X.${pname}'." >&2 exit 1 fi