rocmPackages: 6.0.2 -> 6.3.1

Includes patch suggested by @shuni64 which fixes half precision ABI
issues
Includes hipblaslt compression patch
Includes configurable hipblaslt support in rocblas
rocmPackages_6.hipblaslt: respect NIX_BUILD_CORES in tensilelite
rocmPackages_6.hipblas: propagate hipblas-common
rocmPackages_6.clr: avoid confusion with hipClangPath

Co-authored-by: Gavin Zhao <git@gzgz.dev>
This commit is contained in:
Luna Nova
2025-01-04 10:24:14 -08:00
parent 72a714071e
commit 4d2c7ad003
109 changed files with 4858 additions and 2912 deletions

View File

@@ -380,6 +380,10 @@
- `python3Packages.jaeger-client` was removed because it was deprecated upstream. [OpenTelemetry](https://opentelemetry.io) is the recommended replacement.
- `rocmPackages.rocm-thunk` has been removed and its functionality has been integrated with the ROCm CLR. Use `rocmPackages.clr` instead.
- `rocmPackages.clang-ocl` has been removed. [It was deprecated by AMD in 2023.](https://github.com/ROCm/clang-ocl)
- `nodePackages.meshcommander` has been removed, as the package was deprecated by Intel.
- The default version of `z3` has been updated from 4.8 to 4.13. There are still a few packages that need specific older versions; those will continue to be maintained as long as other packages depend on them but may be removed in the future.

View File

@@ -0,0 +1,417 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI_ASM_X86_AMD_HSMP_H_
#define _UAPI_ASM_X86_AMD_HSMP_H_
#include <linux/types.h>
#pragma pack(4)
#define HSMP_MAX_MSG_LEN 8
/*
 * HSMP message identifiers.
 *
 * Every message carries its numeric ID explicitly (in hex) so a value can
 * be looked up without counting enumerators. HSMP_MSG_ID_MAX is left
 * implicit so it is always one past the last defined message.
 */
enum hsmp_message_ids {
	HSMP_TEST = 0x01,			/* Increments input value by 1 */
	HSMP_GET_SMU_VER = 0x02,		/* SMU FW version */
	HSMP_GET_PROTO_VER = 0x03,		/* HSMP interface version */
	HSMP_GET_SOCKET_POWER = 0x04,		/* Average package power consumption */
	HSMP_SET_SOCKET_POWER_LIMIT = 0x05,	/* Set the socket power limit */
	HSMP_GET_SOCKET_POWER_LIMIT = 0x06,	/* Get current socket power limit */
	HSMP_GET_SOCKET_POWER_LIMIT_MAX = 0x07,	/* Get maximum socket power value */
	HSMP_SET_BOOST_LIMIT = 0x08,		/* Set a core maximum frequency limit */
	HSMP_SET_BOOST_LIMIT_SOCKET = 0x09,	/* Set socket maximum frequency level */
	HSMP_GET_BOOST_LIMIT = 0x0A,		/* Get current frequency limit */
	HSMP_GET_PROC_HOT = 0x0B,		/* Get PROCHOT status */
	HSMP_SET_XGMI_LINK_WIDTH = 0x0C,	/* Set max and min width of xGMI Link */
	HSMP_SET_DF_PSTATE = 0x0D,		/* Alter APEnable/Disable messages behavior */
	HSMP_SET_AUTO_DF_PSTATE = 0x0E,		/* Enable DF P-State Performance Boost algorithm */
	HSMP_GET_FCLK_MCLK = 0x0F,		/* Get FCLK and MEMCLK for current socket */
	HSMP_GET_CCLK_THROTTLE_LIMIT = 0x10,	/* Get CCLK frequency limit in socket */
	HSMP_GET_C0_PERCENT = 0x11,		/* Get average C0 residency in socket */
	HSMP_SET_NBIO_DPM_LEVEL = 0x12,		/* Set max/min LCLK DPM Level for a given NBIO */
	HSMP_GET_NBIO_DPM_LEVEL = 0x13,		/* Get LCLK DPM level min and max for a given NBIO */
	HSMP_GET_DDR_BANDWIDTH = 0x14,		/* Get theoretical maximum and current DDR Bandwidth */
	HSMP_GET_TEMP_MONITOR = 0x15,		/* Get socket temperature */
	HSMP_GET_DIMM_TEMP_RANGE = 0x16,	/* Get per-DIMM temperature range and refresh rate */
	HSMP_GET_DIMM_POWER = 0x17,		/* Get per-DIMM power consumption */
	HSMP_GET_DIMM_THERMAL = 0x18,		/* Get per-DIMM thermal sensors */
	HSMP_GET_SOCKET_FREQ_LIMIT = 0x19,	/* Get current active frequency per socket */
	HSMP_GET_CCLK_CORE_LIMIT = 0x1A,	/* Get CCLK frequency limit per core */
	HSMP_GET_RAILS_SVI = 0x1B,		/* Get SVI-based Telemetry for all rails */
	HSMP_GET_SOCKET_FMAX_FMIN = 0x1C,	/* Get Fmax and Fmin per socket */
	HSMP_GET_IOLINK_BANDWITH = 0x1D,	/* Get current bandwidth on IO Link */
	HSMP_GET_XGMI_BANDWITH = 0x1E,		/* Get current bandwidth on xGMI Link */
	HSMP_SET_GMI3_WIDTH = 0x1F,		/* Set max and min GMI3 Link width */
	HSMP_SET_PCI_RATE = 0x20,		/* Control link rate on PCIe devices */
	HSMP_SET_POWER_MODE = 0x21,		/* Select power efficiency profile policy */
	HSMP_SET_PSTATE_MAX_MIN = 0x22,		/* Set the max and min DF P-State */
	HSMP_GET_METRIC_TABLE_VER = 0x23,	/* Get metrics table version */
	HSMP_GET_METRIC_TABLE = 0x24,		/* Get metrics table */
	HSMP_GET_METRIC_TABLE_DRAM_ADDR = 0x25,	/* Get metrics table dram address */
	HSMP_MSG_ID_MAX,			/* One past the last message ID */
};
/*
 * Message structure used with HSMP_IOCTL_CMD.
 *
 * A single args[] buffer of HSMP_MAX_MSG_LEN 32-bit words carries the input
 * arguments and also receives the response words. The struct is declared
 * inside the #pragma pack(4) region of this header.
 */
struct hsmp_message {
__u32 msg_id; /* Message ID */
__u16 num_args; /* Number of input argument words in message */
__u16 response_sz; /* Number of expected output/response words */
__u32 args[HSMP_MAX_MSG_LEN]; /* argument/response buffer */
__u16 sock_ind; /* socket number */
};
/*
 * Classification of an HSMP message as a set or a get request.
 * HSMP_RSVD is used for the reserved slot of the descriptor table.
 */
enum hsmp_msg_type {
	HSMP_RSVD = -1,
	HSMP_SET,	/* 0 */
	HSMP_GET,	/* 1 */
};
/* HSMP protocol versions; each enumerator equals its version number. */
enum hsmp_proto_versions {
	HSMP_PROTO_VER2 = 2,
	HSMP_PROTO_VER3 = 3,
	HSMP_PROTO_VER4 = 4,
	HSMP_PROTO_VER5 = 5,
	HSMP_PROTO_VER6 = 6
};
/*
 * Descriptor for one HSMP message.
 *
 * hsmp_msg_desc_table initialises these positionally, so the member order
 * (num_args, response_sz, type) must not change.
 */
struct hsmp_msg_desc {
int num_args; /* number of input argument words */
int response_sz; /* number of response words */
enum hsmp_msg_type type; /* HSMP_SET, HSMP_GET or HSMP_RSVD */
};
/*
 * Per-message descriptors: {num_args, response_sz, type}.
 *
 * Entries follow the order of enum hsmp_message_ids with a reserved entry
 * at index 0, so the descriptor for message ID n sits at index n.
 *
 * User may use these comments as reference, please find the
 * supported list of messages and message definition in the
 * HSMP chapter of respective family/model PPR.
 *
 * Not supported messages would return -ENOMSG.
 */
static const struct hsmp_msg_desc hsmp_msg_desc_table[]
__attribute__((unused)) = {
/* RESERVED */
{0, 0, HSMP_RSVD},
/*
* HSMP_TEST, num_args = 1, response_sz = 1
* input: args[0] = xx
* output: args[0] = xx + 1
*/
{1, 1, HSMP_GET},
/*
* HSMP_GET_SMU_VER, num_args = 0, response_sz = 1
* output: args[0] = smu fw ver
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_PROTO_VER, num_args = 0, response_sz = 1
* output: args[0] = proto version
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_SOCKET_POWER, num_args = 0, response_sz = 1
* output: args[0] = socket power in mWatts
*/
{0, 1, HSMP_GET},
/*
* HSMP_SET_SOCKET_POWER_LIMIT, num_args = 1, response_sz = 0
* input: args[0] = power limit value in mWatts
*/
{1, 0, HSMP_SET},
/*
* HSMP_GET_SOCKET_POWER_LIMIT, num_args = 0, response_sz = 1
* output: args[0] = socket power limit value in mWatts
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_SOCKET_POWER_LIMIT_MAX, num_args = 0, response_sz = 1
* output: args[0] = maximum socket power limit in mWatts
*/
{0, 1, HSMP_GET},
/*
* HSMP_SET_BOOST_LIMIT, num_args = 1, response_sz = 0
* input: args[0] = apic id[31:16] + boost limit value in MHz[15:0]
*/
{1, 0, HSMP_SET},
/*
* HSMP_SET_BOOST_LIMIT_SOCKET, num_args = 1, response_sz = 0
* input: args[0] = boost limit value in MHz
*/
{1, 0, HSMP_SET},
/*
* HSMP_GET_BOOST_LIMIT, num_args = 1, response_sz = 1
* input: args[0] = apic id
* output: args[0] = boost limit value in MHz
*/
{1, 1, HSMP_GET},
/*
* HSMP_GET_PROC_HOT, num_args = 0, response_sz = 1
* output: args[0] = proc hot status
*/
{0, 1, HSMP_GET},
/*
* HSMP_SET_XGMI_LINK_WIDTH, num_args = 1, response_sz = 0
* input: args[0] = min link width[15:8] + max link width[7:0]
*/
{1, 0, HSMP_SET},
/*
* HSMP_SET_DF_PSTATE, num_args = 1, response_sz = 0
* input: args[0] = df pstate[7:0]
*/
{1, 0, HSMP_SET},
/* HSMP_SET_AUTO_DF_PSTATE, num_args = 0, response_sz = 0 */
{0, 0, HSMP_SET},
/*
* HSMP_GET_FCLK_MCLK, num_args = 0, response_sz = 2
* output: args[0] = fclk in MHz, args[1] = mclk in MHz
*/
{0, 2, HSMP_GET},
/*
* HSMP_GET_CCLK_THROTTLE_LIMIT, num_args = 0, response_sz = 1
* output: args[0] = core clock in MHz
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_C0_PERCENT, num_args = 0, response_sz = 1
* output: args[0] = average c0 residency
*/
{0, 1, HSMP_GET},
/*
* HSMP_SET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 0
* input: args[0] = nbioid[23:16] + max dpm level[15:8] + min dpm level[7:0]
*/
{1, 0, HSMP_SET},
/*
* HSMP_GET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 1
* input: args[0] = nbioid[23:16]
* output: args[0] = max dpm level[15:8] + min dpm level[7:0]
*/
{1, 1, HSMP_GET},
/*
* HSMP_GET_DDR_BANDWIDTH, num_args = 0, response_sz = 1
* output: args[0] = max bw in Gbps[31:20] + utilised bw in Gbps[19:8] +
* bw in percentage[7:0]
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_TEMP_MONITOR, num_args = 0, response_sz = 1
* output: args[0] = temperature in degree celsius. [15:8] integer part +
* [7:5] fractional part
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_DIMM_TEMP_RANGE, num_args = 1, response_sz = 1
* input: args[0] = DIMM address[7:0]
* output: args[0] = refresh rate[3] + temperature range[2:0]
*/
{1, 1, HSMP_GET},
/*
* HSMP_GET_DIMM_POWER, num_args = 1, response_sz = 1
* input: args[0] = DIMM address[7:0]
* output: args[0] = DIMM power in mW[31:17] + update rate in ms[16:8] +
* DIMM address[7:0]
*/
{1, 1, HSMP_GET},
/*
* HSMP_GET_DIMM_THERMAL, num_args = 1, response_sz = 1
* input: args[0] = DIMM address[7:0]
* output: args[0] = temperature in degree celsius[31:21] + update rate in ms[16:8] +
* DIMM address[7:0]
*/
{1, 1, HSMP_GET},
/*
* HSMP_GET_SOCKET_FREQ_LIMIT, num_args = 0, response_sz = 1
* output: args[0] = frequency in MHz[31:16] + frequency source[15:0]
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_CCLK_CORE_LIMIT, num_args = 1, response_sz = 1
* input: args[0] = apic id [31:0]
* output: args[0] = frequency in MHz[31:0]
*/
{1, 1, HSMP_GET},
/*
* HSMP_GET_RAILS_SVI, num_args = 0, response_sz = 1
* output: args[0] = power in mW[31:0]
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_SOCKET_FMAX_FMIN, num_args = 0, response_sz = 1
* output: args[0] = fmax in MHz[31:16] + fmin in MHz[15:0]
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_IOLINK_BANDWITH, num_args = 1, response_sz = 1
* input: args[0] = link id[15:8] + bw type[2:0]
* output: args[0] = io bandwidth in Mbps[31:0]
*/
{1, 1, HSMP_GET},
/*
* HSMP_GET_XGMI_BANDWITH, num_args = 1, response_sz = 1
* input: args[0] = link id[15:8] + bw type[2:0]
* output: args[0] = xgmi bandwidth in Mbps[31:0]
*/
{1, 1, HSMP_GET},
/*
* HSMP_SET_GMI3_WIDTH, num_args = 1, response_sz = 0
* input: args[0] = min link width[15:8] + max link width[7:0]
*/
{1, 0, HSMP_SET},
/*
* HSMP_SET_PCI_RATE, num_args = 1, response_sz = 1
* input: args[0] = link rate control value
* output: args[0] = previous link rate control value
*/
{1, 1, HSMP_SET},
/*
* HSMP_SET_POWER_MODE, num_args = 1, response_sz = 0
* input: args[0] = power efficiency mode[2:0]
*/
{1, 0, HSMP_SET},
/*
* HSMP_SET_PSTATE_MAX_MIN, num_args = 1, response_sz = 0
* input: args[0] = min df pstate[15:8] + max df pstate[7:0]
*/
{1, 0, HSMP_SET},
/*
* HSMP_GET_METRIC_TABLE_VER, num_args = 0, response_sz = 1
* output: args[0] = metrics table version
*/
{0, 1, HSMP_GET},
/*
* HSMP_GET_METRIC_TABLE, num_args = 0, response_sz = 0
*/
{0, 0, HSMP_GET},
/*
* HSMP_GET_METRIC_TABLE_DRAM_ADDR, num_args = 0, response_sz = 2
* output: args[0] = lower 32 bits of the address
* output: args[1] = upper 32 bits of the address
*/
{0, 2, HSMP_GET},
};
/*
 * Metrics table (supported only with proto version 6).
 *
 * Declared inside the #pragma pack(4) region of this header, so the __u64
 * members are aligned to 4 bytes and no padding is inserted before them.
 * Member order and sizes are part of the layout; append new members at the
 * end only (see the note above gfxclk_frequency).
 *
 * NOTE(review): members ending in _acc look like running accumulators, and
 * units are not stated in this header -- confirm against the PPR before
 * interpreting the values.
 */
struct hsmp_metric_table {
__u32 accumulation_counter;
/* TEMPERATURE */
__u32 max_socket_temperature;
__u32 max_vr_temperature;
__u32 max_hbm_temperature;
__u64 max_socket_temperature_acc;
__u64 max_vr_temperature_acc;
__u64 max_hbm_temperature_acc;
/* POWER */
__u32 socket_power_limit;
__u32 max_socket_power_limit;
__u32 socket_power;
/* ENERGY */
__u64 timestamp;
__u64 socket_energy_acc;
__u64 ccd_energy_acc;
__u64 xcd_energy_acc;
__u64 aid_energy_acc;
__u64 hbm_energy_acc;
/* FREQUENCY */
__u32 cclk_frequency_limit;
__u32 gfxclk_frequency_limit;
__u32 fclk_frequency;
__u32 uclk_frequency;
__u32 socclk_frequency[4];
__u32 vclk_frequency[4];
__u32 dclk_frequency[4];
__u32 lclk_frequency[4];
__u64 gfxclk_frequency_acc[8];
__u64 cclk_frequency_acc[96];
/* FREQUENCY RANGE */
__u32 max_cclk_frequency;
__u32 min_cclk_frequency;
__u32 max_gfxclk_frequency;
__u32 min_gfxclk_frequency;
__u32 fclk_frequency_table[4];
__u32 uclk_frequency_table[4];
__u32 socclk_frequency_table[4];
__u32 vclk_frequency_table[4];
__u32 dclk_frequency_table[4];
__u32 lclk_frequency_table[4];
__u32 max_lclk_dpm_range;
__u32 min_lclk_dpm_range;
/* XGMI */
__u32 xgmi_width;
__u32 xgmi_bitrate;
__u64 xgmi_read_bandwidth_acc[8];
__u64 xgmi_write_bandwidth_acc[8];
/* ACTIVITY */
__u32 socket_c0_residency;
__u32 socket_gfx_busy;
__u32 dram_bandwidth_utilization;
__u64 socket_c0_residency_acc;
__u64 socket_gfx_busy_acc;
__u64 dram_bandwidth_acc;
__u32 max_dram_bandwidth;
__u64 dram_bandwidth_utilization_acc;
__u64 pcie_bandwidth_acc[4];
/* THROTTLERS */
__u32 prochot_residency_acc;
__u32 ppt_residency_acc;
__u32 socket_thm_residency_acc;
__u32 vr_thm_residency_acc;
__u32 hbm_thm_residency_acc;
__u32 spare;
/* New items at the end to maintain driver compatibility */
__u32 gfxclk_frequency[8];
};
/* Reset to default packing */
#pragma pack()
/* Define unique ioctl command for hsmp msgs using generic _IOWR */
#define HSMP_BASE_IOCTL_NR 0xF8
#define HSMP_IOCTL_CMD _IOWR(HSMP_BASE_IOCTL_NR, 0, struct hsmp_message)
#endif /* _UAPI_ASM_X86_AMD_HSMP_H_ */

View File

@@ -0,0 +1,79 @@
# AMD SMI (amdsmi) library and tools, built with a vendored copy of the
# E-SMI in-band library.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  pkg-config,
  libdrm,
  wrapPython,
  autoPatchelfHook,
}:

let
  # E-SMI in-band library sources; copied into the source tree in postPatch.
  esmi_ib_src = fetchFromGitHub {
    owner = "amd";
    repo = "esmi_ib_library";
    rev = "esmi_pkg_ver-3.0.3";
    hash = "sha256-q0w5c5c+CpXkklmSyfzc+sbkt4cHNxscGJA3AXwvHxQ=";
  };
in
stdenv.mkDerivation (finalAttrs: {
  pname = "amdsmi";
  version = "6.3.1";

  src = fetchFromGitHub {
    owner = "rocm";
    repo = "amdsmi";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-ZHr7G2/A4t3yH4S5urt1u8DZqGRcXpZUC/eavhkgPMY=";
  };

  # - goamdsmi_shim: replace the literal `amd_smi)` with `${AMD_SMI_TARGET})`
  #   and comment out the target_link_libraries line that starts with `-L`.
  # - Place the pre-fetched esmi_ib_library sources at ./esmi_ib_library.
  # - Provide the bundled HSMP UAPI header as <asm/amd_hsmp.h> for
  #   esmi_ib_library (presumably because the kernel headers available in
  #   the build do not ship it -- confirm before removing).
  postPatch = ''
    substituteInPlace goamdsmi_shim/CMakeLists.txt \
      --replace-fail "amd_smi)" ${"'"}''${AMD_SMI_TARGET})' \
      --replace-fail 'target_link_libraries(''${GOAMDSMI_SHIM_TARGET} -L' '#'
    cp -rf --no-preserve=mode ${esmi_ib_src} ./esmi_ib_library
    mkdir -p ./esmi_ib_library/include/asm
    cp ${./amd_hsmp.h} ./esmi_ib_library/include/asm/amd_hsmp.h
  '';

  nativeBuildInputs = [
    cmake
    pkg-config
    wrapPython
    autoPatchelfHook
  ];

  buildInputs = [
    libdrm
  ];

  cmakeFlags = [
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  postInstall = ''
    wrapPythonProgramsIn $out
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = with lib; {
    description = "System management interface for AMD GPUs supported by ROCm";
    # Point at the amdsmi repository that `src` is fetched from, not rocm_smi_lib.
    homepage = "https://github.com/ROCm/amdsmi";
    license = with licenses; [ mit ];
    maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
    platforms = [ "x86_64-linux" ];
  };
})

View File

@@ -0,0 +1,226 @@
# AOTriton: ROCm ahead-of-time compiled Triton kernels.
#
# Arguments:
#   buildTests / buildBenchmarks / buildSamples: enable the matching
#     BUILD_CLIENTS_* CMake option and add a "test" / "benchmark" / "sample"
#     output.
#   gpuTargets: list of gfx targets, joined with ";" and passed as both
#     AMDGPU_TARGETS and GPU_TARGETS.
{
  lib,
  stdenv,
  fetchFromGitHub,
  cmake,
  rocm-cmake,
  clr,
  rocblas,
  rocsolver,
  gtest,
  msgpack,
  libxml2,
  python3,
  python3Packages,
  openmp,
  hipblas-common,
  hipblas,
  nlohmann_json,
  triton-llvm,
  rocmlir,
  lapack-reference,
  ninja,
  ncurses,
  libffi,
  zlib,
  zstd,
  xz,
  pkg-config,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  gpuTargets ? [
    # aotriton GPU support list:
    # https://github.com/ROCm/aotriton/blob/main/v2python/gpu_targets.py
    "gfx90a"
    "gfx942"
    "gfx1100"
    "gfx1101"
  ],
}:

stdenv.mkDerivation (
  finalAttrs:
  let
    # Python interpreter handed to the build as VIRTUALENV_PYTHON_EXENAME.
    py = python3.withPackages (ps: [
      ps.pyyaml
      ps.distutils
      ps.setuptools
      ps.packaging
      ps.numpy
      ps.wheel
      ps.filelock
      ps.iniconfig
      ps.pluggy
      ps.pybind11
    ]);
    gpuTargets' = lib.concatStringsSep ";" gpuTargets;
    compiler = "amdclang++";
    cFlags = "-O3 -DNDEBUG";
    triton-llvm' = triton-llvm;
  in
  {
    pname = "aotriton";
    version = "0.8.2b";

    src = fetchFromGitHub {
      owner = "ROCm";
      repo = "aotriton";
      rev = finalAttrs.version;
      hash = "sha256-15Qr0EgV+pU6Hnmxqy76Nmryqr7qNkoE6iDcg9z35Hk=";
      fetchSubmodules = true;
    };

    env.CXX = compiler;
    env.ROCM_PATH = "${clr}";

    requiredSystemFeatures = [ "big-parallel" ];

    outputs =
      [
        "out"
      ]
      ++ lib.optionals buildTests [
        "test"
      ]
      ++ lib.optionals buildBenchmarks [
        "benchmark"
      ]
      ++ lib.optionals buildSamples [
        "sample"
      ];

    # Need an empty cuda.h for this to compile
    # Better than pulling in unfree cuda headers
    postPatch = ''
      touch third_party/triton/third_party/nvidia/include/cuda.h
    '';

    doCheck = false;
    doInstallCheck = false;

    nativeBuildInputs = [
      cmake
      rocm-cmake
      pkg-config
      py
      clr
      ninja
    ];

    buildInputs =
      [
        rocblas
        rocsolver
        hipblas-common
        hipblas
        openmp
        libffi
        ncurses
        xz
        nlohmann_json
        rocmlir
        msgpack
        libxml2
        python3Packages.msgpack
        zlib
        zstd
      ]
      ++ lib.optionals buildTests [
        gtest
      ]
      ++ lib.optionals (buildTests || buildBenchmarks) [
        lapack-reference
      ];

    env.TRITON_OFFLINE_BUILD = 1;
    env.LLVM_SYSPATH = "${triton-llvm'}";
    env.JSON_SYSPATH = nlohmann_json;
    env.MLIR_DIR = "${triton-llvm'}/lib/cmake/mlir";

    # Fix up header issues in triton: https://github.com/triton-lang/triton/pull/3985/files
    #
    # CXXFLAGS and HOME are derived from the actual build location here rather
    # than hard-coding /build: the build directory is not guaranteed to be
    # /build (e.g. unsandboxed builds). preConfigure runs in the unpacked
    # source root, so $PWD is the source tree.
    preConfigure = ''
      mkdir third_party/triton/third_party/nvidia/backend/include/
      touch third_party/triton/third_party/nvidia/backend/include/cuda.h
      export CXXFLAGS="''${CXXFLAGS:+$CXXFLAGS }-I$PWD/third_party/triton/third_party/nvidia/backend/include"
      find third_party/triton -type f -exec sed -i 's|[<]cupti.h[>]|"cupti.h"|g' {} +
      find third_party/triton -type f -exec sed -i 's|[<]cuda.h[>]|"cuda.h"|g' {} +
      sed -i '2s;^;set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE BOOL "ON")\n;' CMakeLists.txt
      sed -i '2s;^;set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")\n;' CMakeLists.txt
      sed -i '2s;^;set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE BOOL "ON")\n;' third_party/triton/CMakeLists.txt
      sed -i '2s;^;set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")\n;' third_party/triton/CMakeLists.txt
      substituteInPlace third_party/triton/python/setup.py \
        --replace-fail "from distutils.command.clean import clean" "import setuptools;from distutils.command.clean import clean" \
        --replace-fail 'system == "Linux"' 'False'
      cmakeFlagsArray+=(
        '-DCMAKE_C_FLAGS_RELEASE=${cFlags}'
        '-DCMAKE_CXX_FLAGS_RELEASE=${cFlags}'
      )
      prependToVar cmakeFlags "-GNinja"
      export HOME="$(mktemp -d)"
    '';

    # Excerpt from README:
    # Note: do not run ninja separately, due to the limit of the current build system,
    # ninja install will run the whole build process unconditionally.
    dontBuild = true;

    installPhase = ''
      runHook preInstall
      ninja -v install
      runHook postInstall
    '';

    cmakeFlags =
      [
        "-Wno-dev"
        "-DAOTRITON_NOIMAGE_MODE=ON" # FIXME: Should be able to build with object code but generate_shim is failing
        "-DCMAKE_BUILD_TYPE=Release"
        "-DCMAKE_VERBOSE_MAKEFILE=ON"
        "-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}"
        "-DCMAKE_CXX_COMPILER=${compiler}"
        # Manually define CMAKE_INSTALL_<DIR>
        # See: https://github.com/NixOS/nixpkgs/pull/197838
        "-DCMAKE_INSTALL_BINDIR=bin"
        "-DCMAKE_INSTALL_LIBDIR=lib"
        "-DCMAKE_INSTALL_INCLUDEDIR=include"
        "-DAMDGPU_TARGETS=${gpuTargets'}"
        "-DGPU_TARGETS=${gpuTargets'}"
      ]
      ++ lib.optionals buildTests [
        "-DBUILD_CLIENTS_TESTS=ON"
      ]
      ++ lib.optionals buildBenchmarks [
        "-DBUILD_CLIENTS_BENCHMARKS=ON"
      ]
      ++ lib.optionals buildSamples [
        "-DBUILD_CLIENTS_SAMPLES=ON"
      ];

    # Move optional client binaries into their own outputs.
    # NOTE(review): the hipblas-test / hipblas-bench names look copied from the
    # hipblas derivation -- confirm what aotriton actually installs before
    # enabling buildTests or buildBenchmarks.
    postInstall =
      lib.optionalString buildTests ''
        mkdir -p $test/bin
        mv $out/bin/hipblas-test $test/bin
      ''
      + lib.optionalString buildBenchmarks ''
        mkdir -p $benchmark/bin
        mv $out/bin/hipblas-bench $benchmark/bin
      ''
      + lib.optionalString buildSamples ''
        mkdir -p $sample/bin
        mv $out/bin/example-* $sample/bin
      ''
      + lib.optionalString (buildTests || buildBenchmarks || buildSamples) ''
        rmdir $out/bin
      '';

    meta = with lib; {
      description = "ROCm Ahead of Time (AOT) Triton Math Library";
      homepage = "https://github.com/ROCm/aotriton";
      license = with licenses; [ mit ];
      maintainers = teams.rocm.members;
      platforms = platforms.linux;
    };
  }
)

View File

@@ -0,0 +1,68 @@
From https://github.com/triton-lang/triton/pull/1400/files
diff --git a/python/setup.py b/python/setup.py
index 1d5eb89c591d..9dfd5a62ad63 100644
--- a/third_party/triton/python/setup.py
+++ b/third_party/triton/python/setup.py
@@ -1,14 +1,13 @@
-import distutils
import os
import platform
import re
import shutil
import subprocess
import sys
+import sysconfig
import tarfile
import tempfile
import urllib.request
-from distutils.version import LooseVersion
from pathlib import Path
from typing import NamedTuple
@@ -154,10 +153,10 @@ def run(self):
"CMake must be installed to build the following extensions: " + ", ".join(e.name for e in self.extensions)
)
- if platform.system() == "Windows":
- cmake_version = LooseVersion(re.search(r"version\s*([\d.]+)", out.decode()).group(1))
- if cmake_version < "3.1.0":
- raise RuntimeError("CMake >= 3.1.0 is required on Windows")
+ match = re.search(r"version\s*(?P<major>\d+)\.(?P<minor>\d+)([\d.]+)?", out.decode())
+ cmake_major, cmake_minor = int(match.group("major")), int(match.group("minor"))
+ if (cmake_major, cmake_minor) < (3, 20):
+ raise RuntimeError("CMake >= 3.20.0 is required")
for ext in self.extensions:
self.build_extension(ext)
@@ -176,7 +175,7 @@ def build_extension(self, ext):
if not os.path.exists(self.build_temp):
os.makedirs(self.build_temp)
# python directories
- python_include_dir = distutils.sysconfig.get_python_inc()
+ python_include_dir = sysconfig.get_path("platinclude")
cmake_args = [
"-DLLVM_ENABLE_WERROR=ON",
"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir,
@@ -200,9 +199,8 @@ def build_extension(self, ext):
cmake_args += ["-A", "x64"]
build_args += ["--", "/m"]
else:
- import multiprocessing
cmake_args += ["-DCMAKE_BUILD_TYPE=" + cfg]
- build_args += ['-j' + str(2 * multiprocessing.cpu_count())]
+ build_args += ['-j' + str(2 * os.cpu_count())]
env = os.environ.copy()
subprocess.check_call(["cmake", self.base_dir] + cmake_args, cwd=self.build_temp, env=env)
@@ -245,6 +243,11 @@ def build_extension(self, ext):
"Topic :: Software Development :: Build Tools",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3.6",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
],
test_suite="tests",
extras_require={

View File

@@ -1,45 +0,0 @@
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake,
rocm-cmake,
rocm-device-libs,
}:
stdenv.mkDerivation (finalAttrs: {
pname = "clang-ocl";
version = "6.0.2";
src = fetchFromGitHub {
owner = "ROCm";
repo = "clang-ocl";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-uMSvcVJj+me2E+7FsXZ4l4hTcK6uKEegXpkHGcuist0=";
};
nativeBuildInputs = [
cmake
rocm-cmake
];
buildInputs = [ rocm-device-libs ];
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
};
meta = with lib; {
description = "OpenCL compilation with clang compiler";
homepage = "https://github.com/ROCm/clang-ocl";
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View File

@@ -0,0 +1,40 @@
diff --git a/rocclr/cmake/ROCclr.cmake b/rocclr/cmake/ROCclr.cmake
index 3f233b72f..67bdc62ee 100644
--- a/rocclr/cmake/ROCclr.cmake
+++ b/rocclr/cmake/ROCclr.cmake
@@ -44,6 +44,19 @@ find_package(Threads REQUIRED)
find_package(AMD_OPENCL)
+# Find X11 package
+find_package(X11 REQUIRED)
+if(NOT X11_FOUND)
+ message(FATAL_ERROR "X11 libraries not found")
+endif()
+
+# Find OpenGL package
+find_package(OpenGL REQUIRED)
+if(NOT OpenGL_FOUND)
+ message(FATAL_ERROR "OpenGL not found")
+endif()
+
+
add_library(rocclr STATIC)
include(ROCclrCompilerOptions)
@@ -123,9 +136,14 @@ target_include_directories(rocclr PUBLIC
${ROCCLR_SRC_DIR}/device
${ROCCLR_SRC_DIR}/elf
${ROCCLR_SRC_DIR}/include
+ ${X11_INCLUDE_DIR}
+ ${OPENGL_INCLUDE_DIR}
${AMD_OPENCL_INCLUDE_DIRS})
-target_link_libraries(rocclr PUBLIC Threads::Threads)
+target_link_libraries(rocclr PUBLIC
+ Threads::Threads
+ ${X11_LIBRARIES}
+ ${OPENGL_LIBRARIES})
# IPC on Windows is not supported
if(UNIX)
target_link_libraries(rocclr PUBLIC rt)

View File

@@ -4,52 +4,58 @@
callPackage, callPackage,
fetchFromGitHub, fetchFromGitHub,
fetchpatch, fetchpatch,
fetchurl,
rocmUpdateScript, rocmUpdateScript,
makeWrapper, makeWrapper,
cmake, cmake,
perl, perl,
clang,
hip-common, hip-common,
hipcc, hipcc,
rocm-device-libs, rocm-device-libs,
rocm-comgr, rocm-comgr,
rocm-runtime, rocm-runtime,
rocm-core,
roctracer, roctracer,
rocminfo, rocminfo,
rocm-smi, rocm-smi,
numactl, numactl,
libffi,
zstd,
zlib,
libGL, libGL,
libxml2, libxml2,
libX11, libX11,
python3Packages, python3Packages,
rocm-merged-llvm,
khronos-ocl-icd-loader,
gcc-unwrapped,
writeShellScriptBin,
localGpuTargets ? null,
}: }:
let let
inherit (rocm-core) ROCM_LIBPATCH_VERSION;
hipClang = rocm-merged-llvm;
hipClangPath = "${hipClang}/bin";
wrapperArgs = [ wrapperArgs = [
"--prefix PATH : $out/bin" "--prefix PATH : $out/bin"
"--prefix LD_LIBRARY_PATH : ${rocm-runtime}" "--prefix LD_LIBRARY_PATH : ${rocm-runtime}"
"--set HIP_PLATFORM amd" "--set HIP_PLATFORM amd"
"--set HIP_PATH $out" "--set HIP_PATH $out"
"--set HIP_CLANG_PATH ${clang}/bin" "--set HIP_CLANG_PATH ${hipClangPath}"
"--set DEVICE_LIB_PATH ${rocm-device-libs}/amdgcn/bitcode" "--set DEVICE_LIB_PATH ${rocm-device-libs}/amdgcn/bitcode"
"--set HSA_PATH ${rocm-runtime}" "--set HSA_PATH ${rocm-runtime}"
"--set ROCM_PATH $out" "--set ROCM_PATH $out"
]; ];
amdclang = writeShellScriptBin "amdclang" ''
# https://github.com/NixOS/nixpkgs/issues/305641 exec clang "$@"
# Not needed when 3.29.2 is in unstable '';
cmake' = cmake.overrideAttrs (old: rec { amdclangxx = writeShellScriptBin "amdclang++" ''
version = "3.29.2"; exec clang++ "$@"
src = fetchurl { '';
url = "https://cmake.org/files/v${lib.versions.majorMinor version}/cmake-${version}.tar.gz";
hash = "sha256-NttLaSaqt0G6bksuotmckZMiITIwi03IJNQSPLcwNS4=";
};
});
in in
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "clr"; pname = "clr";
version = "6.0.2"; version = "6.3.1";
outputs = [ outputs = [
"out" "out"
@@ -60,15 +66,17 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm"; owner = "ROCm";
repo = "clr"; repo = "clr";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-ZMpA7vCW2CcpGdBLZfPimMHcgjhN1PHuewJiYwZMgGY="; hash = "sha256-wo3kwk6HQJsP+ycaVh2mmMjEgGlj/Z6KXNXOXbJ1KLs=";
}; };
nativeBuildInputs = [ nativeBuildInputs = [
makeWrapper makeWrapper
cmake' cmake
perl perl
python3Packages.python python3Packages.python
python3Packages.cppheaderparser python3Packages.cppheaderparser
amdclang
amdclangxx
]; ];
buildInputs = [ buildInputs = [
@@ -76,9 +84,15 @@ stdenv.mkDerivation (finalAttrs: {
libGL libGL
libxml2 libxml2
libX11 libX11
khronos-ocl-icd-loader
hipClang
libffi
zstd
zlib
]; ];
propagatedBuildInputs = [ propagatedBuildInputs = [
rocm-core
rocm-device-libs rocm-device-libs
rocm-comgr rocm-comgr
rocm-runtime rocm-runtime
@@ -86,6 +100,7 @@ stdenv.mkDerivation (finalAttrs: {
]; ];
cmakeFlags = [ cmakeFlags = [
"-DCMAKE_BUILD_TYPE=Release"
"-DCMAKE_POLICY_DEFAULT_CMP0072=NEW" # Prefer newer OpenGL libraries "-DCMAKE_POLICY_DEFAULT_CMP0072=NEW" # Prefer newer OpenGL libraries
"-DCLR_BUILD_HIP=ON" "-DCLR_BUILD_HIP=ON"
"-DCLR_BUILD_OCL=ON" "-DCLR_BUILD_OCL=ON"
@@ -94,6 +109,9 @@ stdenv.mkDerivation (finalAttrs: {
"-DHIP_PLATFORM=amd" "-DHIP_PLATFORM=amd"
"-DPROF_API_HEADER_PATH=${roctracer.src}/inc/ext" "-DPROF_API_HEADER_PATH=${roctracer.src}/inc/ext"
"-DROCM_PATH=${rocminfo}" "-DROCM_PATH=${rocminfo}"
"-DBUILD_ICD=ON"
"-DHIP_ENABLE_ROCPROFILER_REGISTER=OFF" # circular dep - may need -minimal and -full builds?
"-DAMD_ICD_LIBRARY_DIR=${khronos-ocl-icd-loader}"
# Temporarily set variables to work around upstream CMakeLists issue # Temporarily set variables to work around upstream CMakeLists issue
# Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed # Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed
@@ -102,26 +120,36 @@ stdenv.mkDerivation (finalAttrs: {
"-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_LIBDIR=lib"
]; ];
env.LLVM_DIR = "";
patches = [ patches = [
./cmake-find-x11-libgl.patch
(fetchpatch { (fetchpatch {
name = "add-missing-operators.patch"; # Fix handling of old fatbin version https://github.com/ROCm/clr/issues/99
url = "https://github.com/ROCm/clr/commit/86bd518981b364c138f9901b28a529899d8654f3.patch"; sha256 = "sha256-CK/QwgWJQEruiG4DqetF9YM0VEWpSiUMxAf1gGdJkuA=";
hash = "sha256-lbswri+zKLxif0hPp4aeJDeVfadhWZz4z+m+G2XcCPI="; url = "https://src.fedoraproject.org/rpms/rocclr/raw/rawhide/f/0001-handle-v1-of-compressed-fatbins.patch";
}) })
(fetchpatch { (fetchpatch {
name = "static-functions.patch"; # improve rocclr isa compatibility check
url = "https://github.com/ROCm/clr/commit/77c581a3ebd47b5e2908973b70adea66891159ee.patch"; sha256 = "sha256-wUrhpYN68AbEXeFU5f366C6peqHyq25kujJXY/bBJMs=";
hash = "sha256-auBedbd7rghlKav7A9V6l64J7VmtE9GizIdi5gWj+fs="; url = "https://github.com/GZGavinZhao/clr/commit/22c17a0ac09c6b77866febf366591f669a1ed133.patch";
}) })
(fetchpatch { (fetchpatch {
name = "extend-hip-isa-compatibility-check.patch"; # [PATCH] Improve hipamd compat check
url = "https://salsa.debian.org/rocm-team/rocm-hipamd/-/raw/d6d20142c37e1dff820950b16ff8f0523241d935/debian/patches/0026-extend-hip-isa-compatibility-check.patch"; sha256 = "sha256-uZQ8rMrWH61CCbxwLqQGggDmXFmYTi6x8OcgYPrZRC8=";
hash = "sha256-eG0ALZZQLRzD7zJueJFhi2emontmYy6xx8Rsm346nQI="; url = "https://github.com/GZGavinZhao/clr/commit/63c6ee630966744d4199fdfb854e98d2da9e1122.patch";
}) })
(fetchpatch { (fetchpatch {
name = "improve-rocclr-isa-compatibility-check.patch"; # [PATCH] SWDEV-504340 - Move cast of cl_mem inside the condition
url = "https://salsa.debian.org/rocm-team/rocm-hipamd/-/raw/d6d20142c37e1dff820950b16ff8f0523241d935/debian/patches/0025-improve-rocclr-isa-compatibility-check.patch"; # Fixes crash due to UB in KernelBlitManager::setArgument
hash = "sha256-8eowuRiOAdd9ucKv4Eg9FPU7c6367H3eP3fRAGfXc6Y="; sha256 = "sha256-nL4CZ7EOXqsTVUtYhuu9DLOMpnMeMRUhkhylEQLTg9I=";
url = "https://github.com/ROCm/clr/commit/fa63919a6339ea2a61111981ba2362c97fbdf743.patch";
})
(fetchpatch {
# [PATCH] SWDEV-507104 - Removes alignment requirement for Semaphore class to resolve runtime misaligned memory issues
sha256 = "sha256-nStJ22B/CM0fzQTvYjbHDbQt0GlE8DXxVK+UDU9BAx4=";
url = "https://github.com/ROCm/clr/commit/21d764518363d74187deaef2e66c1a127bc5aa64.patch";
}) })
]; ];
@@ -135,79 +163,117 @@ stdenv.mkDerivation (finalAttrs: {
--replace "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipconfig.bat DESTINATION bin)" "" --replace "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipconfig.bat DESTINATION bin)" ""
substituteInPlace hipamd/src/hip_embed_pch.sh \ substituteInPlace hipamd/src/hip_embed_pch.sh \
--replace "\''$LLVM_DIR/bin/clang" "${clang}/bin/clang" --replace-fail "\''$LLVM_DIR/bin/clang" "${hipClangPath}/clang"
# https://lists.debian.org/debian-ai/2024/02/msg00178.html
substituteInPlace rocclr/utils/flags.hpp \
--replace-fail "HIP_USE_RUNTIME_UNBUNDLER, false" "HIP_USE_RUNTIME_UNBUNDLER, true"
substituteInPlace opencl/khronos/icd/loader/icd_platform.h \ substituteInPlace opencl/khronos/icd/loader/icd_platform.h \
--replace-fail '#define ICD_VENDOR_PATH "/etc/OpenCL/vendors/";' \ --replace-fail '#define ICD_VENDOR_PATH "/etc/OpenCL/vendors/";' \
'#define ICD_VENDOR_PATH "/run/opengl-driver/etc/OpenCL/vendors/";' '#define ICD_VENDOR_PATH "/run/opengl-driver/etc/OpenCL/vendors/";'
# new unbundler has better error messages, defaulting it on
substituteInPlace rocclr/utils/flags.hpp \
--replace-fail "HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, false" "HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, true"
''; '';
postInstall = '' postInstall = ''
chmod +x $out/bin/*
patchShebangs $out/bin patchShebangs $out/bin
# hipcc.bin and hipconfig.bin is mysteriously never installed cp ${amdclang}/bin/* $out/bin/
cp -a ${hipcc}/bin/{hipcc.bin,hipconfig.bin} $out/bin cp ${amdclangxx}/bin/* $out/bin/
wrapProgram $out/bin/hipcc.bin ${lib.concatStringsSep " " wrapperArgs} for prog in hip{cc,config}{,.pl}; do
wrapProgram $out/bin/hipconfig.bin ${lib.concatStringsSep " " wrapperArgs} wrapProgram $out/bin/$prog ${lib.concatStringsSep " " wrapperArgs}
wrapProgram $out/bin/hipcc.pl ${lib.concatStringsSep " " wrapperArgs} done
wrapProgram $out/bin/hipconfig.pl ${lib.concatStringsSep " " wrapperArgs}
mkdir -p $out/nix-support/
echo '
export HIP_PATH="${placeholder "out"}"
export HIP_PLATFORM=amd
export HIP_DEVICE_LIB_PATH="${rocm-device-libs}/amdgcn/bitcode"
export NIX_CC_USE_RESPONSE_FILE=0
export HIP_CLANG_PATH="${hipClangPath}"
export ROCM_LIBPATCH_VERSION="${ROCM_LIBPATCH_VERSION}"
export HSA_PATH="${rocm-runtime}"' > $out/nix-support/setup-hook
# Just link rocminfo, it's easier # Just link rocminfo, it's easier
ln -s ${rocminfo}/bin/* $out/bin ln -s ${rocminfo}/bin/* $out/bin
ln -s ${rocm-core}/include/* $out/include/
# Replace rocm-opencl-icd functionality # Replace rocm-opencl-icd functionality
mkdir -p $icd/etc/OpenCL/vendors mkdir -p $icd/etc/OpenCL/vendors
echo "$out/lib/libamdocl64.so" > $icd/etc/OpenCL/vendors/amdocl64.icd echo "$out/lib/libamdocl64.so" > $icd/etc/OpenCL/vendors/amdocl64.icd
# add version info to output (downstream rocmPackages look for this) # add version info to output (downstream rocmPackages look for this)
mkdir $out/.info ln -s ${rocm-core}/.info/ $out/.info
echo "${finalAttrs.version}" > $out/.info/version
ln -s ${hipClang} $out/llvm
''; '';
passthru = { disallowedRequisites = [
# All known and valid general GPU targets gcc-unwrapped
# We cannot use this for each ROCm library, as each defines their own supported targets ];
# See: https://github.com/ROCm/ROCm/blob/77cbac4abab13046ee93d8b5bf410684caf91145/README.md#library-target-matrix
gpuTargets = lib.forEach [
"803"
"900"
"906"
"908"
"90a"
"940"
"941"
"942"
"1010"
"1012"
"1030"
"1100"
"1101"
"1102"
] (target: "gfx${target}");
updateScript = rocmUpdateScript { passthru =
name = finalAttrs.pname; {
owner = finalAttrs.src.owner; # All known and valid general GPU targets
repo = finalAttrs.src.repo; # We cannot use this for each ROCm library, as each defines their own supported targets
page = "tags?per_page=1"; # See: https://github.com/ROCm/ROCm/blob/77cbac4abab13046ee93d8b5bf410684caf91145/README.md#library-target-matrix
filter = ".[0].name | split(\"-\") | .[1]"; # Generic targets are not yet available in rocm-6.3.1 llvm
}; gpuTargets = lib.forEach [
# "9-generic"
"900" # MI25, Vega 56/64
"906" # MI50/60, Radeon VII
"908" # MI100
"90a" # MI210 / MI250
# "9-4-generic"
# 940/1 - never released publicly, maybe HPE cray specific MI3xx?
"942" # MI300
# "10-1-generic"
"1010"
"1012"
# "10-3-generic"
"1030" # W6800, various Radeon cards
# "11-generic"
"1100"
"1101"
"1102"
] (target: "gfx${target}");
impureTests = { inherit hipClangPath;
rocm-smi = callPackage ./test-rocm-smi.nix {
inherit rocm-smi; updateScript = rocmUpdateScript {
clr = finalAttrs.finalPackage; name = finalAttrs.pname;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
page = "tags?per_page=4";
}; };
opencl-example = callPackage ./test-opencl-example.nix {
clr = finalAttrs.finalPackage; impureTests = {
rocm-smi = callPackage ./test-rocm-smi.nix {
inherit rocm-smi;
clr = finalAttrs.finalPackage;
};
opencl-example = callPackage ./test-opencl-example.nix {
clr = finalAttrs.finalPackage;
};
}; };
selectGpuTargets =
{
supported ? [ ],
}:
supported;
gpuArchSuffix = "";
}
// lib.optionalAttrs (localGpuTargets != null) {
inherit localGpuTargets;
gpuArchSuffix = "-" + (builtins.concatStringsSep "-" localGpuTargets);
selectGpuTargets =
{
supported ? [ ],
}:
if supported == [ ] then localGpuTargets else lib.lists.intersectLists localGpuTargets supported;
}; };
};
meta = with lib; { meta = with lib; {
description = "AMD Common Language Runtime for hipamd, opencl, and rocclr"; description = "AMD Common Language Runtime for hipamd, opencl, and rocclr";
@@ -215,8 +281,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -0,0 +1,101 @@
From 17e7b7c2ef6023be77b22ae83162e78de0a5a936 Mon Sep 17 00:00:00 2001
From: Anusha GodavarthySurya <anusha.godavarthysurya@amd.com>
Date: Fri, 11 Oct 2024 17:10:12 +0000
Subject: [PATCH] SWDEV-472840 SWDEV-461980 - Fix null stream sync performance
=> If null stream is not created during sync skip nullstrm creation
=> Do cpu wait on blocking & null stream if it exists
Change-Id: I90d6ced6a2dd1782ba58f3fed4e3608fc0efa55a
---
hipamd/src/hip_device.cpp | 23 +++++++++++++++++++----
hipamd/src/hip_internal.hpp | 2 +-
hipamd/src/hip_stream.cpp | 22 ++++++++++++++++------
3 files changed, 36 insertions(+), 11 deletions(-)
diff --git a/hipamd/src/hip_device.cpp b/hipamd/src/hip_device.cpp
index 9f6a8e3d0..20889b0fe 100644
--- a/hipamd/src/hip_device.cpp
+++ b/hipamd/src/hip_device.cpp
@@ -257,15 +257,30 @@ void Device::destroyAllStreams() {
}
// ================================================================================================
-void Device::SyncAllStreams( bool cpu_wait) {
+void Device::SyncAllStreams(bool cpu_wait, bool wait_blocking_streams_only) {
// Make a local copy to avoid stalls for GPU finish with multiple threads
std::vector<hip::Stream*> streams;
streams.reserve(streamSet.size());
{
amd::ScopedLock lock(streamSetLock);
- for (auto it : streamSet) {
- streams.push_back(it);
- it->retain();
+ if (wait_blocking_streams_only) {
+ auto null_stream = GetNullStream();
+ for (auto it : streamSet) {
+ if (it != null_stream && (it->Flags() & hipStreamNonBlocking) == 0) {
+ streams.push_back(it);
+ it->retain();
+ }
+ }
+ // Add null stream to the end of the list so that wait happens after all blocking streams.
+ if (null_stream != nullptr) {
+ streams.push_back(null_stream);
+ null_stream->retain();
+ }
+ } else {
+ for (auto it : streamSet) {
+ streams.push_back(it);
+ it->retain();
+ }
}
}
for (auto it : streams) {
diff --git a/hipamd/src/hip_internal.hpp b/hipamd/src/hip_internal.hpp
index d0a6dca57..47749c012 100644
--- a/hipamd/src/hip_internal.hpp
+++ b/hipamd/src/hip_internal.hpp
@@ -595,7 +595,7 @@ class stream_per_thread {
void destroyAllStreams();
- void SyncAllStreams( bool cpu_wait = true);
+ void SyncAllStreams( bool cpu_wait = true, bool wait_blocking_streams_only = false);
bool StreamCaptureBlocking();
diff --git a/hipamd/src/hip_stream.cpp b/hipamd/src/hip_stream.cpp
index 937374977..76a732acd 100644
--- a/hipamd/src/hip_stream.cpp
+++ b/hipamd/src/hip_stream.cpp
@@ -357,13 +357,23 @@ hipError_t hipStreamSynchronize_common(hipStream_t stream) {
HIP_RETURN(hipErrorStreamCaptureUnsupported);
}
}
- bool wait = (stream == nullptr || stream == hipStreamLegacy) ? true : false;
- auto hip_stream = hip::getStream(stream, wait);
- // Wait for the current host queue
- hip_stream->finish();
- // Release freed memory for all memory pools on the device
- hip_stream->GetDevice()->ReleaseFreedMemory();
+ if (stream == nullptr) {
+ // Do cpu wait on null stream and only on blocking streams
+ constexpr bool WaitblockingStreamOnly = true;
+ getCurrentDevice()->SyncAllStreams(true, WaitblockingStreamOnly);
+
+ // Release freed memory for all memory pools on the device
+ getCurrentDevice()->ReleaseFreedMemory();
+ } else {
+ constexpr bool wait = false;
+ auto hip_stream = hip::getStream(stream, wait);
+
+ // Wait for the current host queue
+ hip_stream->finish();
+ // Release freed memory for all memory pools on the device
+ hip_stream->GetDevice()->ReleaseFreedMemory();
+ }
return hipSuccess;
}

View File

@@ -0,0 +1,43 @@
# ck4inductor: Python package built from the composable_kernel source tree.
# Per its meta.description it is the pytorch inductor backend which uses
# composable_kernel universal GEMM implementations.
#
# Arguments:
#   buildPythonPackage, python, setuptools, setuptools-scm - Python packaging inputs
#   composable_kernel_build - the composable_kernel derivation whose `src` is reused
#   rocm-merged-llvm        - propagated so the compiler is available at runtime
#   lib                     - nixpkgs lib, used for meta
{
  buildPythonPackage,
  python,
  composable_kernel_build,
  lib,
  setuptools,
  setuptools-scm,
  rocm-merged-llvm,
}:
buildPythonPackage {
  pyproject = true;
  pname = "ck4inductor";
  build-system = [
    setuptools
    setuptools-scm
  ];
  version = "6.4.0";
  # Built from the same source tree as composable_kernel itself.
  inherit (composable_kernel_build) src;
  # Each module only needs to be listed once.
  pythonImportsCheck = [
    "ck4inductor"
    "ck4inductor.universal_gemm.gen_instances"
    "ck4inductor.universal_gemm.op"
  ];
  propagatedBuildInputs = [
    # At runtime will fail to compile anything with ck4inductor without this
    # can't easily use in checks phase because most of the compiler machinery is in torch
    rocm-merged-llvm
  ];
  # Sanity check only: the package directory must exist under site-packages in $out.
  checkPhase = ''
    if [ ! -d "$out/${python.sitePackages}/ck4inductor" ]; then
      echo "ck4inductor isn't at the expected location in $out/${python.sitePackages}/ck4inductor"
      exit 1
    fi
  '';
  meta = with lib; {
    description = "pytorch inductor backend which uses composable_kernel universal GEMM implementations";
    homepage = "https://github.com/ROCm/composable_kernel";
    license = with licenses; [ mit ];
    maintainers = teams.rocm.members;
    platforms = platforms.linux;
  };
}

View File

@@ -5,20 +5,36 @@
rocmUpdateScript, rocmUpdateScript,
cmake, cmake,
rocm-cmake, rocm-cmake,
rocm-merged-llvm,
clr, clr,
openmp, rocm-device-libs,
clang-tools-extra, rocminfo,
hipify,
git, git,
gtest, gtest,
zstd, zstd,
ninja,
buildTests ? false, buildTests ? false,
buildExamples ? false, buildExamples ? false,
gpuTargets ? [ ], # gpuTargets = [ "gfx803" "gfx900" "gfx1030" ... ] gpuTargets ? (
clr.localGpuTargets or [
"gfx900"
"gfx906"
"gfx908"
"gfx90a"
"gfx942"
"gfx1030"
"gfx1100"
"gfx1101"
"gfx1102"
]
),
}: }:
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "composable_kernel"; pname = "composable_kernel${clr.gpuArchSuffix}";
version = "6.0.2"; # This version must be PEP 440 compatible because it's the version of the ck4inductor python package too
version = "6.4.0-unstable-20241220";
outputs = outputs =
[ [
@@ -31,32 +47,68 @@ stdenv.mkDerivation (finalAttrs: {
"example" "example"
]; ];
patches = [
# for Gentoo this gives a significant speedup in build times
# not observing speedup. possibly because our LLVM has been patched to fix amdgpu-early-inline-all issues?
# ./disable-amdgpu-inline.patch
];
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "composable_kernel"; repo = "composable_kernel";
rev = "rocm-${finalAttrs.version}"; rev = "07339c738396ebeae57374771ded4dcf11bddf1e";
hash = "sha256-NCqMganmNyQfz3X+KQOrfrimnrgd3HbAGK5DeC4+J+o="; hash = "sha256-EvEBxlOpQ71BF57VW79WBo/cdxAwTKFXFMiYKyGyyEs=";
}; };
nativeBuildInputs = [ nativeBuildInputs = [
git git
cmake cmake
rocm-cmake rocminfo
clr clr
clang-tools-extra hipify
ninja
zstd zstd
]; ];
buildInputs = [ openmp ]; buildInputs = [
rocm-cmake
clr
zstd
];
strictDeps = true;
enableParallelBuilding = true;
requiredSystemFeatures = [ "big-parallel" ];
env.ROCM_PATH = clr;
env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin";
cmakeFlags = cmakeFlags =
[ [
"-DCMAKE_C_COMPILER=hipcc" "-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
"-DCMAKE_CXX_COMPILER=hipcc" "-DCMAKE_BUILD_TYPE=Release"
"-DCMAKE_POLICY_DEFAULT_CMP0069=NEW"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
# "-DDL_KERNELS=ON"
# Not turned on because don't think deps require it, slightly speeds up build
# "-DCK_USE_CODEGEN=ON"
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DBUILD_DEV=OFF"
"-DROCM_PATH=${clr}"
"-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}"
# FP8 can build for 908/90a but very slow build
# and produces unusably slow kernels that are huge
"-DCK_USE_FP8_ON_UNSUPPORTED_ARCH=OFF"
] ]
++ lib.optionals (gpuTargets != [ ]) [ ++ lib.optionals (gpuTargets != [ ]) [
"-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" # We intentionally set GPU_ARCHS and not AMD/GPU_TARGETS
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" # per readme this is required if archs are dissimilar
# In rocm-6.3.x not setting any arch flag worked
# but setting dissimilar arches always failed
"-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}"
] ]
++ lib.optionals buildTests [ ++ lib.optionals buildTests [
"-DGOOGLETEST_DIR=${gtest.src}" # Custom linker names "-DGOOGLETEST_DIR=${gtest.src}" # Custom linker names
@@ -64,22 +116,45 @@ stdenv.mkDerivation (finalAttrs: {
# No flags to build selectively it seems... # No flags to build selectively it seems...
postPatch = postPatch =
lib.optionalString (!buildTests) '' ''
export HIP_DEVICE_LIB_PATH=${rocm-device-libs}/amdgcn/bitcode
''
+ lib.optionalString (!buildTests) ''
substituteInPlace CMakeLists.txt \ substituteInPlace CMakeLists.txt \
--replace "add_subdirectory(test)" "" --replace-fail "add_subdirectory(test)" ""
substituteInPlace codegen/CMakeLists.txt \
--replace-fail "include(ROCMTest)" ""
'' ''
+ lib.optionalString (!buildExamples) '' + lib.optionalString (!buildExamples) ''
substituteInPlace CMakeLists.txt \ substituteInPlace CMakeLists.txt \
--replace "add_subdirectory(example)" "" --replace-fail "add_subdirectory(example)" ""
'' ''
+ '' + ''
substituteInPlace CMakeLists.txt \ substituteInPlace CMakeLists.txt \
--replace "add_subdirectory(profiler)" "" --replace-fail "add_subdirectory(profiler)" ""
''; '';
# Clamp parallelism based on free memory at build start to avoid OOM.
# Compile jobs are limited to 1 + (GB / 2) and link jobs to 1 + (GB / 8),
# where GB is the smaller of MemTotal and MemAvailable; neither limit may
# exceed the incoming NIX_BUILD_CORES. The resulting limits are handed to
# CMake as CK_PARALLEL_COMPILE_JOBS and CK_PARALLEL_LINK_JOBS.
preConfigure = ''
  export NINJA_SUMMARIZE_BUILD=1
  export NINJA_STATUS="[%r jobs | %P %f/%t @ %o/s | %w | ETA %W ] "
  # Whole GB, truncated by %d (assumes /proc/meminfo reports kB, see proc(5))
  MEM_GB_TOTAL=$(awk '/MemTotal/ { printf "%d \n", $2/1024/1024 }' /proc/meminfo)
  MEM_GB_AVAILABLE=$(awk '/MemAvailable/ { printf "%d \n", $2/1024/1024 }' /proc/meminfo)
  APPX_GB=$((MEM_GB_AVAILABLE > MEM_GB_TOTAL ? MEM_GB_TOTAL : MEM_GB_AVAILABLE))
  MAX_CORES=$((1 + APPX_GB / 2))
  MAX_CORES_LINK=$((1 + APPX_GB / 8))
  # The link limit is clamped against the original NIX_BUILD_CORES, before it is lowered below
  MAX_CORES_LINK=$((MAX_CORES_LINK > NIX_BUILD_CORES ? NIX_BUILD_CORES : MAX_CORES_LINK))
  export NIX_BUILD_CORES="$((NIX_BUILD_CORES > MAX_CORES ? MAX_CORES : NIX_BUILD_CORES))"
  # Use MAX_CORES_LINK here: LINK_CORES was never assigned, so the link job
  # limit used to be logged and passed to CMake as an empty string.
  echo "Picked new core limits NIX_BUILD_CORES=$NIX_BUILD_CORES LINK_CORES=$MAX_CORES_LINK based on available mem: $APPX_GB GB"
  cmakeFlagsArray+=(
    "-DCK_PARALLEL_LINK_JOBS=$MAX_CORES_LINK"
    "-DCK_PARALLEL_COMPILE_JOBS=$NIX_BUILD_CORES"
  )
'';
postInstall = postInstall =
'' ''
zstd --rm $out/lib/libdevice_operations.a zstd --rm $out/lib/libdevice_*_operations.a
'' ''
+ lib.optionalString buildTests '' + lib.optionalString buildTests ''
mkdir -p $test/bin mkdir -p $test/bin
@@ -92,21 +167,17 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
# Times out otherwise
requiredSystemFeatures = [ "big-parallel" ];
meta = with lib; { meta = with lib; {
description = "Performance portable programming model for machine learning tensor operators"; description = "Performance portable programming model for machine learning tensor operators";
homepage = "https://github.com/ROCm/composable_kernel"; homepage = "https://github.com/ROCm/composable_kernel";
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken = # Builds which don't target any gfx9 cause cmake errors in dependent projects
versions.minor finalAttrs.version != versions.minor stdenv.cc.version broken = !finalAttrs.passthru.anyGfx9Target;
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -0,0 +1,16 @@
Flag -amdgpu-early-inline-all explodes memory consumption, so that build does not fit 64GB of RAM.
LLVM bug: https://github.com/llvm/llvm-project/issues/86332
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -213,11 +213,6 @@ if(NOT WIN32 AND check-coerce AND ${hip_VERSION_FLAT} GREATER 600241132 AND ${hi
message("Adding the amdgpu-coerce-illegal-types=1")
add_compile_options("SHELL: -mllvm -amdgpu-coerce-illegal-types=1")
endif()
-if(NOT WIN32 AND ${hip_VERSION_FLAT} GREATER 600241132)
- message("Adding -amdgpu-early-inline-all=true and -amdgpu-function-calls=false")
- add_compile_options("SHELL: -mllvm -amdgpu-early-inline-all=true")
- add_compile_options("SHELL: -mllvm -amdgpu-function-calls=false")
-endif()
#
# Seperate linking jobs from compiling
# Too many concurrent linking jobs can break the build

View File

@@ -1,20 +1,29 @@
{ {
runCommandLocal, runCommandLocal,
composable_kernel_build, composable_kernel_build,
ck4inductor,
zstd, zstd,
}: }:
let let
ck = composable_kernel_build; ck = composable_kernel_build;
in in
runCommandLocal "unpack-${ck.name}" runCommandLocal "unpack-${ck.pname}"
{ {
nativeBuildInputs = [ zstd ]; nativeBuildInputs = [ zstd ];
meta = ck.meta; inherit (ck) meta;
} }
'' ''
mkdir -p $out mkdir -p $out
cp -r --no-preserve=mode ${ck}/* $out cp -r --no-preserve=mode ${ck}/* $out
zstd -dv --rm $out/lib/libdevice_operations.a.zst -o $out/lib/libdevice_operations.a for zs in $out/lib/libdevice_*_operations.a.zst; do
zstd -dv --rm "$zs" -o "''${zs/.zst}"
done
substituteInPlace $out/lib/cmake/composable_kernel/*.cmake \ substituteInPlace $out/lib/cmake/composable_kernel/*.cmake \
--replace "${ck}" "$out" --replace "${ck}" "$out"
cp -r --no-preserve=mode ${ck4inductor}/* $out/
if [ ! -e $out/lib/python3.12/site-packages/ck4inductor/library/src/tensor_operation_instance/gpu/gemm_universal ]; then
echo "Missing gemm_universal at expected path for pytorch CK backend"
exit 1
fi
'' ''

View File

@@ -1,530 +1,507 @@
{ stdenv {
, lib lib,
, config config,
, callPackage callPackage,
, recurseIntoAttrs newScope,
, symlinkJoin recurseIntoAttrs,
, fetchFromGitHub symlinkJoin,
, cudaPackages fetchFromGitHub,
, python3Packages ffmpeg_4,
, elfutils boost179,
, boost179 opencv,
, opencv libjpeg_turbo,
, ffmpeg_4 python3Packages,
, libjpeg_turbo triton-llvm,
openmpi,
rocmGpuArches ? [ ],
}: }:
let let
rocmUpdateScript = callPackage ./update.nix { }; outer = lib.makeScope newScope (
in rec { self:
## ROCm ## let
llvm = recurseIntoAttrs (callPackage ./llvm/default.nix { inherit rocmUpdateScript rocm-device-libs rocm-runtime rocm-thunk clr; }); inherit (self) llvm;
pyPackages = python3Packages;
rocm-core = callPackage ./rocm-core { openmpi-orig = openmpi;
inherit rocmUpdateScript; in
stdenv = llvm.rocmClangStdenv; {
}; inherit rocmGpuArches;
buildTests = false;
rocm-cmake = callPackage ./rocm-cmake { buildBenchmarks = false;
inherit rocmUpdateScript;
stdenv = llvm.rocmClangStdenv;
};
rocm-thunk = callPackage ./rocm-thunk {
inherit rocmUpdateScript;
stdenv = llvm.rocmClangStdenv;
};
rocm-smi = python3Packages.callPackage ./rocm-smi {
inherit rocmUpdateScript;
stdenv = llvm.rocmClangStdenv;
};
# Eventually will be in the LLVM repo
rocm-device-libs = callPackage ./rocm-device-libs {
inherit rocmUpdateScript rocm-cmake;
stdenv = llvm.rocmClangStdenv;
};
rocm-runtime = callPackage ./rocm-runtime {
inherit rocmUpdateScript rocm-device-libs rocm-thunk;
stdenv = llvm.rocmClangStdenv;
};
# Eventually will be in the LLVM repo
rocm-comgr = callPackage ./rocm-comgr {
inherit rocmUpdateScript rocm-cmake rocm-device-libs;
stdenv = llvm.rocmClangStdenv;
};
rocminfo = callPackage ./rocminfo {
inherit rocmUpdateScript rocm-cmake rocm-runtime;
stdenv = llvm.rocmClangStdenv;
};
clang-ocl = callPackage ./clang-ocl {
inherit rocmUpdateScript rocm-cmake rocm-device-libs;
stdenv = llvm.rocmClangStdenv;
};
# Unfree
hsa-amd-aqlprofile-bin = callPackage ./hsa-amd-aqlprofile-bin {
stdenv = llvm.rocmClangStdenv;
};
# Broken, too many errors
rdc = callPackage ./rdc {
inherit rocmUpdateScript rocm-smi rocm-runtime stdenv;
# stdenv = llvm.rocmClangStdenv;
};
rocm-docs-core = python3Packages.callPackage ./rocm-docs-core { inherit stdenv; };
hip-common = callPackage ./hip-common {
inherit rocmUpdateScript;
stdenv = llvm.rocmClangStdenv;
};
# Eventually will be in the LLVM repo
hipcc = callPackage ./hipcc {
inherit rocmUpdateScript;
stdenv = llvm.rocmClangStdenv;
};
# Replaces hip, opencl-runtime, and rocclr
clr = callPackage ./clr {
inherit rocmUpdateScript hip-common hipcc rocm-device-libs rocm-comgr rocm-runtime roctracer rocminfo rocm-smi;
inherit (llvm) clang;
stdenv = llvm.rocmClangStdenv;
};
hipify = callPackage ./hipify {
inherit rocmUpdateScript;
inherit (llvm) clang;
stdenv = llvm.rocmClangStdenv;
};
# Needs GCC
rocprofiler = callPackage ./rocprofiler {
inherit rocmUpdateScript clr rocm-core rocm-thunk rocm-device-libs roctracer rocdbgapi rocm-smi hsa-amd-aqlprofile-bin stdenv;
inherit (llvm) clang;
};
# Needs GCC
roctracer = callPackage ./roctracer {
inherit rocmUpdateScript rocm-device-libs rocm-runtime clr stdenv;
};
rocgdb = callPackage ./rocgdb {
inherit rocmUpdateScript rocdbgapi;
stdenv = llvm.rocmClangStdenv;
};
rocdbgapi = callPackage ./rocdbgapi {
inherit rocmUpdateScript rocm-cmake rocm-comgr rocm-runtime;
stdenv = llvm.rocmClangStdenv;
};
rocr-debug-agent = callPackage ./rocr-debug-agent {
inherit rocmUpdateScript clr rocdbgapi;
stdenv = llvm.rocmClangStdenv;
};
rocprim = callPackage ./rocprim {
inherit rocmUpdateScript rocm-cmake clr;
stdenv = llvm.rocmClangStdenv;
};
rocsparse = callPackage ./rocsparse {
inherit rocmUpdateScript rocm-cmake rocprim clr;
stdenv = llvm.rocmClangStdenv;
};
rocthrust = callPackage ./rocthrust {
inherit rocmUpdateScript rocm-cmake rocprim clr;
stdenv = llvm.rocmClangStdenv;
};
rocrand = callPackage ./rocrand {
inherit rocmUpdateScript rocm-cmake clr;
stdenv = llvm.rocmClangStdenv;
};
hiprand = callPackage ./hiprand {
inherit rocmUpdateScript rocm-cmake clr rocrand;
stdenv = llvm.rocmClangStdenv;
};
rocfft = callPackage ./rocfft {
inherit rocmUpdateScript rocm-cmake rocrand rocfft clr;
inherit (llvm) openmp;
stdenv = llvm.rocmClangStdenv;
};
rccl = callPackage ./rccl {
inherit rocmUpdateScript rocm-cmake rocm-smi clr hipify;
stdenv = llvm.rocmClangStdenv;
};
hipcub = callPackage ./hipcub {
inherit rocmUpdateScript rocm-cmake rocprim clr;
stdenv = llvm.rocmClangStdenv;
};
hipsparse = callPackage ./hipsparse {
inherit rocmUpdateScript rocm-cmake rocsparse clr;
inherit (llvm) openmp;
stdenv = llvm.rocmClangStdenv;
};
hipfort = callPackage ./hipfort {
inherit rocmUpdateScript rocm-cmake;
stdenv = llvm.rocmClangStdenv;
};
hipfft = callPackage ./hipfft {
inherit rocmUpdateScript rocm-cmake rocfft clr;
inherit (llvm) openmp;
stdenv = llvm.rocmClangStdenv;
};
tensile = python3Packages.callPackage ./tensile {
inherit rocmUpdateScript rocminfo;
stdenv = llvm.rocmClangStdenv;
};
rocblas = callPackage ./rocblas {
inherit rocmUpdateScript rocm-cmake clr tensile;
inherit (llvm) openmp;
stdenv = llvm.rocmClangStdenv;
};
rocsolver = callPackage ./rocsolver {
inherit rocmUpdateScript rocm-cmake rocblas rocsparse clr;
stdenv = llvm.rocmClangStdenv;
};
rocwmma = callPackage ./rocwmma {
inherit rocmUpdateScript rocm-cmake rocm-smi rocblas clr;
inherit (llvm) openmp;
stdenv = llvm.rocmClangStdenv;
};
rocalution = callPackage ./rocalution {
inherit rocmUpdateScript rocm-cmake rocprim rocsparse rocrand rocblas clr;
inherit (llvm) openmp;
stdenv = llvm.rocmClangStdenv;
};
rocmlir = callPackage ./rocmlir {
inherit rocmUpdateScript rocm-cmake rocminfo clr;
stdenv = llvm.rocmClangStdenv;
};
rocmlir-rock = rocmlir.override {
buildRockCompiler = true;
};
hipsolver = callPackage ./hipsolver {
inherit rocmUpdateScript rocm-cmake rocblas rocsolver clr;
stdenv = llvm.rocmClangStdenv;
};
hipblas = callPackage ./hipblas {
inherit rocmUpdateScript rocm-cmake rocblas rocsolver clr;
stdenv = llvm.rocmClangStdenv;
};
# hipBlasLt - Very broken with Tensile at the moment, only supports GFX9
# hipTensor - Only supports GFX9
composable_kernel = callPackage ./composable_kernel/unpack.nix {
composable_kernel_build = callPackage ./composable_kernel {
inherit rocmUpdateScript rocm-cmake clr;
inherit (llvm) openmp clang-tools-extra;
stdenv = llvm.rocmClangStdenv; stdenv = llvm.rocmClangStdenv;
};
};
half = callPackage ./half { rocmPath = self.callPackage ./rocm-path { };
inherit rocmUpdateScript rocm-cmake; rocmUpdateScript = self.callPackage ./update.nix { };
stdenv = llvm.rocmClangStdenv;
};
miopen = callPackage ./miopen { ## ROCm ##
inherit rocmUpdateScript rocm-cmake rocblas clang-ocl composable_kernel rocm-comgr clr rocm-docs-core half roctracer; llvm = recurseIntoAttrs (
inherit (llvm) clang-tools-extra; callPackage ./llvm/default.nix {
stdenv = llvm.rocmClangStdenv; inherit (self) rocm-device-libs rocm-runtime;
rocmlir = rocmlir-rock; }
boost = boost179.override { enableStatic = true; }; );
}; inherit (self.llvm) rocm-merged-llvm clang openmp;
miopen-hip = miopen; rocm-core = self.callPackage ./rocm-core { };
amdsmi = pyPackages.callPackage ./amdsmi {
migraphx = callPackage ./migraphx { inherit (self) rocmUpdateScript;
inherit rocmUpdateScript rocm-cmake rocblas composable_kernel miopen clr half rocm-device-libs;
inherit (llvm) openmp clang-tools-extra;
stdenv = llvm.rocmClangStdenv;
rocmlir = rocmlir-rock;
};
rpp = callPackage ./rpp {
inherit rocmUpdateScript rocm-cmake rocm-docs-core clr half;
inherit (llvm) openmp;
stdenv = llvm.rocmClangStdenv;
};
rpp-hip = rpp.override {
useOpenCL = false;
useCPU = false;
};
rpp-opencl = rpp.override {
useOpenCL = true;
useCPU = false;
};
rpp-cpu = rpp.override {
useOpenCL = false;
useCPU = true;
};
mivisionx = callPackage ./mivisionx {
inherit rocmUpdateScript rocm-cmake rocm-device-libs clr rpp rocblas miopen migraphx half rocm-docs-core;
inherit (llvm) clang openmp;
opencv = opencv.override { enablePython = true; };
ffmpeg = ffmpeg_4;
stdenv = llvm.rocmClangStdenv;
# Unfortunately, rocAL needs a custom libjpeg-turbo until further notice
# See: https://github.com/ROCm/MIVisionX/issues/1051
libjpeg_turbo = libjpeg_turbo.overrideAttrs {
version = "2.0.6.1";
src = fetchFromGitHub {
owner = "rrawther";
repo = "libjpeg-turbo";
rev = "640d7ee1917fcd3b6a5271aa6cf4576bccc7c5fb";
sha256 = "sha256-T52whJ7nZi8jerJaZtYInC2YDN0QM+9tUDqiNr6IsNY=";
}; };
# overwrite all patches, since patches for newer version do not apply rocm-cmake = self.callPackage ./rocm-cmake { };
patches = [ ./0001-Compile-transupp.c-as-part-of-the-library.patch ];
};
};
mivisionx-hip = mivisionx.override { rocm-smi = pyPackages.callPackage ./rocm-smi {
rpp = rpp-hip; inherit (self) rocmUpdateScript;
useOpenCL = false; };
useCPU = false;
};
mivisionx-cpu = mivisionx.override { rocm-device-libs = self.callPackage ./rocm-device-libs {
rpp = rpp-cpu; inherit (llvm) rocm-merged-llvm;
useOpenCL = false; };
useCPU = true;
};
## Meta ## rocm-runtime = self.callPackage ./rocm-runtime {
# Emulate common ROCm meta layout inherit (llvm) rocm-merged-llvm;
# These are mainly for users. I strongly suggest NOT using these in nixpkgs derivations };
# Don't put these into `propagatedBuildInputs` unless you want PATH/PYTHONPATH issues!
# See: https://rocm.docs.amd.com/en/docs-5.7.1/_images/image.004.png
# See: https://rocm.docs.amd.com/en/docs-5.7.1/deploy/linux/os-native/package_manager_integration.html
meta = rec {
rocm-developer-tools = symlinkJoin {
name = "rocm-developer-tools-meta";
paths = [ rocm-comgr = self.callPackage ./rocm-comgr {
hsa-amd-aqlprofile-bin inherit (llvm) rocm-merged-llvm;
rocm-core };
rocr-debug-agent
roctracer
rocdbgapi
rocprofiler
rocgdb
rocm-language-runtime
];
};
rocm-ml-sdk = symlinkJoin { rocminfo = self.callPackage ./rocminfo { };
name = "rocm-ml-sdk-meta";
paths = [ # Unfree
rocm-core hsa-amd-aqlprofile-bin = self.callPackage ./hsa-amd-aqlprofile-bin { };
miopen-hip
rocm-hip-sdk
rocm-ml-libraries
];
};
rocm-ml-libraries = symlinkJoin { rdc = self.callPackage ./rdc { };
name = "rocm-ml-libraries-meta";
paths = [ rocm-docs-core = python3Packages.callPackage ./rocm-docs-core { };
llvm.clang
llvm.mlir
llvm.openmp
rocm-core
miopen-hip
rocm-hip-libraries
];
};
rocm-hip-sdk = symlinkJoin { hip-common = self.callPackage ./hip-common { };
name = "rocm-hip-sdk-meta";
paths = [ # Eventually will be in the LLVM repo
rocprim hipcc = self.callPackage ./hipcc {
rocalution inherit (llvm) rocm-merged-llvm;
hipfft };
rocm-core
hipcub
hipblas
rocrand
rocfft
rocsparse
rccl
rocthrust
rocblas
hipsparse
hipfort
rocwmma
hipsolver
rocsolver
rocm-hip-libraries
rocm-hip-runtime-devel
];
};
rocm-hip-libraries = symlinkJoin { # Replaces hip, opencl-runtime, and rocclr
name = "rocm-hip-libraries-meta"; clr = self.callPackage ./clr { };
paths = [ aotriton = self.callPackage ./aotriton { };
rocblas
hipfort
rocm-core
rocsolver
rocalution
rocrand
hipblas
rocfft
hipfft
rccl
rocsparse
hipsparse
hipsolver
rocm-hip-runtime
];
};
rocm-openmp-sdk = symlinkJoin { hipify = self.callPackage ./hipify {
name = "rocm-openmp-sdk-meta"; inherit (llvm)
clang
rocm-merged-llvm
;
};
paths = [ # hsakmt was merged into rocm-runtime
rocm-core hsakmt = self.rocm-runtime;
llvm.clang
llvm.mlir
llvm.openmp # openmp-extras-devel (https://github.com/ROCm/aomp)
rocm-language-runtime
];
};
rocm-opencl-sdk = symlinkJoin { rocprofiler = self.callPackage ./rocprofiler {
name = "rocm-opencl-sdk-meta"; inherit (llvm) clang;
};
rocprofiler-register = self.callPackage ./rocprofiler-register {
inherit (llvm) clang;
};
paths = [ # Needs GCC
rocm-core roctracer = self.callPackage ./roctracer { };
rocm-runtime
clr
clr.icd
rocm-thunk
rocm-opencl-runtime
];
};
rocm-opencl-runtime = symlinkJoin { rocgdb = self.callPackage ./rocgdb { };
name = "rocm-opencl-runtime-meta";
paths = [ rocdbgapi = self.callPackage ./rocdbgapi { };
rocm-core
clr
clr.icd
rocm-language-runtime
];
};
rocm-hip-runtime-devel = symlinkJoin { rocr-debug-agent = self.callPackage ./rocr-debug-agent { };
name = "rocm-hip-runtime-devel-meta";
paths = [ rocprim = self.callPackage ./rocprim { };
clr
rocm-core
hipify
rocm-cmake
llvm.clang
llvm.mlir
llvm.openmp
rocm-thunk
rocm-runtime
rocm-hip-runtime
];
};
rocm-hip-runtime = symlinkJoin { rocsparse = self.callPackage ./rocsparse { };
name = "rocm-hip-runtime-meta";
paths = [ rocthrust = self.callPackage ./rocthrust { };
rocm-core
rocminfo
clr
rocm-language-runtime
];
};
rocm-language-runtime = symlinkJoin { rocrand = self.callPackage ./rocrand { };
name = "rocm-language-runtime-meta";
paths = [ hiprand = self.callPackage ./hiprand { };
rocm-runtime
rocm-core
rocm-comgr
llvm.openmp # openmp-extras-runtime (https://github.com/ROCm/aomp)
];
};
rocm-all = symlinkJoin { rocfft = self.callPackage ./rocfft { };
name = "rocm-all-meta";
paths = [ mscclpp = self.callPackage ./mscclpp { };
rocm-developer-tools
rocm-ml-sdk
rocm-ml-libraries
rocm-hip-sdk
rocm-hip-libraries
rocm-openmp-sdk
rocm-opencl-sdk
rocm-opencl-runtime
rocm-hip-runtime-devel
rocm-hip-runtime
rocm-language-runtime
];
};
};
} // lib.optionalAttrs config.allowAliases {
miopengemm= throw ''
'miopengemm' has been deprecated.
It is still available for some time as part of rocmPackages_5.
''; # Added 2024-3-3
miopen-opencl= throw '' rccl = self.callPackage ./rccl { };
'miopen-opencl' has been deprecated.
It is still available for some time as part of rocmPackages_5.
''; # Added 2024-3-3
mivisionx-opencl = throw '' # RCCL with sanitizers and tests
'mivisionx-opencl' has been deprecated. # Can't have with sanitizer build as dep of other packages without
Other versions of mivisionx are still available. # runtime crashes due to ASAN not loading first
It is also still available for some time as part of rocmPackages_5. rccl-tests = self.callPackage ./rccl {
''; # Added 2024-3-24 buildTests = true;
};
hipcub = self.callPackage ./hipcub { };
hipsparse = self.callPackage ./hipsparse { };
hipfort = self.callPackage ./hipfort { };
hipfft = self.callPackage ./hipfft { };
tensile = pyPackages.callPackage ./tensile {
inherit (self)
rocmUpdateScript
clr
;
};
rocblas = self.callPackage ./rocblas {
buildTests = true;
buildBenchmarks = true;
};
rocsolver = self.callPackage ./rocsolver { };
rocwmma = self.callPackage ./rocwmma { };
rocalution = self.callPackage ./rocalution { };
rocmlir = self.callPackage ./rocmlir {
buildRockCompiler = true;
};
hipsolver = self.callPackage ./hipsolver { };
hipblas-common = self.callPackage ./hipblas-common { };
hipblas = self.callPackage ./hipblas { };
hipblaslt = self.callPackage ./hipblaslt { };
# hipTensor - Only supports GFX9
composable_kernel_build = self.callPackage ./composable_kernel { };
# FIXME: we have compressed code objects now, may be able to skip two stages?
composable_kernel = self.callPackage ./composable_kernel/unpack.nix { };
ck4inductor = pyPackages.callPackage ./composable_kernel/ck4inductor.nix {
inherit (self) composable_kernel_build;
inherit (llvm) rocm-merged-llvm;
};
half = self.callPackage ./half { };
miopen = self.callPackage ./miopen {
boost = boost179.override { enableStatic = true; };
};
miopen-hip = self.miopen;
migraphx = self.callPackage ./migraphx { };
rpp = self.callPackage ./rpp { };
rpp-hip = self.rpp.override {
useOpenCL = false;
useCPU = false;
};
rpp-opencl = self.rpp.override {
useOpenCL = true;
useCPU = false;
};
rpp-cpu = self.rpp.override {
useOpenCL = false;
useCPU = true;
};
mivisionx = self.callPackage ./mivisionx {
opencv = opencv.override { enablePython = true; };
# TODO: Remove this pin in ROCm 6.4+
# FFMPEG support was improved in https://github.com/ROCm/MIVisionX/pull/1460
ffmpeg = ffmpeg_4;
# Unfortunately, rocAL needs a custom libjpeg-turbo until further notice
# See: https://github.com/ROCm/MIVisionX/issues/1051
libjpeg_turbo = libjpeg_turbo.overrideAttrs {
version = "2.0.6.1";
src = fetchFromGitHub {
owner = "rrawther";
repo = "libjpeg-turbo";
rev = "640d7ee1917fcd3b6a5271aa6cf4576bccc7c5fb";
sha256 = "sha256-T52whJ7nZi8jerJaZtYInC2YDN0QM+9tUDqiNr6IsNY=";
};
# overwrite all patches, since patches for newer version do not apply
patches = [ ./0001-Compile-transupp.c-as-part-of-the-library.patch ];
};
};
mivisionx-hip = self.mivisionx.override {
rpp = self.rpp-hip;
useOpenCL = false;
useCPU = false;
};
mivisionx-cpu = self.mivisionx.override {
rpp = self.rpp-cpu;
useOpenCL = false;
useCPU = true;
};
# Even if config.rocmSupport is false we need rocmSupport true
# version of ucc/ucx in openmpi in this package set
openmpi = openmpi-orig.override (
prev:
let
ucx = prev.ucx.override {
enableCuda = false;
enableRocm = true;
};
in
{
inherit ucx;
ucc = prev.ucc.override {
enableCuda = false;
inherit ucx;
};
}
);
mpi = self.openmpi;
triton-llvm = triton-llvm.overrideAttrs {
src = fetchFromGitHub {
owner = "llvm";
repo = "llvm-project";
# make sure this matches triton llvm rel branch hash for now
# https://github.com/triton-lang/triton/blob/release/3.2.x/cmake/llvm-hash.txt
rev = "86b69c31642e98f8357df62c09d118ad1da4e16a";
hash = "sha256-W/mQwaLGx6/rIBjdzUTIbWrvGjdh7m4s15f70fQ1/hE=";
};
pname = "triton-llvm-rocm";
patches = [ ]; # FIXME: https://github.com/llvm/llvm-project//commit/84837e3cc1cf17ed71580e3ea38299ed2bfaa5f6.patch doesn't apply, may need to rebase
};
triton = pyPackages.callPackage ./triton { rocmPackages = self; };
## Meta ##
# Emulate common ROCm meta layout
# These are mainly for users. I strongly suggest NOT using these in nixpkgs derivations
# Don't put these into `propagatedBuildInputs` unless you want PATH/PYTHONPATH issues!
# See: https://rocm.docs.amd.com/en/docs-5.7.1/_images/image.004.png
# See: https://rocm.docs.amd.com/en/docs-5.7.1/deploy/linux/os-native/package_manager_integration.html
meta = with self; rec {
rocm-developer-tools = symlinkJoin {
name = "rocm-developer-tools-meta";
paths = [
hsa-amd-aqlprofile-bin
rocm-core
rocr-debug-agent
roctracer
rocdbgapi
rocprofiler
rocgdb
rocm-language-runtime
];
};
rocm-ml-sdk = symlinkJoin {
name = "rocm-ml-sdk-meta";
paths = [
rocm-core
miopen-hip
rocm-hip-sdk
rocm-ml-libraries
];
};
rocm-ml-libraries = symlinkJoin {
name = "rocm-ml-libraries-meta";
paths = [
llvm.clang
llvm.mlir
llvm.openmp
rocm-core
miopen-hip
rocm-hip-libraries
];
};
rocm-hip-sdk = symlinkJoin {
name = "rocm-hip-sdk-meta";
paths = [
rocprim
rocalution
hipfft
rocm-core
hipcub
hipblas
hipblaslt
rocrand
rocfft
rocsparse
rccl
rocthrust
rocblas
hipsparse
hipfort
rocwmma
hipsolver
rocsolver
rocm-hip-libraries
rocm-hip-runtime-devel
];
};
rocm-hip-libraries = symlinkJoin {
name = "rocm-hip-libraries-meta";
paths = [
rocblas
hipfort
rocm-core
rocsolver
rocalution
rocrand
hipblas
hipblaslt
rocfft
hipfft
rccl
rocsparse
hipsparse
hipsolver
rocm-hip-runtime
];
};
rocm-openmp-sdk = symlinkJoin {
name = "rocm-openmp-sdk-meta";
paths = [
rocm-core
llvm.clang
llvm.mlir
llvm.openmp # openmp-extras-devel (https://github.com/ROCm/aomp)
rocm-language-runtime
];
};
rocm-opencl-sdk = symlinkJoin {
name = "rocm-opencl-sdk-meta";
paths = [
rocm-core
rocm-runtime
clr
clr.icd
rocm-opencl-runtime
];
};
rocm-opencl-runtime = symlinkJoin {
name = "rocm-opencl-runtime-meta";
paths = [
rocm-core
clr
clr.icd
rocm-language-runtime
];
};
rocm-hip-runtime-devel = symlinkJoin {
name = "rocm-hip-runtime-devel-meta";
paths = [
clr
rocm-core
hipify
rocm-cmake
llvm.clang
llvm.mlir
llvm.openmp
rocm-runtime
rocm-hip-runtime
];
};
rocm-hip-runtime = symlinkJoin {
name = "rocm-hip-runtime-meta";
paths = [
rocm-core
rocminfo
clr
rocm-language-runtime
];
};
rocm-language-runtime = symlinkJoin {
name = "rocm-language-runtime-meta";
paths = [
rocm-runtime
rocm-core
rocm-comgr
llvm.openmp # openmp-extras-runtime (https://github.com/ROCm/aomp)
];
};
rocm-all = symlinkJoin {
name = "rocm-all-meta";
paths = [
rocm-developer-tools
rocm-ml-sdk
rocm-ml-libraries
rocm-hip-sdk
rocm-hip-libraries
rocm-openmp-sdk
rocm-opencl-sdk
rocm-opencl-runtime
rocm-hip-runtime-devel
rocm-hip-runtime
rocm-language-runtime
];
};
};
rocm-tests = self.callPackage ./rocm-tests {
rocmPackages = self;
};
}
// lib.optionalAttrs config.allowAliases {
rocm-thunk = throw ''
'rocm-thunk' has been removed. It's now part of the ROCm runtime.
''; # Added 2025-3-16
clang-ocl = throw ''
'clang-ocl' has been deprecated upstream. Use ROCm's clang directly.
''; # Added 2025-3-16
miopengemm = throw ''
'miopengemm' has been deprecated.
It is still available for some time as part of rocmPackages_5.
''; # Added 2024-3-3
miopen-opencl = throw ''
'miopen-opencl' has been deprecated.
It is still available for some time as part of rocmPackages_5.
''; # Added 2024-3-3
mivisionx-opencl = throw ''
'mivisionx-opencl' has been deprecated.
Other versions of mivisionx are still available.
It is also still available for some time as part of rocmPackages_5.
''; # Added 2024-3-24
}
);
# Given a list of GPU architecture names, return `outer` re-scoped so that
# `clr` is overridden with `localGpuTargets` set to that list.
scopeForArches =
  arches:
  let
    # Overlay that swaps in a clr overridden with `localGpuTargets = arches`.
    restrictClr = _final: prev: {
      clr = prev.clr.override { localGpuTargets = arches; };
    };
  in
  outer.overrideScope restrictClr;
in
outer
// builtins.listToAttrs (
builtins.map (arch: {
name = arch;
value = scopeForArches [ arch ];
}) outer.clr.gpuTargets
)
// {
gfx9 = scopeForArches [
"gfx906"
"gfx908"
"gfx90a"
"gfx942"
];
gfx10 = scopeForArches [
"gfx1010"
"gfx1030"
];
gfx11 = scopeForArches [
"gfx1100"
"gfx1101"
"gfx1102"
];
} }

View File

@@ -1,20 +1,21 @@
{ lib {
, stdenv lib,
, fetchFromGitHub stdenv,
, rocmUpdateScript fetchFromGitHub,
, cmake rocmUpdateScript,
, rocm-cmake cmake,
rocm-cmake,
}: }:
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "half"; pname = "half";
version = "6.0.2"; version = "6.3.1";
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "half"; repo = "half";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-wvl8ny7pbY9hUGGtJ70R7/4YIsahgI7qcVzUnxmUfZM="; hash = "sha256-H8Ogm4nxaxDB0WHx+KhRjUO3vzp3AwCqrIQ6k8R+xkc=";
}; };
nativeBuildInputs = [ nativeBuildInputs = [
@@ -24,8 +25,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -34,6 +35,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.unix; platforms = platforms.unix;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -1,18 +1,21 @@
{ lib {
, stdenv lib,
, fetchFromGitHub stdenv,
, rocmUpdateScript fetchFromGitHub,
rocmUpdateScript,
}: }:
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "hip-common"; pname = "hip-common";
version = "6.0.2"; version = "6.3.1";
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "HIP"; repo = "HIP";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-51u3By0R4LKoWiklNacFP6HILL845jxpN6FD7rQB+zQ="; hash = "sha256-y85S2fULvbQfwxZukIsMLuQAqWEv1kHL8fdozK4kj5I=";
# rev = "5f2d2d109c34e749d7947b48834098eec26a5e67";
# hash = "sha256-Lws65mzRJZP/JE9UiHHfX4Y3zOYA6FPxgbAea48D9Gk=";
}; };
dontConfigure = true; dontConfigure = true;
@@ -29,8 +32,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -39,6 +42,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -0,0 +1,29 @@
# Package definition for hipblas-common, described below as the
# "Common files shared by hipBLAS and hipBLASLt".
{
lib,
stdenv,
cmake,
fetchFromGitHub,
rocm-cmake,
rocmUpdateScript,
}:
stdenv.mkDerivation (finalAttrs: {
pname = "hipblas-common";
version = "6.3.1";
# NOTE(review): no `src` attribute is visible in this block, although
# `fetchFromGitHub` is accepted as an argument above and `finalAttrs.src`
# is read by the update script below. Confirm the source fetch was not
# dropped; without it `finalAttrs.src.owner` / `.repo` cannot be evaluated.
nativeBuildInputs = [
cmake
rocm-cmake
];
# Update script is parameterised by this package's name and the owner/repo
# of its source.
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
description = "Common files shared by hipBLAS and hipBLASLt";
homepage = "https://github.com/ROCm/hipBLASlt";
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
};
})

View File

@@ -2,13 +2,17 @@
lib, lib,
stdenv, stdenv,
fetchFromGitHub, fetchFromGitHub,
fetchpatch,
rocmUpdateScript, rocmUpdateScript,
cmake, cmake,
rocm-cmake, rocm-cmake,
clr, clr,
gfortran, gfortran,
hipblas-common,
rocblas, rocblas,
rocsolver, rocsolver,
rocsparse,
rocprim,
gtest, gtest,
lapack-reference, lapack-reference,
buildTests ? false, buildTests ? false,
@@ -19,7 +23,7 @@
# Can also use cuBLAS # Can also use cuBLAS
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "hipblas"; pname = "hipblas";
version = "6.0.2"; version = "6.3.1";
outputs = outputs =
[ [
@@ -39,9 +43,23 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm"; owner = "ROCm";
repo = "hipBLAS"; repo = "hipBLAS";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-Fq7o2sMmHlHIv9UKJw+u/h9K/ZhKVJWwosYTdYIsscA="; hash = "sha256-Rz1KAhBUbvErHTF2PM1AkVhqo4OHldfSNMSpp5Tx9yk=";
}; };
patches = [
# https://github.com/ROCm/hipBLAS/pull/952
(fetchpatch {
name = "transitively-depend-hipblas-common.patch";
url = "https://github.com/ROCm/hipBLAS/commit/54220fdaebf0fb4fd0921ee9e418ace5b143ec8f.patch";
hash = "sha256-MFEhv8Bkrd2zD0FFIDg9oJzO7ztdyMAF+R9oYA0rmwQ=";
})
];
postPatch = ''
substituteInPlace library/CMakeLists.txt \
--replace-fail "find_package(Git REQUIRED)" ""
'';
nativeBuildInputs = [ nativeBuildInputs = [
cmake cmake
rocm-cmake rocm-cmake
@@ -49,9 +67,13 @@ stdenv.mkDerivation (finalAttrs: {
gfortran gfortran
]; ];
propagatedBuildInputs = [ hipblas-common ];
buildInputs = buildInputs =
[ [
rocblas rocblas
rocprim
rocsparse
rocsolver rocsolver
] ]
++ lib.optionals buildTests [ ++ lib.optionals buildTests [
@@ -63,13 +85,16 @@ stdenv.mkDerivation (finalAttrs: {
cmakeFlags = cmakeFlags =
[ [
"-DCMAKE_C_COMPILER=hipcc" "-DCMAKE_BUILD_TYPE=Release"
"-DCMAKE_CXX_COMPILER=hipcc" "-DCMAKE_CXX_COMPILER=${lib.getExe' clr "hipcc"}"
# Upstream is migrating to amdclang++, it is likely this will be correct in next version bump
#"-DCMAKE_CXX_COMPILER=${lib.getBin clr}/bin/amdclang++"
# Manually define CMAKE_INSTALL_<DIR> # Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838 # See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include" "-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DAMDGPU_TARGETS=${rocblas.amdgpu_targets}"
] ]
++ lib.optionals buildTests [ ++ lib.optionals buildTests [
"-DBUILD_CLIENTS_TESTS=ON" "-DBUILD_CLIENTS_TESTS=ON"
@@ -100,8 +125,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -110,8 +135,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -0,0 +1,235 @@
# Package definition for hipBLASLt (see meta.description below).
#
# Notable arguments:
#   buildTests / buildBenchmarks / buildSamples
#     Each enables the matching BUILD_CLIENTS_* cmake option and adds a
#     dedicated output ("test", "benchmark", "sample") that postInstall
#     populates from $out/bin.
#   gpuTargets
#     GPU architectures to build for. Defaults to `clr.localGpuTargets` when
#     that attribute exists, otherwise to the list given below.
{
  lib,
  stdenv,
  fetchpatch,
  fetchFromGitHub,
  cmake,
  rocm-cmake,
  clr,
  gfortran,
  gtest,
  msgpack,
  libxml2,
  python3,
  python3Packages,
  openmp,
  hipblas-common,
  tensile,
  lapack-reference,
  ncurses,
  libffi,
  zlib,
  zstd,
  rocmUpdateScript,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  # hipblaslt supports only devices with MFMA or WMMA
  # WMMA on gfx1100 may be broken
  # MFMA on MI100 may be broken
  # MI200/MI300 known to work
  gpuTargets ? (
    clr.localGpuTargets or [
      # "gfx908" FIXME: confirm MFMA on MI100 works
      "gfx90a"
      "gfx942"
      # "gfx1100" FIXME: confirm WMMA targets work
    ]
  ),
}:
stdenv.mkDerivation (
  finalAttrs:
  let
    # True when at least one requested target has a "gfx9" or "gfx11" prefix.
    # When false, the no-arch patches are applied and Tensile is disabled.
    supportsTargetArches =
      (builtins.any (lib.strings.hasPrefix "gfx9") gpuTargets)
      || (builtins.any (lib.strings.hasPrefix "gfx11") gpuTargets);
    # Tensile built in its "lite" flavour from the tensilelite/ subdirectory
    # of this package's own source tree.
    tensile' = (tensile.override { isTensileLite = true; }).overrideAttrs {
      inherit (finalAttrs) src;
      sourceRoot = "${finalAttrs.src.name}/tensilelite";
    };
    # Python interpreter handed to cmake via VIRTUALENV_PYTHON_EXENAME.
    py = python3.withPackages (ps: [
      ps.pyyaml
      ps.setuptools
      ps.packaging
    ]);
    # ";"-separated target list for cmake; empty string when no supported
    # architecture was requested.
    gpuTargets' = lib.optionalString supportsTargetArches (lib.concatStringsSep ";" gpuTargets);
    compiler = "amdclang++";
    cFlags = "-O3 -I${msgpack}/include"; # FIXME: cmake files need to be patched to include this properly
  in
  {
    pname = "hipblaslt${clr.gpuArchSuffix}";
    version = "6.3.1";
    src = fetchFromGitHub {
      owner = "ROCm";
      repo = "hipBLASLt";
      rev = "rocm-${finalAttrs.version}";
      hash = "sha256-ozfHwsxcczzYXN9SIkyfRvdtaCqlDN4bh3UHZNS2oVQ=";
    };
    env.CXX = compiler;
    env.CFLAGS = cFlags;
    env.CXXFLAGS = cFlags;
    env.ROCM_PATH = "${clr}";
    env.TENSILE_ROCM_ASSEMBLER_PATH = lib.getExe' clr "amdclang++";
    env.TENSILE_GEN_ASSEMBLY_TOOLCHAIN = lib.getExe' clr "amdclang++";
    # Some tensile scripts look for this as an env var rather than a cmake flag
    env.CMAKE_CXX_COMPILER = lib.getExe' clr "amdclang++";
    requiredSystemFeatures = [ "big-parallel" ];
    outputs =
      [
        "out"
      ]
      ++ lib.optionals buildTests [
        "test"
      ]
      ++ lib.optionals buildBenchmarks [
        "benchmark"
      ]
      ++ lib.optionals buildSamples [
        "sample"
      ];
    postPatch = ''
      mkdir -p build/Tensile/library
      # git isn't needed and we have no .git
      substituteInPlace cmake/Dependencies.cmake \
        --replace-fail "find_package(Git REQUIRED)" ""
      substituteInPlace CMakeLists.txt \
        --replace-fail "include(virtualenv)" "" \
        --replace-fail "virtualenv_install(\''${Tensile_TEST_LOCAL_PATH})" "" \
        --replace-fail "virtualenv_install(\''${CMAKE_SOURCE_DIR}/tensilelite)" "" \
        --replace-fail 'find_package(Tensile 4.33.0 EXACT REQUIRED HIP LLVM OpenMP PATHS "''${INSTALLED_TENSILE_PATH}")' "find_package(Tensile)" \
        --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
      # FIXME: TensileCreateExtOpLibraries build failure due to unsupported null operand
      # Working around for now by disabling the ExtOp libs
      substituteInPlace library/src/amd_detail/rocblaslt/src/CMakeLists.txt \
        --replace-fail 'TensileCreateExtOpLibraries("' '# skipping TensileCreateExtOpLibraries'
      substituteInPlace library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh \
        --replace-fail '${"\${rocm_path}"}/bin/' ""
    '';
    # Apply patches to allow building without a target arch if we need to do that
    patches = lib.optionals (!supportsTargetArches) [
      # Add ability to build without specifying any arch.
      (fetchpatch {
        sha256 = "sha256-VW3bPzmQvfo8+iKsVfpn4sbqAe41fLzCEUfBh9JxVyk=";
        url = "https://raw.githubusercontent.com/gentoo/gentoo/refs/heads/master/sci-libs/hipBLASLt/files/hipBLASLt-6.1.1-no-arch.patch";
      })
      # Followup to above patch for 6.3.x
      (fetchpatch {
        sha256 = "sha256-GCsrne6BiWzwj8TMAfFuaYz1Pij97hoCc6E3qJhWb10=";
        url = "https://raw.githubusercontent.com/gentoo/gentoo/refs/heads/master/sci-libs/hipBLASLt/files/hipBLASLt-6.3.0-no-arch-extra.patch";
      })
    ];
    doCheck = false;
    doInstallCheck = false;
    nativeBuildInputs = [
      cmake
      rocm-cmake
      py
      clr
      gfortran
      # need make to get streaming console output so nix knows build is still running
      # so deliberately not using ninja
    ];
    buildInputs =
      [
        hipblas-common
        tensile'
        openmp
        libffi
        ncurses
        # Tensile deps - not optional, building without tensile isn't actually supported
        msgpack # FIXME: not included in cmake!
        libxml2
        python3Packages.msgpack
        python3Packages.joblib
        zlib
        zstd
      ]
      ++ lib.optionals buildTests [
        gtest
      ]
      ++ lib.optionals (buildTests || buildBenchmarks) [
        lapack-reference
      ];
    cmakeFlags =
      [
        "-Wno-dev"
        "-DCMAKE_BUILD_TYPE=Release"
        "-DCMAKE_VERBOSE_MAKEFILE=ON"
        "-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}"
        "-DTENSILE_USE_HIP=ON"
        "-DTENSILE_BUILD_CLIENT=OFF"
        "-DTENSILE_USE_FLOAT16_BUILTIN=ON"
        "-DCMAKE_CXX_COMPILER=${compiler}"
        # Manually define CMAKE_INSTALL_<DIR>
        # See: https://github.com/NixOS/nixpkgs/pull/197838
        "-DCMAKE_INSTALL_BINDIR=bin"
        "-DCMAKE_INSTALL_LIBDIR=lib"
        "-DCMAKE_INSTALL_INCLUDEDIR=include"
        "-DHIPBLASLT_ENABLE_MARKER=Off"
        # FIXME what are the implications of hardcoding this?
        "-DTensile_CODE_OBJECT_VERSION=V5"
        "-DTensile_COMPILER=${compiler}"
        "-DAMDGPU_TARGETS=${gpuTargets'}"
        "-DGPU_TARGETS=${gpuTargets'}"
        "-DTensile_LIBRARY_FORMAT=msgpack"
      ]
      ++ lib.optionals (!supportsTargetArches) [
        "-DBUILD_WITH_TENSILE=OFF"
      ]
      ++ lib.optionals buildTests [
        "-DBUILD_CLIENTS_TESTS=ON"
      ]
      ++ lib.optionals buildBenchmarks [
        "-DBUILD_CLIENTS_BENCHMARKS=ON"
      ]
      ++ lib.optionals buildSamples [
        "-DBUILD_CLIENTS_SAMPLES=ON"
      ];
    # Move client binaries into their dedicated outputs, then drop the
    # (expected to be empty) $out/bin.
    # The test and benchmark clients of this project are named
    # hipblaslt-test / hipblaslt-bench; the previous hipblas-* names belong
    # to hipBLAS and would make `mv` fail.
    # NOTE(review): the `example-*` sample glob and whether further client
    # binaries remain in $out/bin (which would make `rmdir` fail) have not
    # been verified against an actual build - confirm.
    postInstall =
      lib.optionalString buildTests ''
        mkdir -p $test/bin
        mv $out/bin/hipblaslt-test $test/bin
      ''
      + lib.optionalString buildBenchmarks ''
        mkdir -p $benchmark/bin
        mv $out/bin/hipblaslt-bench $benchmark/bin
      ''
      + lib.optionalString buildSamples ''
        mkdir -p $sample/bin
        mv $out/bin/example-* $sample/bin
      ''
      + lib.optionalString (buildTests || buildBenchmarks || buildSamples) ''
        rmdir $out/bin
      '';
    # If this is false there are no kernels in the output lib
    # and it's useless at runtime
    # so if it's an optional dep it's best to not depend on it
    # Some packages like torch need hipblaslt to compile
    # and are fine ignoring it at runtime if it's not supported
    # so we have to support building an empty hipblaslt
    passthru.supportsTargetArches = supportsTargetArches;
    passthru.updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner repo;
    };
    # Expose the tensilelite build used above.
    passthru.tensilelite = tensile';
    meta = with lib; {
      description = "hipBLASLt is a library that provides general matrix-matrix operations with a flexible API";
      homepage = "https://github.com/ROCm/hipBLASlt";
      license = with licenses; [ mit ];
      maintainers = teams.rocm.members;
      platforms = platforms.linux;
    };
  }
)

View File

@@ -0,0 +1,22 @@
diff --git a/library/src/amd_detail/rocblaslt/src/CMakeLists.txt b/library/src/amd_detail/rocblaslt/src/CMakeLists.txt
index 3d5ace35..8c5a3841 100644
--- a/library/src/amd_detail/rocblaslt/src/CMakeLists.txt
+++ b/library/src/amd_detail/rocblaslt/src/CMakeLists.txt
@@ -58,6 +58,8 @@ if( BUILD_WITH_TENSILE )
set(Tensile_Options ${Tensile_Options} LAZY_LIBRARY_LOADING)
endif()
+ #TensileCreateExtOpLibraries("${PROJECT_BINARY_DIR}/Tensile/library" "${Tensile_ARCHITECTURE}")
+
# Add a build target for Tensile kernel library
# Runtime language is HIP by default
# warning our Tensile_ variables may shadow variable in TensileCreateLibraryFiles
@@ -86,8 +88,6 @@ if( BUILD_WITH_TENSILE )
)
endif()
- TensileCreateExtOpLibraries("${PROJECT_BINARY_DIR}/Tensile/library" "${Tensile_ARCHITECTURE}")
-
# Create a unique name for TensileHost compiled for rocBLAS
set_target_properties( TensileHost PROPERTIES OUTPUT_NAME rocblaslt-tensile CXX_EXTENSIONS NO )

View File

@@ -0,0 +1,39 @@
From f259eca77c592813e11752a46c4e1f9a74c64091 Mon Sep 17 00:00:00 2001
From: Luna Nova <git@lunnova.dev>
Date: Fri, 11 Oct 2024 02:56:22 -0700
Subject: [PATCH] [hipcc] Remove extra definition of hipBinUtilPtr_ in derived
platforms
Fixes UB when hipBinUtilPtr_ is used.
---
amd/hipcc/src/hipBin_amd.h | 1 -
amd/hipcc/src/hipBin_nvidia.h | 1 -
2 files changed, 2 deletions(-)
diff --git a/amd/hipcc/src/hipBin_amd.h b/amd/hipcc/src/hipBin_amd.h
index 0a782d1beab9..36cd625ae8bc 100644
--- a/src/hipBin_amd.h
+++ b/src/hipBin_amd.h
@@ -42,7 +42,6 @@ THE SOFTWARE.
class HipBinAmd : public HipBinBase {
private:
- HipBinUtil* hipBinUtilPtr_;
string hipClangPath_ = "";
string roccmPathEnv_, hipRocclrPathEnv_, hsaPathEnv_;
PlatformInfo platformInfoAMD_;
diff --git a/amd/hipcc/src/hipBin_nvidia.h b/amd/hipcc/src/hipBin_nvidia.h
index ff142cc1cea2..09b7b80979c7 100644
--- a/src/hipBin_nvidia.h
+++ b/src/hipBin_nvidia.h
@@ -31,7 +31,6 @@ THE SOFTWARE.
class HipBinNvidia : public HipBinBase {
private:
- HipBinUtil* hipBinUtilPtr_;
string cudaPath_ = "";
PlatformInfo platformInfoNV_;
string hipCFlags_, hipCXXFlags_, hipLdFlags_;
--
2.46.0

View File

@@ -1,49 +1,46 @@
{ {
lib, lib,
stdenv, stdenv,
fetchFromGitHub, rocm-merged-llvm,
rocmUpdateScript,
cmake, cmake,
lsb-release, lsb-release,
}: }:
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "hipcc"; pname = "hipcc";
version = "6.0.2"; # In-tree with ROCm LLVM
inherit (rocm-merged-llvm) version;
src = fetchFromGitHub { src = rocm-merged-llvm.llvm-src;
owner = "ROCm"; sourceRoot = "${finalAttrs.src.name}/amd/hipcc";
repo = "HIPCC";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-/LRQN+RSMBPk2jS/tdp3psUL/B0RJZQhRri7e67KsG4=";
};
nativeBuildInputs = [ cmake ]; nativeBuildInputs = [ cmake ];
buildInputs = [ rocm-merged-llvm ];
patches = [
# https://github.com/ROCm/llvm-project/pull/183
# Fixes always-invoked UB in hipcc
./0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch
];
postPatch = '' postPatch = ''
substituteInPlace src/hipBin_amd.h \ substituteInPlace src/hipBin_amd.h \
--replace "/usr/bin/lsb_release" "${lsb-release}/bin/lsb_release" --replace-fail "/usr/bin/lsb_release" "${lsb-release}/bin/lsb_release"
''; '';
cmakeFlags = [
"-DCMAKE_BUILD_TYPE=Release"
];
postInstall = '' postInstall = ''
rm -r $out/hip/bin rm -r $out/hip/bin
ln -s $out/bin $out/hip/bin ln -s $out/bin $out/hip/bin
''; '';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
};
meta = with lib; { meta = with lib; {
description = "Compiler driver utility that calls clang or nvcc"; description = "Compiler driver utility that calls clang or nvcc";
homepage = "https://github.com/ROCm/HIPCC"; homepage = "https://github.com/ROCm/HIPCC";
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -1,36 +1,40 @@
{ lib {
, stdenv lib,
, fetchFromGitHub stdenv,
, rocmUpdateScript fetchFromGitHub,
, cmake rocmUpdateScript,
, rocm-cmake cmake,
, rocprim rocm-cmake,
, clr rocprim,
, gtest clr,
, gbenchmark gtest,
, buildTests ? false gbenchmark,
, buildBenchmarks ? false buildTests ? false,
, gpuTargets ? [ ] buildBenchmarks ? false,
gpuTargets ? [ ],
}: }:
# CUB can also be used as a backend instead of rocPRIM. # CUB can also be used as a backend instead of rocPRIM.
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "hipcub"; pname = "hipcub";
version = "6.0.2"; version = "6.3.1";
outputs = [ outputs =
"out" [
] ++ lib.optionals buildTests [ "out"
"test" ]
] ++ lib.optionals buildBenchmarks [ ++ lib.optionals buildTests [
"benchmark" "test"
]; ]
++ lib.optionals buildBenchmarks [
"benchmark"
];
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "hipCUB"; repo = "hipCUB";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-8QzVgj0JSb86zEG3sj5AAt9pG3frw+xrjEOTo7xCIrc="; hash = "sha256-uECOQWG9C64tg5YZdm9/3+fZXaZVGslu8vElK3m23GY=";
}; };
nativeBuildInputs = [ nativeBuildInputs = [
@@ -39,44 +43,53 @@ stdenv.mkDerivation (finalAttrs: {
clr clr
]; ];
buildInputs = [ buildInputs =
rocprim [
] ++ lib.optionals buildTests [ rocprim
gtest ]
] ++ lib.optionals buildBenchmarks [ ++ lib.optionals buildTests [
gbenchmark gtest
]; ]
++ lib.optionals buildBenchmarks [
gbenchmark
];
cmakeFlags = [ cmakeFlags =
"-DCMAKE_CXX_COMPILER=hipcc" [
"-DHIP_ROOT_DIR=${clr}" "-DHIP_ROOT_DIR=${clr}"
# Manually define CMAKE_INSTALL_<DIR> # Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838 # See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include" "-DCMAKE_INSTALL_INCLUDEDIR=include"
] ++ lib.optionals (gpuTargets != [ ]) [ ]
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" ++ lib.optionals (gpuTargets != [ ]) [
] ++ lib.optionals buildTests [ "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
"-DBUILD_TEST=ON" ]
] ++ lib.optionals buildBenchmarks [ ++ lib.optionals buildTests [
"-DBUILD_BENCHMARK=ON" "-DBUILD_TEST=ON"
]; ]
++ lib.optionals buildBenchmarks [
"-DBUILD_BENCHMARK=ON"
];
postInstall = lib.optionalString buildTests '' postInstall =
mkdir -p $test/bin lib.optionalString buildTests ''
mv $out/bin/test_* $test/bin mkdir -p $test/bin
'' + lib.optionalString buildBenchmarks '' mv $out/bin/test_* $test/bin
mkdir -p $benchmark/bin ''
mv $out/bin/benchmark_* $benchmark/bin + lib.optionalString buildBenchmarks ''
'' + lib.optionalString (buildTests || buildBenchmarks) '' mkdir -p $benchmark/bin
rmdir $out/bin mv $out/bin/benchmark_* $benchmark/bin
''; ''
+ lib.optionalString (buildTests || buildBenchmarks) ''
rmdir $out/bin
'';
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -85,6 +98,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ bsd3 ]; license = with licenses; [ bsd3 ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -22,7 +22,7 @@
# Can also use cuFFT # Can also use cuFFT
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "hipfft"; pname = "hipfft";
version = "6.0.2"; version = "6.3.1";
outputs = outputs =
[ [
@@ -42,7 +42,7 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm"; owner = "ROCm";
repo = "hipFFT"; repo = "hipFFT";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-DjjNQryJdl7RmaMQRQPWkleweEWMIwH/xXU84GGjoC0="; hash = "sha256-Jq/YHEtOo7a0/Ki7gxZATKmSqPU6cyLf5gx3A4MAZNw=";
fetchSubmodules = true; fetchSubmodules = true;
}; };
@@ -111,8 +111,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -121,8 +121,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -1,21 +1,22 @@
{ lib {
, stdenv lib,
, fetchFromGitHub stdenv,
, rocmUpdateScript fetchFromGitHub,
, cmake rocmUpdateScript,
, rocm-cmake cmake,
, gfortran rocm-cmake,
gfortran,
}: }:
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "hipfort"; pname = "hipfort";
version = "6.0.2"; version = "6.3.1";
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "hipfort"; repo = "hipfort";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-3PIqSDyDlY0oVSEx20EPlKGYNkc9xPZtIG3Sbw69esE="; hash = "sha256-cokHxyb4NDMHeq7RIVz7PBuUKRIHyGdZgDgF6Za4fHM=";
}; };
nativeBuildInputs = [ nativeBuildInputs = [
@@ -51,8 +52,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -61,6 +62,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; # mitx11 license = with licenses; [ mit ]; # mitx11
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -1,39 +1,57 @@
{ lib {
, stdenv lib,
, fetchFromGitHub stdenv,
, rocmUpdateScript fetchFromGitHub,
, cmake rocmUpdateScript,
, clang cmake,
, libxml2 clang,
libxml2,
rocm-merged-llvm,
zlib,
zstd,
perl,
}: }:
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "hipify"; pname = "hipify";
version = "6.0.2"; version = "6.3.1";
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "HIPIFY"; repo = "HIPIFY";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-nNyWrPPhUwT7FyASzc3kf5NCTzeqvHybVOc+6hBzkA4="; hash = "sha256-o/1LNsNtAyQcSug1gf7ujGNRRbvC33kwldrJKZi2LA0=";
}; };
nativeBuildInputs = [ cmake ]; nativeBuildInputs = [
buildInputs = [ libxml2 ]; cmake
];
buildInputs = [
libxml2
rocm-merged-llvm
zlib
zstd
perl
];
postPatch = '' postPatch = ''
substituteInPlace CMakeLists.txt \ substituteInPlace CMakeLists.txt \
--replace "\''${LLVM_TOOLS_BINARY_DIR}/clang" "${clang}/bin/clang" --replace "\''${LLVM_TOOLS_BINARY_DIR}/clang" "${clang}/bin/clang"
chmod +x bin/*
''; '';
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
postInstall = '' postInstall = ''
patchShebangs $out/bin chmod +x $out/bin/*
chmod +x $out/libexec/*
patchShebangs $out/bin/
patchShebangs $out/libexec/
''; '';
meta = with lib; { meta = with lib; {
@@ -42,6 +60,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -14,7 +14,7 @@
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "hiprand"; pname = "hiprand";
version = "6.0.2"; version = "6.3.1";
outputs = outputs =
[ [
@@ -28,7 +28,7 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm"; owner = "ROCm";
repo = "hipRAND"; repo = "hipRAND";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-uGHzOhUX5JEknVFwhHhWFdPmwLS/TuaXYMeItS7tXIg="; hash = "sha256-TVc+qFwRiS5tAo1OKI1Wu5hadlwPZmSVZ9SvVvH1w7Y=";
}; };
nativeBuildInputs = [ nativeBuildInputs = [
@@ -41,8 +41,6 @@ stdenv.mkDerivation (finalAttrs: {
cmakeFlags = cmakeFlags =
[ [
"-DCMAKE_C_COMPILER=hipcc"
"-DCMAKE_CXX_COMPILER=hipcc"
"-DHIP_ROOT_DIR=${clr}" "-DHIP_ROOT_DIR=${clr}"
# Manually define CMAKE_INSTALL_<DIR> # Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838 # See: https://github.com/NixOS/nixpkgs/pull/197838
@@ -67,8 +65,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -77,8 +75,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -9,6 +9,8 @@
gfortran, gfortran,
rocblas, rocblas,
rocsolver, rocsolver,
rocsparse,
suitesparse,
gtest, gtest,
lapack-reference, lapack-reference,
buildTests ? false, buildTests ? false,
@@ -19,7 +21,7 @@
# Can also use cuSOLVER # Can also use cuSOLVER
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "hipsolver"; pname = "hipsolver";
version = "6.0.2"; version = "6.3.1";
outputs = outputs =
[ [
@@ -39,7 +41,7 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm"; owner = "ROCm";
repo = "hipSOLVER"; repo = "hipSOLVER";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-iMfaOv4TdTkmaRHCZOuqUfjO081J6on71+s8nIwwV00="; hash = "sha256-ZQUKU3L4DgZ5zM7pCYEix0ulRkl78x/5wJnyCndTAwk=";
}; };
nativeBuildInputs = [ nativeBuildInputs = [
@@ -53,6 +55,8 @@ stdenv.mkDerivation (finalAttrs: {
[ [
rocblas rocblas
rocsolver rocsolver
rocsparse
suitesparse
] ]
++ lib.optionals buildTests [ ++ lib.optionals buildTests [
gtest gtest
@@ -63,13 +67,13 @@ stdenv.mkDerivation (finalAttrs: {
cmakeFlags = cmakeFlags =
[ [
"-DCMAKE_C_COMPILER=hipcc"
"-DCMAKE_CXX_COMPILER=hipcc" "-DCMAKE_CXX_COMPILER=hipcc"
# Manually define CMAKE_INSTALL_<DIR> # Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838 # See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include" "-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DBUILD_WITH_SPARSE=OFF" # FIXME: broken - can't find suitesparse/cholmod, looks fixed in master
] ]
++ lib.optionals buildTests [ ++ lib.optionals buildTests [
"-DBUILD_CLIENTS_TESTS=ON" "-DBUILD_CLIENTS_TESTS=ON"
@@ -101,8 +105,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -111,8 +115,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -12,6 +12,7 @@
gtest, gtest,
openmp, openmp,
buildTests ? false, buildTests ? false,
buildBenchmarks ? false,
buildSamples ? false, buildSamples ? false,
gpuTargets ? [ ], gpuTargets ? [ ],
}: }:
@@ -19,7 +20,7 @@
# This can also use cuSPARSE as a backend instead of rocSPARSE # This can also use cuSPARSE as a backend instead of rocSPARSE
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "hipsparse"; pname = "hipsparse";
version = "6.0.2"; version = "6.3.1";
outputs = outputs =
[ [
@@ -36,7 +37,7 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm"; owner = "ROCm";
repo = "hipSPARSE"; repo = "hipSPARSE";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-fi5b0IF++OiezpM3JuUkhwpmW2apeFH4r5g6CcFseNY="; hash = "sha256-3a7fKpYyiqG3aGOg7YrTHmKoH4rgTVLD16DvrZ3YY1g=";
}; };
nativeBuildInputs = [ nativeBuildInputs = [
@@ -51,7 +52,7 @@ stdenv.mkDerivation (finalAttrs: {
rocsparse rocsparse
git git
] ]
++ lib.optionals buildTests [ ++ lib.optionals (buildTests || buildBenchmarks) [
gtest gtest
] ]
++ lib.optionals (buildTests || buildSamples) [ ++ lib.optionals (buildTests || buildSamples) [
@@ -60,20 +61,17 @@ stdenv.mkDerivation (finalAttrs: {
cmakeFlags = cmakeFlags =
[ [
"-DCMAKE_C_COMPILER=hipcc"
"-DCMAKE_CXX_COMPILER=hipcc"
"-DBUILD_CLIENTS_SAMPLES=${if buildSamples then "ON" else "OFF"}"
# Manually define CMAKE_INSTALL_<DIR> # Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838 # See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include" "-DCMAKE_INSTALL_INCLUDEDIR=include"
(lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests)
(lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks)
(lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildSamples)
] ]
++ lib.optionals (gpuTargets != [ ]) [ ++ lib.optionals (gpuTargets != [ ]) [
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
]
++ lib.optionals buildTests [
"-DBUILD_CLIENTS_TESTS=ON"
]; ];
# We have to manually generate the matrices # We have to manually generate the matrices
@@ -140,8 +138,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -150,8 +148,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -4,22 +4,24 @@
fetchurl, fetchurl,
callPackage, callPackage,
dpkg, dpkg,
rocm-core,
}: }:
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "hsa-amd-aqlprofile-bin"; pname = "hsa-amd-aqlprofile-bin";
version = "6.0.2"; version = "6.3.0";
src = src =
let let
version = finalAttrs.version; inherit (finalAttrs) version;
dotless = builtins.replaceStrings [ "." ] [ "0" ] version; patch = rocm-core.ROCM_LIBPATCH_VERSION;
incremental = "115"; majorMinor = lib.versions.major version + "." + lib.versions.minor version;
incremental = "39";
osRelease = "22.04"; osRelease = "22.04";
in in
fetchurl { fetchurl {
url = "https://repo.radeon.com/rocm/apt/${version}/pool/main/h/hsa-amd-aqlprofile/hsa-amd-aqlprofile_1.0.0.${dotless}.${dotless}-${incremental}~${osRelease}_amd64.deb"; url = "https://repo.radeon.com/rocm/apt/${majorMinor}/pool/main/h/hsa-amd-aqlprofile/hsa-amd-aqlprofile_1.0.0.${patch}-${incremental}~${osRelease}_amd64.deb";
hash = "sha256-0XeKUKaof5pSMS/UgLwumBDBYgyH/pCex9jViUKENXY="; hash = "sha256-ghgz5ZgWopgLJcK4Vbwm6zlny3IwxzWz9V0Fuwu35R0=";
}; };
nativeBuildInputs = [ dpkg ]; nativeBuildInputs = [ dpkg ];
@@ -31,7 +33,7 @@ stdenv.mkDerivation (finalAttrs: {
runHook preInstall runHook preInstall
mkdir -p $out mkdir -p $out
cp -a opt/rocm-${finalAttrs.version}/* $out cp -a opt/rocm-${finalAttrs.version}*/* $out
chmod +x $out/lib/libhsa-amd-aqlprofile64.so.1.* chmod +x $out/lib/libhsa-amd-aqlprofile64.so.1.*
chmod +x $out/lib/hsa-amd-aqlprofile/librocprofv2_att.so chmod +x $out/lib/hsa-amd-aqlprofile/librocprofv2_att.so
@@ -46,8 +48,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ unfree ]; license = with licenses; [ unfree ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -1,216 +0,0 @@
# Shared builder for one component of the ROCm fork of LLVM.
# `targetName` becomes the suffix of the package name (rocm-llvm-<targetName>) and
# `targetDir` is the sub-directory of the llvm-project checkout that is used as sourceRoot.
{
lib,
stdenv,
gcc12Stdenv,
fetchFromGitHub,
rocmUpdateScript,
pkg-config,
cmake,
ninja,
git,
doxygen,
sphinx,
lit,
libxml2,
libxcrypt,
libedit,
libffi,
mpfr,
zlib,
ncurses,
python3Packages,
# buildDocs adds the `doc` output, buildMan adds the `man` and `info` outputs,
# buildTests enables the check phase (doCheck) and the LLVM test CMake flags.
buildDocs ? true,
buildMan ? true,
buildTests ? true,
targetName ? "llvm",
targetDir ? "llvm",
# Joined with ";" into LLVM_ENABLE_PROJECTS / LLVM_ENABLE_RUNTIMES; only passed when non-empty (see cmakeFlags).
targetProjects ? [ ],
targetRuntimes ? [ ],
llvmTargetsToBuild ? [ "NATIVE" ], # "NATIVE" resolves into x86 or aarch64 depending on stdenv
# Extension points: each is appended to the attribute of the same name (without the "extra" prefix).
extraPatches ? [ ],
extraNativeBuildInputs ? [ ],
extraBuildInputs ? [ ],
extraCMakeFlags ? [ ],
extraPostPatch ? "",
# Default check target: check-runtimes when building the runtimes tree, check-all otherwise.
# With buildTests = false the single list entry is the empty string.
checkTargets ? [
(lib.optionalString buildTests (if targetDir == "runtimes" then "check-runtimes" else "check-all"))
],
extraPostInstall ? "",
hardeningDisable ? [ ],
requiredSystemFeatures ? [ ],
extraLicenses ? [ ],
isBroken ? false,
}:
# Keep a handle on the stdenv that was passed in so the next `let` can shadow `stdenv`.
let
stdenv' = stdenv;
in
# Fall back to gcc12Stdenv when the passed stdenv's compiler is GCC 13.0 or newer;
# any other stdenv is used unchanged.
let
stdenv =
if stdenv'.cc.cc.isGNU or false && lib.versionAtLeast stdenv'.cc.cc.version "13.0" then
gcc12Stdenv
else
stdenv';
in
let
# LLVM backend name for the host platform; evaluation throws on anything but x86_64 and aarch64.
llvmNativeTarget =
if stdenv.hostPlatform.isx86_64 then
"X86"
else if stdenv.hostPlatform.isAarch64 then
"AArch64"
else
throw "Unsupported ROCm LLVM platform";
# Replaces the "NATIVE" placeholder with the host backend; other names pass through unchanged.
inferNativeTarget = t: if t == "NATIVE" then llvmNativeTarget else t;
# AMDGPU is always built, followed by the requested targets.
llvmTargetsToBuild' = [ "AMDGPU" ] ++ builtins.map inferNativeTarget llvmTargetsToBuild;
in
stdenv.mkDerivation (finalAttrs: {
pname = "rocm-llvm-${targetName}";
version = "6.0.2";
outputs =
[
"out"
]
++ lib.optionals buildDocs [
"doc"
]
++ lib.optionals buildMan [
"man"
"info" # Avoid `attribute 'info' missing` when using with wrapCC
];
patches = extraPatches;
src = fetchFromGitHub {
owner = "ROCm";
repo = "llvm-project";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-uGxalrwMNCOSqSFVrYUBi3ijkMEFFTrzFImmvZKQf6I=";
};
# Documentation tools are only pulled in when docs or man pages are built;
# `lit` is only added for test-enabled builds of targets other than llvm itself.
nativeBuildInputs =
[
pkg-config
cmake
ninja
git
(python3Packages.python.withPackages (p: [ p.setuptools ]))
]
++ lib.optionals (buildDocs || buildMan) [
doxygen
sphinx
python3Packages.recommonmark
]
++ lib.optionals (buildTests && !finalAttrs.passthru.isLLVM) [
lit
]
++ extraNativeBuildInputs;
buildInputs = [
libxml2
libxcrypt
libedit
libffi
mpfr
] ++ extraBuildInputs;
# zlib and ncurses are propagated only by the llvm component itself.
propagatedBuildInputs = lib.optionals finalAttrs.passthru.isLLVM [
zlib
ncurses
];
sourceRoot = "${finalAttrs.src.name}/${targetDir}";
# Flag groups, in order: targets, projects (llvm only), runtimes (llvm or the runtimes tree),
# install helpers (llvm only), documentation, tests, then caller-supplied flags last.
cmakeFlags =
[
"-DLLVM_TARGETS_TO_BUILD=${builtins.concatStringsSep ";" llvmTargetsToBuild'}"
]
++ lib.optionals (finalAttrs.passthru.isLLVM && targetProjects != [ ]) [
"-DLLVM_ENABLE_PROJECTS=${lib.concatStringsSep ";" targetProjects}"
]
++
lib.optionals ((finalAttrs.passthru.isLLVM || targetDir == "runtimes") && targetRuntimes != [ ])
[
"-DLLVM_ENABLE_RUNTIMES=${lib.concatStringsSep ";" targetRuntimes}"
]
++ lib.optionals finalAttrs.passthru.isLLVM [
"-DLLVM_INSTALL_UTILS=ON"
"-DLLVM_INSTALL_GTEST=ON"
]
++ lib.optionals (buildDocs || buildMan) [
"-DLLVM_INCLUDE_DOCS=ON"
"-DLLVM_BUILD_DOCS=ON"
# "-DLLVM_ENABLE_DOXYGEN=ON" Way too slow, only uses one core
"-DLLVM_ENABLE_SPHINX=ON"
"-DSPHINX_OUTPUT_HTML=ON"
"-DSPHINX_OUTPUT_MAN=ON"
"-DSPHINX_WARNINGS_AS_ERRORS=OFF"
]
++ lib.optionals buildTests [
"-DLLVM_INCLUDE_TESTS=ON"
"-DLLVM_BUILD_TESTS=ON"
"-DLLVM_EXTERNAL_LIT=${lit}/bin/.lit-wrapped"
]
++ extraCMakeFlags;
# Step up from sourceRoot to the checkout root and make the tree writable before patching
# (NOTE(review): presumably so patches are applied relative to the repository root - confirm).
prePatch = ''
cd ../
chmod -R u+w .
'';
# Return to the component directory, then apply llvm-only fix-ups and any caller-supplied script.
postPatch =
''
cd ${targetDir}
''
+ lib.optionalString finalAttrs.passthru.isLLVM ''
patchShebangs lib/OffloadArch/make_generated_offload_arch_h.sh
''
+ lib.optionalString (buildTests && finalAttrs.passthru.isLLVM) ''
# FileSystem permissions tests fail with various special bits
rm test/tools/llvm-objcopy/ELF/mirror-permissions-unix.test
rm unittests/Support/Path.cpp
substituteInPlace unittests/Support/CMakeLists.txt \
--replace-fail "Path.cpp" ""
''
+ extraPostPatch;
doCheck = buildTests;
checkTarget = lib.concatStringsSep " " checkTargets;
# Create the `info` output directory whenever that output is declared (buildMan).
postInstall =
lib.optionalString buildMan ''
mkdir -p $info
''
+ extraPostInstall;
passthru = {
# Flags derived from the build parameters; isLLVM also gates several attributes above.
isLLVM = targetDir == "llvm";
isClang = targetDir == "clang" || builtins.elem "clang" targetProjects;
isROCm = true;
updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
};
};
inherit hardeningDisable requiredSystemFeatures;
meta = with lib; {
description = "ROCm fork of the LLVM compiler infrastructure";
homepage = "https://github.com/ROCm/llvm-project";
license = with licenses; [ ncsa ] ++ extraLicenses;
maintainers =
with maintainers;
[
acowley
lovesegfault
]
++ teams.rocm.members;
platforms = platforms.linux;
# Marked broken on request (isBroken) or once the version reaches 7.0.0.
broken = isBroken || versionAtLeast finalAttrs.version "7.0.0";
};
})

View File

@@ -0,0 +1,14 @@
diff --git a/lib/Driver/ToolChains/CommonArgs.cpp b/lib/Driver/ToolChains/CommonArgs.cpp
index 34640b3c450d..93c4a4f4ec5c 100644
--- a/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/lib/Driver/ToolChains/CommonArgs.cpp
@@ -589,8 +589,7 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
#endif
SmallString<1024> Plugin;
- llvm::sys::path::native(Twine(D.Dir) +
- "/../" CLANG_INSTALL_LIBDIR_BASENAME +
+ llvm::sys::path::native(Twine("@libllvmLibdir@") +
PluginName + Suffix,
Plugin);
CmdArgs.push_back(Args.MakeArgString(Twine(PluginPrefix) + Plugin));

View File

@@ -0,0 +1,23 @@
diff --git a/lib/Driver/ToolChains/Linux.cpp b/lib/Driver/ToolChains/Linux.cpp
index 57368104c914..71c57f72078e 100644
--- a/lib/Driver/ToolChains/Linux.cpp
+++ b/lib/Driver/ToolChains/Linux.cpp
@@ -640,6 +640,7 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
return;
// LOCAL_INCLUDE_DIR
+ if (!SysRoot.empty())
addSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/local/include"));
// TOOL_INCLUDE_DIR
AddMultilibIncludeArgs(DriverArgs, CC1Args);
@@ -672,8 +673,10 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
// Add an include of '/include' directly. This isn't provided by default by
// system GCCs, but is often used with cross-compiling GCCs, and harmless to
// add even when Clang is acting as-if it were a system compiler.
+ if (!SysRoot.empty())
addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/include"));
+ if (!SysRoot.empty())
addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/include"));
if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && getTriple().isMusl())

View File

@@ -0,0 +1,40 @@
diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp
index 06f5e7e7e335..8407d664886a 100644
--- a/lib/Driver/Compilation.cpp
+++ b/lib/Driver/Compilation.cpp
@@ -340,6 +340,9 @@ private:
void Compilation::ExecuteJobs(const JobList &Jobs,
FailingCommandList &FailingCommands,
bool LogOnly) const {
+ // If >1 job, log as each job finishes so can see progress while building many offloads
+ const bool logJobs = Jobs.size() > 1;
+ auto start_time = std::chrono::steady_clock::now();
// According to UNIX standard, driver need to continue compiling all the
// inputs on the command line even one of them failed.
// In all but CLMode, execute all the jobs unless the necessary inputs for the
@@ -364,11 +367,25 @@ void Compilation::ExecuteJobs(const JobList &Jobs,
JS.setJobState(Next, JobScheduler::JS_RUN);
auto Work = [&, Next]() {
+ auto job_start_time = std::chrono::steady_clock::now();
const Command *FailingCommand = nullptr;
if (int Res = ExecuteCommand(*Next, FailingCommand, LogOnly)) {
FailingCommands.push_back(std::make_pair(Res, FailingCommand));
JS.setJobState(Next, JobScheduler::JS_FAIL);
} else {
+ if (logJobs && Next) {
+ auto now = std::chrono::steady_clock::now();
+ auto job_duration = std::chrono::duration_cast<std::chrono::seconds>(now - job_start_time).count();
+ auto duration = std::chrono::duration_cast<std::chrono::seconds>(now - start_time).count();
+ if (duration > 10 && job_duration > 0) {
+ if (Next->getOutputFilenames().empty())
+ if (Next->getExecutable()) llvm::errs() << "Job completed: " << Next->getExecutable() << "\n";
+ else (llvm::errs() << "Job completed: "), Next->Print(llvm::errs(), "\n", true);
+ else
+ llvm::errs() << "Job completed: " << Next->getOutputFilenames().front().c_str() << "\n";
+ }
+ }
+
JS.setJobState(Next, JobScheduler::JS_DONE);
}
};

View File

@@ -1,142 +1,515 @@
{ {
# stdenv FIXME: Try changing back to this with a new ROCm release https://github.com/NixOS/nixpkgs/issues/271943 lib,
gcc12Stdenv, stdenv,
callPackage, llvmPackages_18,
rocmUpdateScript,
wrapBintoolsWith,
overrideCC, overrideCC,
rocm-device-libs, rocm-device-libs,
rocm-runtime, rocm-runtime,
rocm-thunk, fetchFromGitHub,
clr, runCommand,
symlinkJoin,
rdfind,
wrapBintoolsWith,
emptyDirectory,
zstd,
zlib,
gcc-unwrapped,
glibc,
substituteAll,
libffi,
libxml2,
removeReferencesTo,
fetchpatch,
# Build compilers and stdenv suitable for profiling
# compressed line tables (-g1 -gz) and
# frame pointers for sampling profilers (-fno-omit-frame-pointer -momit-leaf-frame-pointer)
# TODO: Should also apply to downstream packages which use rocmClangStdenv
profilableStdenv ? false,
}: }:
let let
## Stage 1 ## llvmPackagesNoBintools = llvmPackages_18.override {
# Projects bootBintools = null;
llvm = callPackage ./stage-1/llvm.nix { bootBintoolsNoLibc = null;
inherit rocmUpdateScript;
stdenv = gcc12Stdenv;
};
clang-unwrapped = callPackage ./stage-1/clang-unwrapped.nix {
inherit rocmUpdateScript llvm;
stdenv = gcc12Stdenv;
};
lld = callPackage ./stage-1/lld.nix {
inherit rocmUpdateScript llvm;
stdenv = gcc12Stdenv;
}; };
useLibcxx = false; # whether the ROCm stdenv uses libc++ (LLVM's C++ standard library) instead of GCC's libstdc++
# Runtimes llvmStdenv = overrideCC llvmPackagesNoBintools.libcxxStdenv llvmPackagesNoBintools.clangUseLLVM;
runtimes = callPackage ./stage-1/runtimes.nix { llvmLibstdcxxStdenv = overrideCC llvmPackagesNoBintools.stdenv (
inherit rocmUpdateScript llvm; llvmPackagesNoBintools.libstdcxxClang.override {
stdenv = gcc12Stdenv; inherit (llvmPackages_18) bintools;
}; }
);
# stdenv used to compile the ROCm LLVM fork itself: the libc++-based one when useLibcxx
# is set, otherwise the clang + libstdc++ one.
stdenvToBuildRocmLlvm = if useLibcxx then llvmStdenv else llvmLibstdcxxStdenv;
# Directory that contains nothing but symlinks to gcc-unwrapped's include/ and lib/ directories.
gcc-include = runCommand "gcc-include" { } ''
mkdir -p $out
ln -s ${gcc-unwrapped}/include/ $out/
ln -s ${gcc-unwrapped}/lib/ $out/
'';
## Stage 2 ## # A prefix for use as the GCC prefix when building rocmcxx
# Helpers disallowedRefsForToolchain = [
bintools-unwrapped = callPackage ./stage-2/bintools-unwrapped.nix { inherit llvm lld; }; stdenv.cc
bintools = wrapBintoolsWith { bintools = bintools-unwrapped; }; stdenv.cc.cc
rStdenv = callPackage ./stage-2/rstdenv.nix { stdenv.cc.bintools
inherit gcc-unwrapped
llvm stdenvToBuildRocmLlvm
clang-unwrapped ];
lld gcc-prefix =
runtimes let
bintools gccPrefixPaths = [
; gcc-unwrapped
stdenv = gcc12Stdenv; gcc-unwrapped.lib
glibc.dev
];
in
symlinkJoin {
name = "gcc-prefix";
paths = gccPrefixPaths ++ [
glibc
];
disallowedRequisites = gccPrefixPaths;
postBuild = ''
rm -rf $out/{bin,libexec,nix-support,lib64,share,etc}
rm $out/lib/gcc/x86_64-unknown-linux-gnu/*/plugin/include/auto-host.h
mkdir /build/tmpout
mv $out/* /build/tmpout
cp -Lr --no-preserve=mode /build/tmpout/* $out/
set -x
versionedIncludePath="$(echo $out/include/c++/*/)"
mv $versionedIncludePath/* $out/include/c++/
rm -rf $versionedIncludePath/
find $out/lib -type f -exec ${removeReferencesTo}/bin/remove-references-to -t ${gcc-unwrapped.lib} {} +
ln -s $out $out/x86_64-unknown-linux-gnu
'';
};
# ROCm release version.
version = "6.3.1";
# Version string handed to the overridden LLVM package set. Its major component
# should equal the clang release the ROCm fork was branched from.
rocmLlvmVersion = "18.0.0-${llvmSrc.rev}";
# Returns `drv` followed by its `lib` and `dev` outputs, leaving out whichever
# of those the derivation does not define.
usefulOutputs =
  drv:
  let
    optionalOutputs = [
      (drv.lib or null)
      (drv.dev or null)
    ];
    isDefined = output: output != null;
  in
  builtins.filter isDefined ([ drv ] ++ optionalOutputs);
# Flattened useful outputs of every derivation in the given list.
listUsefulOutputs = drvs: builtins.concatMap usefulOutputs drvs;
llvmSrc = fetchFromGitHub {
# Performance improvements cherry-picked on top of rocm-6.3.x
# most importantly, amdgpu-early-alwaysinline memory usage fix
owner = "LunNova";
repo = "llvm-project-rocm";
rev = "4182046534deb851753f0d962146e5176f648893";
hash = "sha256-sPmYi1WiiAqnRnHVNba2nPUxGflBC01FWCTNLPlYF9c=";
}; };
# Source tree handed to the LLVM package set below (currently just an alias of llvmSrc).
llvmSrcFixed = llvmSrc;
# Major component of rocmLlvmVersion.
llvmMajorVersion = lib.versions.major rocmLlvmVersion;
# An llvmPackages set (pkgs/development/compilers/llvm/) built from ROCm LLVM's source tree.
# Whether it is built against libc++ or libstdc++ follows from stdenvToBuildRocmLlvm.
llvmPackagesRocm = llvmPackages_18.override (_old: {
stdenv = stdenvToBuildRocmLlvm; # old.stdenv #llvmPackagesNoBintools.libcxxStdenv;
# gitRelease is deliberately left unset: setting it makes the patch selection logic
# use the git patch set, and ROCm LLVM is closer to the official 18 release.
# Rejected alternative: gitRelease = {}; officialRelease = null;
officialRelease = { }; # Set but empty because we're overriding everything from it.
version = rocmLlvmVersion;
# Both the per-package source and the monorepo source point at the ROCm tree.
src = llvmSrcFixed;
monorepoSrc = llvmSrcFixed;
doCheck = false;
});
# Builds a self-contained sysroot-style compiler directory called `name`.
#   cc    - compiler derivation whose `out` and `dev` store paths get rewritten to the new directory
#   name  - derivation name, used for both the intermediate symlinkJoin and the result
#   paths - store paths merged into the sysroot
# Steps performed by the script below:
#   1. symlinkJoin `paths`, then copy the result with symlinks dereferenced and make it writable
#   2. point usr/local back at the sysroot root and drop lib64
#   3. write a setup hook exporting CC=clang and CXX=clang++
#   4. link lib/linux/libclang_rt.* into lib/clang/<llvmMajorVersion>/lib/linux/
#   5. replace every occurrence of cc.out and cc.dev with $out in all regular files,
#      then reset CLANG_INCLUDE_DIRS in ClangConfig.cmake to cc.dev/include
#   6. run rdfind with -makesymlinks to replace duplicate files by symlinks
# NOTE(review): step 5 runs sed over every regular file, not only text files - confirm
# this is safe for the binaries in the tree.
sysrootCompiler =
cc: name: paths:
let
linked = symlinkJoin { inherit name paths; };
in
runCommand name { } ''
set -x
mkdir -p $out/
cp --reflink=auto -rL ${linked}/* $out/
chmod -R +rw $out
mkdir -p $out/usr
ln -s $out/ $out/usr/local
mkdir -p $out/nix-support/
rm -rf $out/lib64 # we don't need mixed 32 bit
echo 'export CC=clang' >> $out/nix-support/setup-hook
echo 'export CXX=clang++' >> $out/nix-support/setup-hook
mkdir -p $out/lib/clang/${llvmMajorVersion}/lib/linux/
ln -s $out/lib/linux/libclang_rt.* $out/lib/clang/${llvmMajorVersion}/lib/linux/
find $out -type f -exec sed -i "s|${cc.out}|$out|g" {} +
find $out -type f -exec sed -i "s|${cc.dev}|$out|g" {} +
# our /include now has more than clang expects, so this specific dir still needs to point to cc.dev
# FIXME: could copy into a different subdir?
sed -i 's|set(CLANG_INCLUDE_DIRS.*$|set(CLANG_INCLUDE_DIRS "${cc.dev}/include")|g' $out/lib/cmake/clang/ClangConfig.cmake
${lib.getExe rdfind} -makesymlinks true $out/ # create links *within* the sysroot to save space
'';
# True for the two inherited clang patches that get filtered out of old.patches:
# the add-nostdlibinc-flag patch and the LLVMgold-path patch (despite the name,
# both suffixes are matched).
findClangNostdlibincPatch =
  x:
  let
    patchName = builtins.baseNameOf x;
    excludedSuffixes = [
      "add-nostdlibinc-flag.patch"
      "clang-at-least-16-LLVMgold-path.patch"
    ];
  in
  builtins.any (suffix: lib.strings.hasSuffix suffix patchName) excludedSuffixes;
# CMake flag selecting the LLVM backends: AMDGPU plus the backend for the CPU of
# llvmStdenv's target platform. Evaluation fails for CPUs missing from the table.
llvmTargetsFlag =
  let
    backendForCpu = {
      "x86_64" = "X86";
      "aarch64" = "AArch64";
    };
    cpuName = llvmStdenv.targetPlatform.parsed.cpu.name;
  in
  "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${backendForCpu.${cpuName}}";
# Predicate for `builtins.filter` over inherited cmakeFlags: true for every flag
# that does NOT mention llvm-tblgen, so filtering with it drops those flags.
tablegenUsage = x: !(lib.strings.hasInfix "llvm-tblgen" x);
# True when the compiler of the stdenv that builds ROCm LLVM is not clang; gates the
# GCC-specific LTO compiler flags below and the gcc-ar/gcc-ranlib/gcc-nm CMake settings.
addGccLtoCmakeFlags = !llvmPackagesRocm.stdenv.cc.isClang;
# Compiler flags used as CMAKE_C_FLAGS_RELEASE / CMAKE_CXX_FLAGS_RELEASE for the ROCm LLVM builds.
#
# No -march is set on purpose: -march=<cpu> lets the compiler emit instructions that
# older CPUs lack, so the resulting toolchain binaries would crash with an illegal
# instruction on baseline x86_64 machines. -mtune only affects scheduling, so it is
# kept, but restricted to x86_64 where the znver3 model exists.
#
# -ffat-lto-objects = emit LTO object files that are compatible with non-LTO-supporting builds too
# FatLTO objects are a special type of fat object file that contain LTO compatible IR in addition to generated object code,
# instead of containing object code for multiple target architectures. This allows users to defer the choice of whether to
# use LTO or not to link-time, and has been a feature available in other compilers, like GCC, for some time.
llvmExtraCflags =
  "-O3 -DNDEBUG"
  + (lib.optionalString stdenv.hostPlatform.isx86_64 " -mtune=znver3")
  + (lib.optionalString addGccLtoCmakeFlags " -D_GLIBCXX_USE_CXX11_ABI=0 -flto -ffat-lto-objects -flto-compression-level=19 -Wl,-flto")
  + (lib.optionalString llvmPackagesRocm.stdenv.cc.isClang " -flto=thin -ffat-lto-objects")
  # Frame pointers and compressed minimal debug info for sampling profilers.
  + (lib.optionalString profilableStdenv " -fno-omit-frame-pointer -momit-leaf-frame-pointer -gz -g1");
in in
rec { rec {
inherit inherit (llvmPackagesRocm) libunwind;
llvm inherit (llvmPackagesRocm) libcxx;
clang-unwrapped llvm-orig = llvmPackagesRocm.llvm; # nix why-depends --derivation .#rocmPackages.clr .#rocmPackages.llvm.llvm-orig
lld clang-orig = llvmPackagesRocm.clang; # nix why-depends --derivation .#rocmPackages.clr .#rocmPackages.llvm.clang-orig
bintools llvm = (llvmPackagesRocm.llvm.override { ninja = emptyDirectory; }).overrideAttrs (old: {
; dontStrip = profilableStdenv;
nativeBuildInputs = old.nativeBuildInputs ++ [ removeReferencesTo ];
# Runtimes buildInputs = old.buildInputs ++ [
libc = callPackage ./stage-2/libc.nix { zstd
inherit rocmUpdateScript; zlib
stdenv = rStdenv; ];
env.NIX_BUILD_ID_STYLE = "fast";
postPatch = ''
${old.postPatch or ""}
patchShebangs lib/OffloadArch/make_generated_offload_arch_h.sh
'';
LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib";
cmakeFlags =
(builtins.filter tablegenUsage old.cmakeFlags)
++ [
llvmTargetsFlag
"-DCMAKE_BUILD_TYPE=Release"
"-DLLVM_ENABLE_ZSTD=FORCE_ON"
"-DLLVM_ENABLE_ZLIB=FORCE_ON"
"-DLLVM_ENABLE_THREADS=ON"
"-DLLVM_ENABLE_LTO=Thin"
"-DLLVM_USE_LINKER=lld"
(lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx)
"-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}"
]
++ lib.optionals addGccLtoCmakeFlags [
"-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
"-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
"-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm"
]
++ lib.optionals useLibcxx [
"-DLLVM_ENABLE_LTO=Thin"
"-DLLVM_USE_LINKER=lld"
"-DLLVM_ENABLE_LIBCXX=ON"
];
preConfigure = ''
${old.preConfigure or ""}
cmakeFlagsArray+=(
'-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}'
'-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}'
)
'';
# Ensure we don't leak refs to compiler that was used to bootstrap this LLVM
disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
postFixup = ''
${old.postFixup or ""}
remove-references-to -t "${stdenv.cc}" "$lib/lib/libLLVMSupport.a"
find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} +
find $lib -type f -exec remove-references-to -t ${stdenvToBuildRocmLlvm.cc} {} +
find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} +
'';
});
# lld built from the ROCm LLVM source tree against the `llvm` defined in this set.
# NOTE(review): `ninja = emptyDirectory` presumably keeps the build from using ninja - confirm.
lld =
  (llvmPackagesRocm.lld.override {
    libllvm = llvm;
    ninja = emptyDirectory;
  }).overrideAttrs
    (old: {
      # Every inherited patch except more-openbsd-program-headers.patch.
      patches = builtins.filter (
        x: !(lib.strings.hasSuffix "more-openbsd-program-headers.patch" (builtins.baseNameOf x))
      ) old.patches;
      # Skip stripping when building the profilable toolchain.
      dontStrip = profilableStdenv;
      nativeBuildInputs = old.nativeBuildInputs ++ [
        llvmPackagesNoBintools.lld
        removeReferencesTo
      ];
      buildInputs = old.buildInputs ++ [
        zstd
        zlib
      ];
      # overrideAttrs merges attributes shallowly, so extend any inherited `env`
      # instead of replacing it wholesale.
      env = (old.env or { }) // {
        NIX_BUILD_ID_STYLE = "fast";
      };
      LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib";
      # Inherited flags minus anything mentioning llvm-tblgen, followed by the ROCm configuration.
      cmakeFlags =
        (builtins.filter tablegenUsage old.cmakeFlags)
        ++ [
          llvmTargetsFlag
          "-DCMAKE_BUILD_TYPE=Release"
          "-DLLVM_ENABLE_ZSTD=FORCE_ON"
          "-DLLVM_ENABLE_ZLIB=FORCE_ON"
          "-DLLVM_ENABLE_THREADS=ON"
          "-DLLVM_ENABLE_LTO=Thin"
          "-DLLVM_USE_LINKER=lld"
          (lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx)
          "-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}"
        ]
        ++ lib.optionals addGccLtoCmakeFlags [
          "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
          "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
          "-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm"
        ]
        # NOTE(review): repeats the LLVM_ENABLE_LIBCXX setting made via lib.cmakeBool above.
        ++ lib.optionals useLibcxx [
          "-DLLVM_ENABLE_LIBCXX=ON"
        ];
      # Ensure we don't leak refs to compiler that was used to bootstrap this LLVM
      disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
      # Scrub references to the default stdenv's compiler and bintools from the lib output.
      postFixup = ''
        ${old.postFixup or ""}
        find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} +
        find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} +
      '';
      # The release flags are appended through the cmakeFlagsArray shell array
      # (NOTE(review): presumably because the flag values contain spaces - confirm).
      preConfigure = ''
        ${old.preConfigure or ""}
        cmakeFlagsArray+=(
        '-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}'
        '-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}'
        )
      '';
    });
# clang built from the ROCm LLVM source tree against the `llvm` defined in this set.
# The resulting derivation is extended with a `libllvm` attribute pointing at that llvm.
clang-unwrapped =
  (
    (llvmPackagesRocm.clang-unwrapped.override {
      libllvm = llvm;
      ninja = emptyDirectory;
    }).overrideAttrs
      (
        old:
        let
          # Inherited patches without the nostdlibinc-flag and LLVMgold-path patches;
          # the LLVMgold-path patch is re-added below with the libdir substituted.
          filteredPatches = builtins.filter (x: !(findClangNostdlibincPatch x)) old.patches;
        in
        {
          # overrideAttrs merges attributes shallowly: writing `meta.platforms = ...`
          # would replace the whole inherited meta (description, license, ...), so
          # extend old.meta instead.
          meta = (old.meta or { }) // {
            platforms = [
              "x86_64-linux"
            ];
          };
          pname = "${old.pname}-rocm";
          patches = filteredPatches ++ [
            ./clang-bodge-ignore-systemwide-incls.diff
            ./clang-log-jobs.diff
            (fetchpatch {
              # [ClangOffloadBundler]: Add GetBundleIDsInFile to OffloadBundler
              sha256 = "sha256-G/mzUdFfrJ2bLJgo4+mBcR6Ox7xGhWu5X+XxT4kH2c8=";
              url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/6d296f879b0fed830c54b2a9d26240da86c8bb3a.patch";
              relative = "clang";
            })
            # FIXME: if llvm was overrideable properly this wouldn't be needed
            (substituteAll {
              src = ./clang-at-least-16-LLVMgold-path.patch;
              libllvmLibdir = "${llvm.lib}/lib";
            })
          ];
          nativeBuildInputs = old.nativeBuildInputs ++ [
            llvmPackagesNoBintools.lld
            removeReferencesTo
          ];
          buildInputs = old.buildInputs ++ [
            zstd
            zlib
          ];
          # Skip stripping when building the profilable toolchain.
          dontStrip = profilableStdenv;
          LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib";
          env = (old.env or { }) // {
            NIX_BUILD_ID_STYLE = "fast";
          };
          # Ensure we don't leak refs to compiler that was used to bootstrap this LLVM
          disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
          requiredSystemFeatures = (old.requiredSystemFeatures or [ ]) ++ [ "big-parallel" ];
          # https://github.com/llvm/llvm-project/blob/6976deebafa8e7de993ce159aa6b82c0e7089313/clang/cmake/caches/DistributionExample-stage2.cmake#L9-L11
          # Inherited flags minus anything mentioning llvm-tblgen, followed by the ROCm configuration.
          cmakeFlags =
            (builtins.filter tablegenUsage old.cmakeFlags)
            ++ [
              llvmTargetsFlag
              "-DCMAKE_BUILD_TYPE=Release"
              "-DLLVM_ENABLE_ZSTD=FORCE_ON"
              "-DLLVM_ENABLE_ZLIB=FORCE_ON"
              "-DLLVM_ENABLE_THREADS=ON"
              "-DLLVM_ENABLE_LTO=Thin"
              "-DLLVM_USE_LINKER=lld"
              (lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx)
              "-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}"
            ]
            ++ lib.optionals addGccLtoCmakeFlags [
              "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
              "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
              "-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm"
            ]
            # NOTE(review): the first three entries repeat settings already made above;
            # only CLANG_DEFAULT_RTLIB is new in this group.
            ++ lib.optionals useLibcxx [
              "-DLLVM_ENABLE_LTO=Thin"
              "-DLLVM_ENABLE_LIBCXX=ON"
              "-DLLVM_USE_LINKER=lld"
              "-DCLANG_DEFAULT_RTLIB=compiler-rt"
            ]
            ++ lib.optionals (!useLibcxx) [
              # FIXME: Config file in rocmcxx instead of GCC_INSTALL_PREFIX?
              "-DGCC_INSTALL_PREFIX=${gcc-prefix}"
            ];
          # Scrub references to the default stdenv's compiler and bintools from the lib output.
          postFixup =
            (old.postFixup or "")
            + ''
              find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} +
              find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} +
            '';
          # The release flags are appended through the cmakeFlagsArray shell array
          # (NOTE(review): presumably because the flag values contain spaces - confirm).
          preConfigure =
            (old.preConfigure or "")
            + ''
              cmakeFlagsArray+=(
              '-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}'
              '-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}'
              )
            '';
        }
      )
  )
  // {
    libllvm = llvm;
  };
# Sysroot-style clang: it performs standard include searching inside a GNU sysroot,
# puts the GPU libraries on the include path in the right order, and expects its
# libc to live inside that sysroot.
rocmcxx =
  let
    # C++ standard library pieces: libc++ when useLibcxx is set, otherwise the
    # GCC headers/libraries together with glibc.
    stdlibPieces =
      if useLibcxx then
        [ libcxx ]
      else
        [
          gcc-include
          glibc
          glibc.dev
        ];
    sysrootPieces = [
      clang-unwrapped
      bintools
      compiler-rt
    ] ++ stdlibPieces;
    sysroot = sysrootCompiler clang-unwrapped "rocmcxx" (listUsefulOutputs sysrootPieces);
  in
  # Attributes layered on top of the sysroot derivation; `cc` refers back to rocmcxx itself.
  sysroot
  // {
    isClang = true;
    isGNU = false;
    version = llvmMajorVersion;
    libllvm = llvm;
    cc = rocmcxx;
  };
clang-tools = llvmPackagesRocm.clang-tools.override {
inherit clang-unwrapped clang;
}; };
libunwind = callPackage ./stage-2/libunwind.nix { compiler-rt-libc = llvmPackagesRocm.compiler-rt-libc.overrideAttrs (old: {
inherit rocmUpdateScript; patches = old.patches ++ [
stdenv = rStdenv; (fetchpatch {
}; name = "Fix-missing-main-function-in-float16-bfloat16-support-checks.patch";
libcxxabi = callPackage ./stage-2/libcxxabi.nix { url = "https://github.com/ROCm/llvm-project/commit/68d8b3846ab1e6550910f2a9a685690eee558af2.patch";
inherit rocmUpdateScript; hash = "sha256-Db+L1HFMWVj4CrofsGbn5lnMoCzEcU+7q12KKFb17/g=";
stdenv = rStdenv; relative = "compiler-rt";
}; })
libcxx = callPackage ./stage-2/libcxx.nix { ];
inherit rocmUpdateScript; });
stdenv = rStdenv; compiler-rt = compiler-rt-libc;
}; bintools = wrapBintoolsWith {
compiler-rt = callPackage ./stage-2/compiler-rt.nix { bintools = llvmPackagesRocm.bintools-unwrapped.override {
inherit rocmUpdateScript llvm; inherit lld llvm;
stdenv = rStdenv; };
}; };
## Stage 3 ## clang = rocmcxx;
# Helpers
clang = callPackage ./stage-3/clang.nix { # Emulate a monolithic ROCm LLVM build to support building ROCm's in-tree LLVM projects
inherit rocm-merged-llvm = symlinkJoin {
llvm name = "rocm-llvm-merge";
lld paths =
clang-unwrapped [
bintools llvm
libc llvm.dev
libunwind lld
libcxxabi lld.lib
libcxx lld.dev
compiler-rt libunwind
; libunwind.dev
stdenv = gcc12Stdenv; compiler-rt
compiler-rt.dev
rocmcxx
]
++ lib.optionals useLibcxx [
libcxx
libcxx.out
libcxx.dev
];
postBuild = builtins.unsafeDiscardStringContext ''
found_files=$(find $out -name '*.cmake')
if [ -z "$found_files" ]; then
>&2 echo "Error: No CMake files found in $out"
exit 1
fi
for target in ${clang-unwrapped.out} ${clang-unwrapped.lib} ${clang-unwrapped.dev}; do
if grep "$target" $found_files; then
>&2 echo "Unexpected ref to $target (clang-unwrapped) found"
# exit 1
# # FIXME: enable this to reduce closure size
fi
done
'';
inherit version;
llvm-src = llvmSrc;
}; };
rocmClangStdenv = overrideCC gcc12Stdenv clang;
rocmClangStdenv = overrideCC (
if useLibcxx then llvmPackagesRocm.libcxxStdenv else llvmPackagesRocm.stdenv
) clang;
# Projects # Projects
clang-tools-extra = callPackage ./stage-3/clang-tools-extra.nix { openmp =
inherit rocmUpdateScript llvm clang-unwrapped; (llvmPackagesRocm.openmp.override {
stdenv = rocmClangStdenv; stdenv = rocmClangStdenv;
}; llvm = rocm-merged-llvm;
libclc = callPackage ./stage-3/libclc.nix { targetLlvm = rocm-merged-llvm;
inherit rocmUpdateScript llvm clang; clang-unwrapped = clang;
stdenv = rocmClangStdenv; }).overrideAttrs
}; (old: {
lldb = callPackage ./stage-3/lldb.nix { disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
inherit rocmUpdateScript clang; nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ removeReferencesTo ];
stdenv = rocmClangStdenv; cmakeFlags =
}; old.cmakeFlags
mlir = callPackage ./stage-3/mlir.nix { ++ [
inherit rocmUpdateScript clr; "-DDEVICELIBS_ROOT=${rocm-device-libs.src}"
stdenv = rocmClangStdenv; # OMPD support is broken in ROCm 6.3. Haven't investigated why.
}; "-DLIBOMP_OMPD_SUPPORT:BOOL=FALSE"
polly = callPackage ./stage-3/polly.nix { "-DLIBOMP_OMPD_GDB_SUPPORT:BOOL=FALSE"
inherit rocmUpdateScript; ]
stdenv = rocmClangStdenv; ++ lib.optionals addGccLtoCmakeFlags [
}; "-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
flang = callPackage ./stage-3/flang.nix { "-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
inherit rocmUpdateScript clang-unwrapped mlir; ];
stdenv = rocmClangStdenv; env.LLVM = "${rocm-merged-llvm}";
}; env.LLVM_DIR = "${rocm-merged-llvm}";
openmp = callPackage ./stage-3/openmp.nix { buildInputs = old.buildInputs ++ [
inherit rocm-device-libs
rocmUpdateScript rocm-runtime
llvm zlib
clang-unwrapped zstd
clang libxml2
rocm-device-libs libffi
rocm-runtime ];
rocm-thunk });
;
stdenv = rocmClangStdenv;
};
# Runtimes
pstl = callPackage ./stage-3/pstl.nix {
inherit rocmUpdateScript;
stdenv = rocmClangStdenv;
};
} }

View File

@@ -1,48 +0,0 @@
# Standalone clang build ("clang-unwrapped", source directory "clang"),
# configured by passing the attributes below to the shared ../base.nix builder.
{
  stdenv,
  callPackage,
  rocmUpdateScript,
  llvm,
}:

let
  # Docs and tests are both switched on for this target.
  clangCMakeFlags = [
    "-DCLANG_INCLUDE_DOCS=ON"
    "-DCLANG_INCLUDE_TESTS=ON"
  ];

  # Shell fragment handed to base.nix as extraPostPatch: wires up libedit
  # discovery for the standalone build and deletes tests that fail here
  # (each removal carries its own reason inside the script).
  clangPostPatch = ''
    # Looks like they forgot to add finding libedit to the standalone build
    ln -s ../cmake/Modules/FindLibEdit.cmake cmake/modules
    substituteInPlace CMakeLists.txt \
    --replace-fail "include(CheckIncludeFile)" "include(CheckIncludeFile)''\nfind_package(LibEdit)"
    # `No such file or directory: '/build/source/clang/tools/scan-build/bin/scan-build'`
    rm test/Analysis/scan-build/*.test
    rm test/Analysis/scan-build/rebuild_index/rebuild_index.test
    # `does not depend on a module exporting 'baz.h'`
    rm test/Modules/header-attribs.cpp
    # We do not have HIP or the ROCm stack available yet
    rm test/Driver/hip-options.hip
    # ???? `ld: cannot find crti.o: No such file or directory` linker issue?
    rm test/Interpreter/dynamic-library.cpp
    # `fatal error: 'stdio.h' file not found`
    rm test/OpenMP/amdgcn_emit_llvm.c
  '';

  # clang-tblgen is left in the build tree's bin/; move it into $out/bin.
  clangPostInstall = ''
    mv bin/clang-tblgen $out/bin
  '';
in
callPackage ../base.nix {
  inherit stdenv rocmUpdateScript;

  targetDir = "clang";
  targetName = "clang-unwrapped";

  extraBuildInputs = [ llvm ];
  extraCMakeFlags = clangCMakeFlags;
  extraPostPatch = clangPostPatch;
  extraPostInstall = clangPostInstall;

  requiredSystemFeatures = [ "big-parallel" ];
}

View File

@@ -1,15 +0,0 @@
# lld, built through the shared ../base.nix builder with llvm as an extra
# build input.
{
  stdenv,
  callPackage,
  rocmUpdateScript,
  llvm,
}:

let
  # One name drives the target name, the source directory and the check target.
  targetName = "lld";
in
callPackage ../base.nix {
  inherit stdenv rocmUpdateScript targetName;

  targetDir = targetName;
  checkTargets = [ "check-${targetName}" ];
  extraBuildInputs = [ llvm ];

  buildMan = false; # No man pages to build
}

View File

@@ -1,11 +0,0 @@
# Invokes the shared ../base.nix builder with its defaults, overriding only
# the scheduling hint and the broken-platform marker.
{
  stdenv,
  callPackage,
  rocmUpdateScript,
}:

let
  # Marked broken when the host platform is AArch64:
  # https://github.com/ROCm/ROCm/issues/1831#issuecomment-1278205344
  brokenOnHost = stdenv.hostPlatform.isAarch64;
in
callPackage ../base.nix {
  inherit stdenv rocmUpdateScript;

  isBroken = brokenOnHost;
  requiredSystemFeatures = [ "big-parallel" ];
}

View File

@@ -1,32 +0,0 @@
# Combined "runtimes" build (libunwind, libcxxabi, libcxx, compiler-rt) via
# the shared ../base.nix builder, with docs, man pages and tests disabled.
{
  lib,
  stdenv,
  callPackage,
  rocmUpdateScript,
  llvm,
}:

let
  # The target name doubles as the source directory name.
  targetName = "runtimes";

  # Runtimes requested from the combined build, in this order.
  enabledRuntimes = [
    "libunwind"
    "libcxxabi"
    "libcxx"
    "compiler-rt"
  ];
in
callPackage ../base.nix {
  inherit stdenv rocmUpdateScript targetName;

  targetDir = targetName;
  targetRuntimes = enabledRuntimes;

  buildDocs = false;
  buildMan = false;
  buildTests = false;

  extraBuildInputs = [ llvm ];
  extraCMakeFlags = [
    "-DLIBCXX_INCLUDE_BENCHMARKS=OFF"
    "-DLIBCXX_CXX_ABI=libcxxabi"
  ];
  extraLicenses = [ lib.licenses.mit ];
}

View File

@@ -1,176 +0,0 @@
../libcxx/test/libcxx/containers/gnu_cxx/hash_map.pass.cpp
../libcxx/test/libcxx/containers/gnu_cxx/hash_set.pass.cpp
../libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.cxx2a.pass.cpp
../libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/construct.cxx2a.pass.cpp
../libcxx/test/libcxx/input.output/filesystems/class.directory_entry/directory_entry.mods/last_write_time.pass.cpp
../libcxx/test/libcxx/input.output/filesystems/class.path/path.member/path.native.obs/string_alloc.pass.cpp
../libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp
../libcxx/test/libcxx/localization/locales/locale/locale.types/locale.facet/no_allocation.pass.cpp
../libcxx/test/libcxx/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_from_underaligned_buffer.pass.cpp
../libcxx/test/libcxx/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_in_geometric_progression.pass.cpp
../libcxx/test/std/containers/associative/map/map.access/index_key.pass.cpp
../libcxx/test/std/containers/associative/map/map.access/index_rv_key.pass.cpp
../libcxx/test/std/containers/associative/map/map.modifiers/insert_and_emplace_allocator_requirements.pass.cpp
../libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_allocator_requirements.pass.cpp
../libcxx/test/std/containers/associative/multiset/insert_emplace_allocator_requirements.pass.cpp
../libcxx/test/std/containers/associative/set/insert_and_emplace_allocator_requirements.pass.cpp
../libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_iter_iter.pass.cpp
../libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_size_value.pass.cpp
../libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_value.pass.cpp
../libcxx/test/std/containers/sequences/vector.bool/ctor_exceptions.pass.cpp
../libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp
../libcxx/test/std/containers/unord/unord.map/unord.map.elem/index.pass.cpp
../libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_and_emplace_allocator_requirements.pass.cpp
../libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_allocator_requirements.pass.cpp
../libcxx/test/std/containers/unord/unord.multiset/insert_emplace_allocator_requirements.pass.cpp
../libcxx/test/std/containers/unord/unord.set/insert_and_emplace_allocator_requirements.pass.cpp
../libcxx/test/std/experimental/memory/memory.resource.global/new_delete_resource.pass.cpp
../libcxx/test/std/experimental/memory/memory.resource.global/null_memory_resource.pass.cpp
../libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/pbackfail.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/copy_assign.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/copy.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/move_assign.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/move.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/assign.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.append.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/source.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.compare.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.concat.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.decompose/path.decompose.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.gen/lexically_normal.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.gen/lexically_relative_and_proximate.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.generic.obs/generic_string_alloc.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.generic.obs/named_overloads.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/clear.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/make_preferred.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/remove_filename.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/replace_extension.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/replace_filename.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/swap.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.native.obs/named_overloads.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.nonmember/path.factory.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.nonmember/path.io.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.nonmember/swap.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file_large.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_symlink/copy_symlink.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory_symlink/create_directory_symlink.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_hard_link/create_hard_link.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_symlink/create_symlink.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.last_write_time/last_write_time.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.permissions/permissions.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.proximate/proximate.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove_all/remove_all.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove_all/toctou.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove/remove.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.temp_dir_path/temp_directory_path.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp
../libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.dtor/dtor.pass.cpp
../libcxx/test/std/localization/locale.stdcvt/codecvt_utf16.pass.cpp
../libcxx/test/std/localization/locale.stdcvt/codecvt_utf8.pass.cpp
../libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/ctor.pass.cpp
../libcxx/test/std/localization/locales/locale/locale.members/combine.pass.cpp
../libcxx/test/std/strings/basic.string/string.cons/substr_rvalue.pass.cpp
../libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp
../libcxx/test/std/utilities/any/any.class/any.assign/value.pass.cpp
../libcxx/test/std/utilities/any/any.class/any.cons/copy.pass.cpp
../libcxx/test/std/utilities/any/any.class/any.cons/default.pass.cpp
../libcxx/test/std/utilities/any/any.class/any.cons/in_place_type.pass.cpp
../libcxx/test/std/utilities/any/any.class/any.cons/move.pass.cpp
../libcxx/test/std/utilities/any/any.class/any.cons/value.pass.cpp
../libcxx/test/std/utilities/any/any.class/any.modifiers/emplace.pass.cpp
../libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_reference.pass.cpp
../libcxx/test/std/utilities/any/any.nonmembers/make_any.pass.cpp
../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.alg/swap.pass.cpp
../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_assign.pass.cpp
../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_move.pass.cpp
../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/F_assign.pass.cpp
../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/F.pass.cpp
../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/nullptr_t_assign.pass.cpp
../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.mod/swap.pass.cpp
../libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate_at_least.pass.cpp
../libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp
../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.enab/enable_shared_from_this.pass.cpp
../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_throw.pass.cpp
../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_throw.pass.cpp
../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_throw.pass.cpp
../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/unique_ptr.pass.cpp
../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.global/new_delete_resource.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.global/null_memory_resource.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.ctor/without_buffer.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_deallocate.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_exception_safety.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_from_initial_buffer.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_from_zero_sized_buffer.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_in_geometric_progression.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_overaligned_request.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_with_initial_size.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.ctor/ctor_does_not_allocate.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/equality.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate_overaligned_request.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate_reuse_blocks.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/unsync_allocate_overaligned_request.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/unsync_allocate_reuse_blocks.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/unsync_allocate.pass.cpp
../libcxx/test/std/language.support/support.dynamic/hardware_inference_size.compile.pass.cpp
../libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/sized_delete_array14.pass.cpp
../libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/sized_delete14.pass.cpp
../libcxx/test/libcxx/selftest/sh.cpp/empty.sh.cpp
../libcxx/test/libcxx/transitive_includes.sh.cpp

View File

@@ -1,29 +0,0 @@
# A bin/ directory of symlinks that exposes lld and the llvm tools, plus
# unprefixed binutils-style aliases (ar, as, nm, ld, ...).
#
# Order matters in the script: every lld binary is linked first, then every
# llvm binary with `ln -sf`, so an llvm binary replaces an lld one of the same
# name. The unprefixed aliases are created last.
{
  runCommand,
  llvm,
  lld,
}:

let
  drvName = "rocm-llvm-binutils-${llvm.version}";
  drvAttrs = {
    preferLocalBuild = true;
  };
in
runCommand drvName drvAttrs ''
  mkdir -p $out/bin
  for prog in ${lld}/bin/*; do
  ln -s $prog $out/bin/$(basename $prog)
  done
  for prog in ${llvm}/bin/*; do
  ln -sf $prog $out/bin/$(basename $prog)
  done
  ln -s ${llvm}/bin/llvm-ar $out/bin/ar
  ln -s ${llvm}/bin/llvm-as $out/bin/as
  ln -s ${llvm}/bin/llvm-dwp $out/bin/dwp
  ln -s ${llvm}/bin/llvm-nm $out/bin/nm
  ln -s ${llvm}/bin/llvm-objcopy $out/bin/objcopy
  ln -s ${llvm}/bin/llvm-objdump $out/bin/objdump
  ln -s ${llvm}/bin/llvm-ranlib $out/bin/ranlib
  ln -s ${llvm}/bin/llvm-readelf $out/bin/readelf
  ln -s ${llvm}/bin/llvm-size $out/bin/size
  ln -s ${llvm}/bin/llvm-strip $out/bin/strip
  ln -s ${lld}/bin/lld $out/bin/ld
''

View File

@@ -1,64 +0,0 @@
# compiler-rt, built through the shared ../base.nix builder.
#
# The build runs from the "runtimes" directory with libunwind, libcxxabi and
# libcxx enabled alongside compiler-rt. The LIBUNWIND_*/LIBCXXABI_*/LIBCXX_*
# flags below turn off docs, tests and installation for those three, so they
# are only present as a workaround for having to build combined.
{
  lib,
  stdenv,
  callPackage,
  rocmUpdateScript,
  llvm,
  glibc,
}:

callPackage ../base.nix rec {
  inherit stdenv rocmUpdateScript;
  buildDocs = false; # No documentation to build
  buildMan = false; # No man pages to build
  targetName = "compiler-rt";
  targetDir = "runtimes";
  targetRuntimes = [
    "libunwind"
    "libcxxabi"
    "libcxx"
    targetName
  ];
  extraCMakeFlags = [
    "-DCOMPILER_RT_INCLUDE_TESTS=ON"
    "-DCOMPILER_RT_USE_LLVM_UNWINDER=ON"
    "-DCOMPILER_RT_CXX_LIBRARY=libcxx"
    "-DCOMPILER_RT_CAN_EXECUTE_TESTS=OFF" # We can't run most of these
    # Workaround having to build combined
    "-DLIBUNWIND_INCLUDE_DOCS=OFF"
    "-DLIBUNWIND_INCLUDE_TESTS=OFF"
    "-DLIBUNWIND_USE_COMPILER_RT=ON"
    "-DLIBUNWIND_INSTALL_LIBRARY=OFF"
    "-DLIBUNWIND_INSTALL_HEADERS=OFF"
    "-DLIBCXXABI_INCLUDE_TESTS=OFF"
    "-DLIBCXXABI_USE_LLVM_UNWINDER=ON"
    "-DLIBCXXABI_USE_COMPILER_RT=ON"
    "-DLIBCXXABI_INSTALL_LIBRARY=OFF"
    "-DLIBCXXABI_INSTALL_HEADERS=OFF"
    "-DLIBCXX_INCLUDE_DOCS=OFF"
    "-DLIBCXX_INCLUDE_TESTS=OFF"
    "-DLIBCXX_USE_COMPILER_RT=ON"
    "-DLIBCXX_CXX_ABI=libcxxabi"
    "-DLIBCXX_INSTALL_LIBRARY=OFF"
    "-DLIBCXX_INSTALL_HEADERS=OFF"
  ];
  # The bare `--replace` flag of substituteInPlace is deprecated; `--replace-warn`
  # keeps the exact same behaviour (warn, but do not fail, when the pattern is
  # absent) without the deprecation notice.
  # NOTE(review): it has not been confirmed that both patterns still match the
  # upstream sources; once verified, tighten these to `--replace-fail` so a
  # silently non-applying patch breaks the build instead of going unnoticed.
  extraPostPatch = ''
    # `No such file or directory: 'ldd'`
    substituteInPlace ../compiler-rt/test/lit.common.cfg.py \
    --replace-warn "'ldd'," "'${glibc.bin}/bin/ldd',"
    # We can run these
    substituteInPlace ../compiler-rt/test/CMakeLists.txt \
    --replace-warn "endfunction()" "endfunction()''\nadd_subdirectory(builtins)''\nadd_subdirectory(shadowcallstack)"
    # Could not launch llvm-config in /build/source/runtimes/build/bin
    mkdir -p build/bin
    ln -s ${llvm}/bin/llvm-config build/bin
  '';
  extraLicenses = [ lib.licenses.mit ];
}

View File

@@ -1,27 +0,0 @@
# LLVM libc, built as the only runtime from the "runtimes" directory via the
# shared ../base.nix builder.
{
  stdenv,
  callPackage,
  rocmUpdateScript,
}:

let
  # Drives the runtime list and the check target name.
  targetName = "libc";

  # Neutralises the log10 test: its loop bound is rewritten to zero and the
  # per-rounding-mode test calls are removed (reason is in the script comments).
  disableLog10Test = ''
    # `Failed to match ... against ...` `Match value not within tolerance value of MPFR result:`
    # We need a better way, but I don't know enough sed magic and patching `CMakeLists.txt` isn't working...
    substituteInPlace ../libc/test/src/math/log10_test.cpp \
    --replace-fail "i < N" "i < 0" \
    --replace-fail "test(mpfr::RoundingMode::Nearest);" "" \
    --replace-fail "test(mpfr::RoundingMode::Downward);" "" \
    --replace-fail "test(mpfr::RoundingMode::Upward);" "" \
    --replace-fail "test(mpfr::RoundingMode::TowardZero);" ""
  '';
in
callPackage ../base.nix {
  inherit stdenv rocmUpdateScript targetName;

  targetDir = "runtimes";
  targetRuntimes = [ targetName ];
  checkTargets = [ "check-${targetName}" ];

  buildMan = false; # No man pages to build
  extraPostPatch = disableLog10Test;

  hardeningDisable = [ "fortify" ]; # Prevent `error: "Assumed value of MB_LEN_MAX wrong"`
}

View File

@@ -1,43 +0,0 @@
# libcxx, built from the "runtimes" directory via the shared ../base.nix
# builder. libunwind and libcxxabi are enabled too, but only as a workaround
# for the combined build: their docs, tests and installation are switched off.
{
  stdenv,
  callPackage,
  rocmUpdateScript,
}:

let
  targetName = "libcxx";

  # Flags for the target we actually want out of this build.
  libcxxFlags = [
    "-DLIBCXX_INCLUDE_DOCS=ON"
    "-DLIBCXX_INCLUDE_TESTS=ON"
    "-DLIBCXX_USE_COMPILER_RT=ON"
    "-DLIBCXX_CXX_ABI=libcxxabi"
  ];

  # Workaround having to build combined
  companionFlags = [
    "-DLIBUNWIND_INCLUDE_DOCS=OFF"
    "-DLIBUNWIND_INCLUDE_TESTS=OFF"
    "-DLIBUNWIND_USE_COMPILER_RT=ON"
    "-DLIBUNWIND_INSTALL_LIBRARY=OFF"
    "-DLIBUNWIND_INSTALL_HEADERS=OFF"
    "-DLIBCXXABI_INCLUDE_TESTS=OFF"
    "-DLIBCXXABI_USE_LLVM_UNWINDER=ON"
    "-DLIBCXXABI_USE_COMPILER_RT=ON"
    "-DLIBCXXABI_INSTALL_LIBRARY=OFF"
    "-DLIBCXXABI_INSTALL_HEADERS=OFF"
  ];
in
callPackage ../base.nix {
  inherit stdenv rocmUpdateScript targetName;

  targetDir = "runtimes";
  targetRuntimes = [
    "libunwind"
    "libcxxabi"
    targetName
  ];

  buildMan = false; # No man pages to build
  extraCMakeFlags = libcxxFlags ++ companionFlags;

  # Deletes every test named in the adjacent failing-tests list.
  # Most of these can't find `bash` or `mkdir`, might just be hard-coded paths, or PATH is altered
  extraPostPatch = ''
    chmod +w -R ../libcxx/test/{libcxx,std}
    cat ${./1000-libcxx-failing-tests.list} | xargs -d \\n rm
  '';
}

View File

@@ -1,38 +0,0 @@
# libcxxabi, built from the "runtimes" directory via the shared ../base.nix
# builder. libunwind and libcxx are enabled only as a workaround for the
# combined build: their docs, tests and installation are switched off.
{
  stdenv,
  callPackage,
  rocmUpdateScript,
}:

let
  targetName = "libcxxabi";

  # Flags for the target we actually want out of this build.
  libcxxabiFlags = [
    "-DLIBCXXABI_INCLUDE_TESTS=ON"
    "-DLIBCXXABI_USE_LLVM_UNWINDER=ON"
    "-DLIBCXXABI_USE_COMPILER_RT=ON"
  ];

  # Workaround having to build combined
  companionFlags = [
    "-DLIBUNWIND_INCLUDE_DOCS=OFF"
    "-DLIBUNWIND_INCLUDE_TESTS=OFF"
    "-DLIBUNWIND_USE_COMPILER_RT=ON"
    "-DLIBUNWIND_INSTALL_LIBRARY=OFF"
    "-DLIBUNWIND_INSTALL_HEADERS=OFF"
    "-DLIBCXX_INCLUDE_DOCS=OFF"
    "-DLIBCXX_INCLUDE_TESTS=OFF"
    "-DLIBCXX_USE_COMPILER_RT=ON"
    "-DLIBCXX_CXX_ABI=libcxxabi"
    "-DLIBCXX_INSTALL_LIBRARY=OFF"
    "-DLIBCXX_INSTALL_HEADERS=OFF"
  ];
in
callPackage ../base.nix {
  inherit stdenv rocmUpdateScript targetName;

  targetDir = "runtimes";
  targetRuntimes = [
    "libunwind"
    targetName
    "libcxx"
  ];

  buildDocs = false; # No documentation to build
  buildMan = false; # No man pages to build

  extraCMakeFlags = libcxxabiFlags ++ companionFlags;
}

View File

@@ -1,27 +0,0 @@
# libunwind, built as the only runtime from the "runtimes" directory via the
# shared ../base.nix builder, with docs and tests enabled.
{
  stdenv,
  callPackage,
  rocmUpdateScript,
}:

let
  targetName = "libunwind";

  # Removes three tests before the build (reason is in the script comment).
  dropUnsupportedTests = ''
    # `command had no output on stdout or stderr` (Says these unsupported tests)
    chmod +w -R ../libunwind/test
    rm ../libunwind/test/floatregister.pass.cpp
    rm ../libunwind/test/unwind_leaffunction.pass.cpp
    rm ../libunwind/test/libunwind_02.pass.cpp
  '';
in
callPackage ../base.nix {
  inherit stdenv rocmUpdateScript targetName;

  targetDir = "runtimes";
  targetRuntimes = [ targetName ];

  buildMan = false; # No man pages to build

  extraCMakeFlags = [
    "-DLIBUNWIND_INCLUDE_DOCS=ON"
    "-DLIBUNWIND_INCLUDE_TESTS=ON"
    "-DLIBUNWIND_USE_COMPILER_RT=ON"
  ];
  extraPostPatch = dropUnsupportedTests;
}

View File

@@ -1,37 +0,0 @@
# A stdenv whose compiler is clang-unwrapped wrapped with wrapCCWith: lld is
# the linker, compiler-rt the runtime library, libunwind the unwinder, and
# `runtimes` supplies libcxx.
{
  stdenv,
  overrideCC,
  wrapCCWith,
  llvm,
  clang-unwrapped,
  lld,
  runtimes,
  bintools,
}:

let
  # The unwrapped compiler being wrapped; also referenced by the build commands.
  cc = clang-unwrapped;

  # Flags written into the wrapper's cc-cflags. `$out` is left literal here
  # (a plain "$out" is not Nix interpolation).
  wrapperCflags = [
    "-resource-dir=$out/resource-root"
    "-fuse-ld=lld"
    "-rtlib=compiler-rt"
    "-unwindlib=libunwind"
    "-Wno-unused-command-line-argument"
  ];

  # Builds $out/resource-root out of two symlinks: clang's versioned include
  # directory and the runtimes' lib directory. The version is parsed from the
  # output of `clang -v`.
  resourceRootCommands = ''
    clang_version=`${cc}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"`
    mkdir -p $out/resource-root
    ln -s ${cc}/lib/clang/$clang_version/include $out/resource-root
    ln -s ${runtimes}/lib $out/resource-root
  '';

  wrappedClang = wrapCCWith {
    inherit bintools cc;
    libcxx = runtimes;
    gccForLibs = stdenv.cc.cc;
    extraPackages = [
      llvm
      lld
    ];
    nixSupport.cc-cflags = wrapperCflags;
    extraBuildCommands = resourceRootCommands;
  };
in
overrideCC stdenv wrappedClang

View File

@@ -1,122 +0,0 @@
runtime/test/tasking/hidden_helper_task/gtid.cpp
runtime/test/ompt/parallel/parallel_if0.c
runtime/test/ompt/parallel/serialized.c
runtime/test/ompt/teams/parallel_team.c
runtime/test/ompt/teams/serial_teams.c
runtime/test/ompt/teams/serialized.c
runtime/test/ompt/teams/team.c
libomptarget/test/api/assert.c
libomptarget/test/api/omp_device_managed_memory.c
libomptarget/test/api/omp_device_memory.c
libomptarget/test/api/omp_get_device_num.c
libomptarget/test/api/omp_host_pinned_memory.c
libomptarget/test/api/omp_host_pinned_memory_alloc.c
libomptarget/test/api/omp_target_memcpy_async1.c
libomptarget/test/api/omp_target_memcpy_async2.c
libomptarget/test/api/omp_target_memcpy_rect_async1.c
libomptarget/test/api/omp_target_memcpy_rect_async2.c
libomptarget/test/mapping/array_section_implicit_capture.c
libomptarget/test/mapping/data_absent_at_exit.c
libomptarget/test/mapping/data_member_ref.cpp
libomptarget/test/mapping/declare_mapper_api.cpp
libomptarget/test/mapping/declare_mapper_target.cpp
libomptarget/test/mapping/declare_mapper_target_data.cpp
libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp
libomptarget/test/mapping/firstprivate_aligned.cpp
libomptarget/test/mapping/has_device_addr.cpp
libomptarget/test/mapping/implicit_device_ptr.c
libomptarget/test/mapping/is_device_ptr.cpp
libomptarget/test/mapping/lambda_mapping.cpp
libomptarget/test/mapping/low_alignment.c
libomptarget/test/mapping/map_back_race.cpp
libomptarget/test/mapping/power_of_two_alignment.c
libomptarget/test/mapping/pr38704.c
libomptarget/test/mapping/prelock.cpp
libomptarget/test/mapping/present/target_data_at_exit.c
libomptarget/test/mapping/private_mapping.c
libomptarget/test/mapping/ptr_and_obj_motion.c
libomptarget/test/mapping/reduction_implicit_map.cpp
libomptarget/test/mapping/target_derefence_array_pointrs.cpp
libomptarget/test/mapping/target_map_for_member_data.cpp
libomptarget/test/mapping/target_update_array_extension.c
libomptarget/test/mapping/target_use_device_addr.c
libomptarget/test/offloading/atomic-compare-signedness.c
libomptarget/test/offloading/bug47654.cpp
libomptarget/test/offloading/bug49021.cpp
libomptarget/test/offloading/bug49779.cpp
libomptarget/test/offloading/bug50022.cpp
libomptarget/test/offloading/bug51781.c
libomptarget/test/offloading/bug51982.c
libomptarget/test/offloading/bug53727.cpp
libomptarget/test/offloading/complex_reduction.cpp
libomptarget/test/offloading/cuda_no_devices.c
libomptarget/test/offloading/d2d_memcpy.c
libomptarget/test/offloading/dynamic_module.c
libomptarget/test/offloading/dynamic_module_load.c
libomptarget/test/offloading/global_constructor.cpp
libomptarget/test/offloading/lone_target_exit_data.c
libomptarget/test/offloading/memory_manager.cpp
libomptarget/test/offloading/parallel_offloading_map.cpp
libomptarget/test/offloading/static_linking.c
libomptarget/test/offloading/std_complex_arithmetic.cpp
libomptarget/test/offloading/target-teams-atomic.c
libomptarget/test/offloading/target_constexpr_mapping.cpp
libomptarget/test/offloading/target_critical_region.cpp
libomptarget/test/offloading/target_depend_nowait.cpp
libomptarget/test/offloading/target_nowait_target.cpp
libomptarget/test/offloading/taskloop_offload_nowait.cpp
libomptarget/test/offloading/test_libc.cpp
libomptarget/test/ompt/veccopy.c
libomptarget/test/ompt/veccopy_disallow_both.c
libomptarget/test/ompt/veccopy_emi.c
libomptarget/test/ompt/veccopy_emi_map.c
libomptarget/test/ompt/veccopy_map.c
libomptarget/test/ompt/veccopy_no_device_init.c
libomptarget/test/ompt/veccopy_wrong_return.c
libomptarget/test/api/is_initial_device.c
libomptarget/test/mapping/declare_mapper_nested_default_mappers_array_subscript.cpp
libomptarget/test/mapping/declare_mapper_nested_default_mappers_ptr_subscript.cpp
libomptarget/test/mapping/declare_mapper_nested_default_mappers_var.cpp
libomptarget/test/mapping/target_pointers_members_map.cpp
libomptarget/test/api/omp_dynamic_shared_memory_mixed.c
libomptarget/test/api/omp_env_vars.c
libomptarget/test/api/omp_get_mapped_ptr.c
libomptarget/test/api/omp_get_num_devices.c
libomptarget/test/api/omp_get_num_devices_with_empty_target.c
libomptarget/test/mapping/alloc_fail.c
libomptarget/test/mapping/array_section_use_device_ptr.c
libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp
libomptarget/test/mapping/declare_mapper_nested_mappers.cpp
libomptarget/test/mapping/declare_mapper_target_update.cpp
libomptarget/test/mapping/delete_inf_refcount.c
libomptarget/test/mapping/lambda_by_value.cpp
libomptarget/test/mapping/ompx_hold/omp_target_disassociate_ptr.c
libomptarget/test/mapping/ompx_hold/struct.c
libomptarget/test/mapping/ompx_hold/target-data.c
libomptarget/test/mapping/ompx_hold/target.c
libomptarget/test/mapping/present/target.c
libomptarget/test/mapping/present/target_array_extension.c
libomptarget/test/mapping/present/target_data.c
libomptarget/test/mapping/present/target_data_array_extension.c
libomptarget/test/mapping/present/target_enter_data.c
libomptarget/test/mapping/present/target_exit_data_delete.c
libomptarget/test/mapping/present/target_exit_data_release.c
libomptarget/test/mapping/present/target_update.c
libomptarget/test/mapping/present/target_update_array_extension.c
libomptarget/test/mapping/present/zero_length_array_section.c
libomptarget/test/mapping/present/zero_length_array_section_exit.c
libomptarget/test/mapping/target_data_array_extension_at_exit.c
libomptarget/test/mapping/target_has_device_addr.c
libomptarget/test/mapping/target_implicit_partial_map.c
libomptarget/test/mapping/target_wrong_use_device_addr.c
libomptarget/test/offloading/host_as_target.c
libomptarget/test/offloading/info.c
libomptarget/test/offloading/offloading_success.c
libomptarget/test/offloading/offloading_success.cpp
libomptarget/test/offloading/wtime.c
libomptarget/test/unified_shared_memory/api.c
libomptarget/test/unified_shared_memory/associate_ptr.c
libomptarget/test/unified_shared_memory/close_enter_exit.c
libomptarget/test/unified_shared_memory/close_manual.c
libomptarget/test/unified_shared_memory/close_member.c
libomptarget/test/unified_shared_memory/close_modifier.c

View File

@@ -1,11 +0,0 @@
./test/Target/LLVMIR/openmp-llvm.mlir
./test/mlir-spirv-cpu-runner/double.mlir
./test/mlir-spirv-cpu-runner/simple_add.mlir
./test/mlir-vulkan-runner/addf.mlir
./test/mlir-vulkan-runner/addi.mlir
./test/mlir-vulkan-runner/addi8.mlir
./test/mlir-vulkan-runner/mulf.mlir
./test/mlir-vulkan-runner/smul_extended.mlir
./test/mlir-vulkan-runner/subf.mlir
./test/mlir-vulkan-runner/time.mlir
./test/mlir-vulkan-runner/umul_extended.mlir

View File

@@ -1,43 +0,0 @@
# clang-tools-extra from the ROCm LLVM tree. The `clang` project is built
# alongside it (see `targetProjects`), and the install step afterwards removes
# from $out every file that `llvm` or `clang-unwrapped` already ship.
{
stdenv,
callPackage,
rocmUpdateScript,
llvm,
clang-unwrapped,
gtest,
}:
callPackage ../base.nix {
inherit stdenv rocmUpdateScript;
buildTests = false; # `invalid operands to binary expression ('std::basic_stringstream<char>' and 'const llvm::StringRef')`
targetName = "clang-tools-extra";
targetProjects = [
"clang"
"clang-tools-extra"
];
extraBuildInputs = [ gtest ];
extraCMakeFlags = [
"-DLLVM_INCLUDE_DOCS=OFF"
"-DLLVM_INCLUDE_TESTS=OFF"
"-DCLANG_INCLUDE_DOCS=OFF"
"-DCLANG_INCLUDE_TESTS=ON"
"-DCLANG_TOOLS_EXTRA_INCLUDE_DOCS=ON"
];
# Walks every path under the `llvm` and `clang-unwrapped` store paths and
# deletes the same relative path from $out (tried against both prefixes).
# `rm -f ... || true` keeps going when a path cannot be removed, e.g. when it
# is a directory; directories left empty are then pruned by the final `find`.
# NOTE(review): the loop word-splits `find` output and leaves `$path`
# unquoted, so it relies on the paths containing no whitespace.
extraPostInstall = ''
# Remove LLVM and Clang
for path in `find ${llvm} ${clang-unwrapped}`; do
if [ $path != ${llvm} ] && [ $path != ${clang-unwrapped} ]; then
rm -f $out''${path#${llvm}} $out''${path#${clang-unwrapped}} || true
fi
done
# Cleanup empty directories
find $out -type d -empty -delete
'';
requiredSystemFeatures = [ "big-parallel" ];
}

View File

@@ -1,77 +0,0 @@
# Wrapped ROCm clang. The unwrapped compiler handed to `wrapCCWith` is a
# synthetic "rocm-llvm-clang" derivation that merges llvm, clang, lld, libc,
# libunwind, libcxxabi, libcxx and compiler-rt into a single prefix made of
# symlinks.
{
stdenv,
wrapCCWith,
llvm,
lld,
clang-unwrapped,
bintools,
libc,
libunwind,
libcxxabi,
libcxx,
compiler-rt,
}:
# `rec` is required: `extraBuildCommands` refers back to `cc`.
wrapCCWith rec {
inherit libcxx bintools;
# We do this to avoid HIP pathing problems, and mimic a monolithic install
cc = stdenv.mkDerivation (finalAttrs: {
inherit (clang-unwrapped) version;
pname = "rocm-llvm-clang";
# Nothing to unpack: the output is assembled purely from other store paths.
dontUnpack = true;
# Steps performed by the script below:
#  1. read the clang version from `clang -v`;
#  2. pre-create the directory skeleton, including lib/clang/<version>;
#  3. for each component, `cp -as` its contents into $out (symlinks rather
#     than copies), re-add write permission on the shared directories so the
#     next component can merge into them, and delete $out/lib/libc++.so;
#  4. link everything in $out/lib and $out/include into the clang resource
#     directory's `lib` and `include`.
# NOTE(review): the reason libc++.so is removed is not stated here - confirm.
installPhase = ''
runHook preInstall
clang_version=`${clang-unwrapped}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"`
mkdir -p $out/{bin,include/c++/v1,lib/{cmake,clang/$clang_version/{include,lib}},libexec,share}
for path in ${llvm} ${clang-unwrapped} ${lld} ${libc} ${libunwind} ${libcxxabi} ${libcxx} ${compiler-rt}; do
cp -as $path/* $out
chmod +w $out/{*,include/c++/v1,lib/{clang/$clang_version/include,cmake}}
rm -f $out/lib/libc++.so
done
ln -s $out/lib/* $out/lib/clang/$clang_version/lib
ln -sf $out/include/* $out/lib/clang/$clang_version/include
runHook postInstall
'';
passthru.isClang = true;
passthru.isROCm = true;
});
gccForLibs = stdenv.cc.cc;
extraPackages = [
llvm
lld
libc
libunwind
libcxxabi
compiler-rt
];
# Default flags for every compiler invocation.
# NOTE(review): `$out` presumably resolves to the wrapper's own output, where
# `extraBuildCommands` below creates `resource-root` - confirm.
nixSupport.cc-cflags = [
"-resource-dir=$out/resource-root"
"-fuse-ld=lld"
"-rtlib=compiler-rt"
"-unwindlib=libunwind"
"-Wno-unused-command-line-argument"
];
# Creates $out/resource-root with links to the merged compiler's resource
# `include` and `lib` directories, blanks out nix-support/add-hardening.sh,
# and rewrites the generated clang/clang++ wrapper scripts so that
# `--cuda-device-only` sets dontLink=1 while `--cuda-host-only` and
# `--cuda-compile-host-device` set dontLink=0.
extraBuildCommands = ''
clang_version=`${cc}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"`
mkdir -p $out/resource-root
ln -s ${cc}/lib/clang/$clang_version/{include,lib} $out/resource-root
# Not sure why, but hardening seems to make things break
echo "" > $out/nix-support/add-hardening.sh
# GPU compilation uses builtin `lld`
substituteInPlace $out/bin/{clang,clang++} \
--replace-fail "-MM) dontLink=1 ;;" "-MM | --cuda-device-only) dontLink=1 ;;''\n--cuda-host-only | --cuda-compile-host-device) dontLink=0 ;;"
'';
}

View File

@@ -1,32 +0,0 @@
# flang from the ROCm LLVM tree, configured against the separately built
# clang and MLIR. Currently flagged as broken (see `isBroken` below).
{
  stdenv,
  callPackage,
  rocmUpdateScript,
  clang-unwrapped,
  mlir,
  graphviz,
  python3Packages,
}:

let
  # The project name doubles as the source sub-directory to build.
  project = "flang";
in
callPackage ../base.nix {
  inherit stdenv rocmUpdateScript;

  targetName = project;
  targetDir = project;

  extraBuildInputs = [ mlir ];
  extraNativeBuildInputs = [
    graphviz
    python3Packages.sphinx-markdown-tables
  ];

  # Point CMake at the clang package config and at the prebuilt tablegen tools.
  extraCMakeFlags = [
    "-DCLANG_DIR=${clang-unwrapped}/lib/cmake/clang"
    "-DMLIR_TABLEGEN_EXE=${mlir}/bin/mlir-tblgen"
    "-DCLANG_TABLEGEN_EXE=${clang-unwrapped}/bin/clang-tblgen"
    "-DFLANG_INCLUDE_TESTS=OFF" # `The dependency target "Bye" of target ...`
  ];

  # `flang/lib/Semantics/check-omp-structure.cpp:1905:1: error: no member named 'v' in 'Fortran::parser::OmpClause::OmpxDynCgroupMem'`
  isBroken = true;
}

View File

@@ -1,38 +0,0 @@
# libclc from the ROCm LLVM tree. Currently flagged as broken (see `isBroken`
# at the bottom).
{
stdenv,
callPackage,
rocmUpdateScript,
llvm,
clang,
spirv-llvm-translator,
}:
let
# SPIRV-LLVM translator rebuilt against the `llvm` passed to this file.
spirv = (spirv-llvm-translator.override { inherit llvm; });
in
callPackage ../base.nix rec {
inherit stdenv rocmUpdateScript;
buildDocs = false; # No documentation to build
buildMan = false; # No man pages to build
targetName = "libclc";
targetDir = targetName;
extraBuildInputs = [ spirv ];
# `spirv-mesa3d` isn't compiling with LLVM 15.0.0, it does with LLVM 14.0.0
# Try removing the `spirv-mesa3d` and `clspv` patches next update
# `clspv` tests fail, unresolved calls
#
# The substitutions below, in order:
#  - make `find_program` look for `clang` in the wrapped clang's bin dir
#    instead of ${LLVM_BINDIR};
#  - make `find_program` look for `llvm-spirv` in the translator built above;
#  - drop the ` spirv-mesa3d-` and ` spirv64-mesa3d-` entries;
#  - add `clspv` / `clspv64` architecture exclusions in front of the existing
#    `NOT ${t} MATCHES` condition.
# `\''${` is the escape that yields a literal `${` for CMake inside this string.
extraPostPatch = ''
substituteInPlace CMakeLists.txt \
--replace-fail "find_program( LLVM_CLANG clang PATHS \''${LLVM_BINDIR} NO_DEFAULT_PATH )" \
"find_program( LLVM_CLANG clang PATHS \"${clang}/bin\" NO_DEFAULT_PATH )" \
--replace-fail "find_program( LLVM_SPIRV llvm-spirv PATHS \''${LLVM_BINDIR} NO_DEFAULT_PATH )" \
"find_program( LLVM_SPIRV llvm-spirv PATHS \"${spirv}/bin\" NO_DEFAULT_PATH )" \
--replace-fail " spirv-mesa3d-" "" \
--replace-fail " spirv64-mesa3d-" "" \
--replace-fail "NOT \''${t} MATCHES" \
"NOT \''${ARCH} STREQUAL \"clspv\" AND NOT \''${ARCH} STREQUAL \"clspv64\" AND NOT \''${t} MATCHES"
'';
# No check targets are run for this package.
checkTargets = [ ];
isBroken = true; # ROCm 5.7.0 doesn't have IR/AttributeMask.h yet...?
}

View File

@@ -1,40 +0,0 @@
# lldb from the ROCm LLVM tree, built through the shared `../base.nix`
# builder. Tests are configured in CMake but not built (`buildTests = false`).
{
stdenv,
callPackage,
rocmUpdateScript,
clang,
xz,
swig,
lua5_3,
graphviz,
gtest,
python3Packages,
}:
callPackage ../base.nix rec {
inherit stdenv rocmUpdateScript;
buildTests = false; # FIXME: Bad pathing for clang executable in tests, using relative path most likely
targetName = "lldb";
targetDir = targetName;
extraNativeBuildInputs = [ python3Packages.sphinx-automodapi ];
extraBuildInputs = [
xz
swig
lua5_3
graphviz
gtest
];
# NOTE(review): the first flag embeds the shell variable `$clang_version`,
# which `extraPostPatch` below exports. It only takes effect if the builder
# shell-expands CMake flags at configure time - confirm in `../base.nix`.
extraCMakeFlags = [
"-DLLDB_EXTERNAL_CLANG_RESOURCE_DIR=${clang}/resource-root/lib/clang/$clang_version"
"-DLLDB_INCLUDE_TESTS=ON"
"-DLLDB_INCLUDE_UNITTESTS=ON"
];
# Exports `clang_version`, parsed from the "clang version" line of whatever
# `clang` is on PATH when the script runs.
extraPostPatch = ''
export clang_version=`clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"`
'';
checkTargets = [ "check-${targetName}" ];
}

View File

@@ -1,61 +0,0 @@
# MLIR from the ROCm LLVM tree, with the ROCm, SPIR-V CPU and Vulkan runners
# enabled. Also installs `mlir-tblgen` into $out/bin.
{
stdenv,
callPackage,
rocmUpdateScript,
clr,
vulkan-headers,
vulkan-loader,
glslang,
shaderc,
fetchpatch,
}:
callPackage ../base.nix rec {
inherit stdenv rocmUpdateScript;
buildDocs = false; # No decent way to hack this to work
buildMan = false; # No man pages to build
targetName = "mlir";
targetDir = targetName;
# Fix `DebugTranslation.cpp:139:10: error: no matching function for call to 'get'`
extraPatches = [
(fetchpatch {
url = "https://github.com/ROCm/llvm-project/commit/f1d1e10ec7e1061bf0b90abbc1e298d9438a5e74.patch";
hash = "sha256-3c91A9InMKxm+JcnWxoUeOU68y5I6w1AAXx6T9UByqI=";
})
];
extraNativeBuildInputs = [ clr ];
extraBuildInputs = [
vulkan-headers
vulkan-loader
glslang
shaderc
];
extraCMakeFlags = [
"-DMLIR_INCLUDE_DOCS=ON"
"-DMLIR_INCLUDE_TESTS=ON"
"-DMLIR_ENABLE_ROCM_RUNNER=ON"
"-DMLIR_ENABLE_SPIRV_CPU_RUNNER=ON"
"-DMLIR_ENABLE_VULKAN_RUNNER=ON"
"-DROCM_TEST_CHIPSET=gfx000" # CPU runner
];
# Two fixes to the source tree:
#  - replace the check for a bundled googletest header with `FALSE`, so that
#    branch of CMakeLists.txt is never taken;
#  - delete every test named in 1001-mlir-failing-tests.list, one path per
#    line (`\\n` reaches xargs as the `\n` delimiter escape).
extraPostPatch = ''
# `add_library cannot create target "llvm_gtest" because an imported target with the same name already exists`
substituteInPlace CMakeLists.txt \
--replace-fail "EXISTS \''${UNITTEST_DIR}/googletest/include/gtest/gtest.h" "FALSE"
# Mainly `No such file or directory`
cat ${./1001-mlir-failing-tests.list} | xargs -d \\n rm
'';
# Moves `mlir-tblgen` from the build tree's `bin` directory into the output.
extraPostInstall = ''
mkdir -p $out/bin
mv bin/mlir-tblgen $out/bin
'';
checkTargets = [ "check-${targetName}" ];
requiredSystemFeatures = [ "big-parallel" ];
}

View File

@@ -1,55 +0,0 @@
# OpenMP (including libomptarget) from the ROCm LLVM tree, built through the
# shared `../base.nix` builder against the ROCm device libs, runtime and thunk.
{
  lib,
  stdenv,
  callPackage,
  rocmUpdateScript,
  llvm,
  clang,
  clang-unwrapped,
  rocm-device-libs,
  rocm-runtime,
  rocm-thunk,
  perl,
  elfutils,
  libdrm,
  numactl,
  lit,
}:

callPackage ../base.nix rec {
  inherit stdenv rocmUpdateScript;
  targetName = "openmp";
  targetDir = targetName;
  extraNativeBuildInputs = [ perl ];
  extraBuildInputs = [
    rocm-device-libs
    rocm-runtime
    rocm-thunk
    elfutils
    libdrm
    numactl
  ];
  # Tool locations are passed explicitly so the build uses the ROCm LLVM
  # binaries.
  # NOTE(review): CMAKE_MODULE_PATH hard-codes `/build/source`, i.e. it assumes
  # the default sandbox build directory and source root name - confirm.
  extraCMakeFlags = [
    "-DCMAKE_MODULE_PATH=/build/source/llvm/cmake/modules" # For docs
    "-DCLANG_TOOL=${clang}/bin/clang"
    "-DCLANG_OFFLOAD_BUNDLER_TOOL=${clang-unwrapped}/bin/clang-offload-bundler"
    "-DPACKAGER_TOOL=${clang-unwrapped}/bin/clang-offload-packager"
    "-DOPENMP_LLVM_TOOLS_DIR=${llvm}/bin"
    "-DOPENMP_LLVM_LIT_EXECUTABLE=${lit}/bin/.lit-wrapped"
    "-DDEVICELIBS_ROOT=${rocm-device-libs.src}"
  ];
  # Removes `gfx1010` from the DeviceRTL CMake file, then deletes every test
  # named in 1000-openmp-failing-tests.list (one path per line).
  #
  # `--replace-warn` is the explicit spelling of what the deprecated bare
  # `--replace` did: substitute when the pattern is present, only warn when it
  # is not. It avoids the deprecation warning without making a missing pattern
  # fatal.
  extraPostPatch = ''
    # We can't build this target at the moment
    substituteInPlace libomptarget/DeviceRTL/CMakeLists.txt \
      --replace-warn "gfx1010" ""
    # No idea what's going on here...
    cat ${./1000-openmp-failing-tests.list} | xargs -d \\n rm
  '';
  checkTargets = [ "check-${targetName}" ];
  extraLicenses = [ lib.licenses.mit ];
}

View File

@@ -1,19 +0,0 @@
# Polly from the ROCm LLVM tree, built and checked through the shared
# `../base.nix` builder.
{
  stdenv,
  callPackage,
  rocmUpdateScript,
}:

let
  # The project name doubles as the source sub-directory and names the
  # check target.
  project = "polly";
in
callPackage ../base.nix {
  inherit stdenv rocmUpdateScript;

  targetName = project;
  targetDir = project;
  checkTargets = [ "check-${project}" ];

  # Replaces the `NOT TARGET gtest` condition with `FALSE`, so the guarded
  # part of CMakeLists.txt is never evaluated.
  extraPostPatch = ''
    # `add_library cannot create target "llvm_gtest" because an imported target with the same name already exists`
    substituteInPlace CMakeLists.txt \
    --replace-fail "NOT TARGET gtest" "FALSE"
  '';
}

View File

@@ -1,16 +0,0 @@
# Parallel STL from the ROCm LLVM tree, built by itself through the shared
# `../base.nix` builder with `targetDir = "runtimes"`. Docs, man pages and
# tests are all switched off.
{
  stdenv,
  callPackage,
  rocmUpdateScript,
}:

let
  # Shared by `targetName`, `targetRuntimes` and the check target name.
  runtimeName = "pstl";
in
callPackage ../base.nix {
  inherit stdenv rocmUpdateScript;

  targetName = runtimeName;
  targetDir = "runtimes";
  targetRuntimes = [ runtimeName ];
  checkTargets = [ "check-${runtimeName}" ];

  buildDocs = false; # No documentation to build
  buildMan = false; # No man pages to build
  buildTests = false; # Too many errors
}

View File

@@ -7,7 +7,6 @@
cmake, cmake,
rocm-cmake, rocm-cmake,
clr, clr,
clang-tools-extra,
openmp, openmp,
rocblas, rocblas,
rocmlir, rocmlir,
@@ -54,7 +53,7 @@ let
in in
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "migraphx"; pname = "migraphx";
version = "6.0.2"; version = "6.3.1";
outputs = outputs =
[ [
@@ -71,7 +70,7 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm"; owner = "ROCm";
repo = "AMDMIGraphX"; repo = "AMDMIGraphX";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-VDYUSpWYAdJ63SKVCO26DVAC3RtZM7otqN0sYUA6DBQ="; hash = "sha256-h9cTbrMwHeRGVJS/uHQnCXplNcrBqxbhwz2AcAEso0M=";
}; };
nativeBuildInputs = nativeBuildInputs =
@@ -80,7 +79,6 @@ stdenv.mkDerivation (finalAttrs: {
cmake cmake
rocm-cmake rocm-cmake
clr clr
clang-tools-extra
python3Packages.python python3Packages.python
] ]
++ lib.optionals buildDocs [ ++ lib.optionals buildDocs [
@@ -172,8 +170,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {

View File

@@ -10,9 +10,13 @@
rocm-cmake, rocm-cmake,
rocblas, rocblas,
rocmlir, rocmlir,
rocrand,
rocm-runtime,
rocm-merged-llvm,
hipblas-common,
hipblas,
hipblaslt,
clr, clr,
clang-tools-extra,
clang-ocl,
composable_kernel, composable_kernel,
frugally-deep, frugally-deep,
rocm-docs-core, rocm-docs-core,
@@ -30,43 +34,53 @@
rocm-comgr, rocm-comgr,
roctracer, roctracer,
python3Packages, python3Packages,
# FIXME: should be able to use all clr targets
gpuTargets ? [
"gfx900"
"gfx906"
"gfx908"
"gfx90a"
"gfx942"
"gfx1030"
"gfx1100"
"gfx1101"
"gfx1102"
], # clr.gpuTargets
buildDocs ? false, # Needs internet because of rocm-docs-core buildDocs ? false, # Needs internet because of rocm-docs-core
buildTests ? false, buildTests ? false,
withComposableKernel ? composable_kernel.anyGfx9Target,
}: }:
let let
version = "6.0.2"; # FIXME: cmake files need patched to include this properly
cFlags = "-O3 -DNDEBUG -Wno-documentation-pedantic --offload-compress -I${hipblas-common}/include -I${hipblas}/include -I${roctracer}/include -I${nlohmann_json}/include -I${sqlite.dev}/include -I${rocrand}/include";
version = "6.3.1";
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "MIOpen"; repo = "MIOpen";
rev = "rocm-${version}"; rev = "rocm-${version}";
hash = "sha256-mbOdlSb0ESKi9hMkq3amv70Xkp/YKnZYre24d/y5TD0="; hash = "sha256-KV+tJPD4HQayY8zD4AdOFxxYRnyI47suxX5OgZ7mpdU=";
fetchLFS = true; fetchLFS = true;
fetchSubmodules = true;
# WORKAROUND: .lfsconfig is incorrectly set to exclude everything upstream
leaveDotGit = true; leaveDotGit = true;
# If you're reading this, it's gonna take a bit of time.
# fetchSubModules doesn't work with postFetch???
# fetchLFS isn't actually fetching the LFS files...
postFetch = '' postFetch = ''
export HOME=$(mktemp -d) export HOME=$(mktemp -d)
cd $out cd $out
set -x
# We need more history to fetch LFS files
git remote add origin $url git remote add origin $url
git fetch origin git fetch origin +refs/tags/rocm-${version}:refs/tags/rocm-${version}
git clean -fdx git clean -fdx
git checkout rocm-${version} git switch -c rocm-${version} refs/tags/rocm-${version}
git config lfs.fetchexclude "none"
# We need to do this manually since using leaveDotGit and fetchSubmodules errors rm .lfsconfig
git submodule update --init
# Fetch the LFS files
git lfs install git lfs install
git lfs fetch --all git lfs track "*.kdb.bz2"
GIT_TRACE=1 git lfs fetch --include="src/kernels/**"
GIT_TRACE=1 git lfs pull --include="src/kernels/**"
git lfs checkout git lfs checkout
# Remove the defunct .git folder
rm -rf .git rm -rf .git
''; '';
}; };
@@ -112,8 +126,13 @@ stdenv.mkDerivation (finalAttrs: {
inherit version src; inherit version src;
pname = "miopen"; pname = "miopen";
env.CFLAGS = cFlags;
env.CXXFLAGS = cFlags;
# Find zstd and add to target. Mainly for torch. # Find zstd and add to target. Mainly for torch.
patches = [ patches = [
./skip-preexisting-dbs.patch
./fix-isnan.patch # https://github.com/ROCm/MIOpen/pull/3448
(fetchpatch { (fetchpatch {
url = "https://github.com/ROCm/MIOpen/commit/e608b4325646afeabb5e52846997b926d2019d19.patch"; url = "https://github.com/ROCm/MIOpen/commit/e608b4325646afeabb5e52846997b926d2019d19.patch";
hash = "sha256-oxa3qlIC2bzbwGxrQOZXoY/S7CpLsMrnWRB7Og0tk0M="; hash = "sha256-oxa3qlIC2bzbwGxrQOZXoY/S7CpLsMrnWRB7Og0tk0M=";
@@ -122,11 +141,14 @@ stdenv.mkDerivation (finalAttrs: {
url = "https://github.com/ROCm/MIOpen/commit/3413d2daaeb44b7d6eadcc03033a5954a118491e.patch"; url = "https://github.com/ROCm/MIOpen/commit/3413d2daaeb44b7d6eadcc03033a5954a118491e.patch";
hash = "sha256-ST4snUcTmmSI1Ogx815KEX9GdMnmubsavDzXCGJkiKs="; hash = "sha256-ST4snUcTmmSI1Ogx815KEX9GdMnmubsavDzXCGJkiKs=";
}) })
(fetchpatch { # FIXME: We need to rebase or drop this arch compat patch
name = "Extend-MIOpen-ISA-compatibility.patch"; # https://github.com/ROCm/MIOpen/issues/3540 suggests that
url = "https://github.com/GZGavinZhao/MIOpen/commit/416088b534618bd669a765afce59cfc7197064c1.patch"; # arch compat patching doesn't work correctly for gfx1031
hash = "sha256-OwONCA68y8s2GqtQj+OtotXwUXQ5jM8tpeM92iaD4MU="; # (fetchpatch {
}) # name = "Extend-MIOpen-ISA-compatibility.patch";
# url = "https://github.com/GZGavinZhao/MIOpen/commit/416088b534618bd669a765afce59cfc7197064c1.patch";
# hash = "sha256-OwONCA68y8s2GqtQj+OtotXwUXQ5jM8tpeM92iaD4MU=";
# })
]; ];
outputs = outputs =
@@ -139,21 +161,24 @@ stdenv.mkDerivation (finalAttrs: {
++ lib.optionals buildTests [ ++ lib.optionals buildTests [
"test" "test"
]; ];
enableParallelBuilding = true;
env.ROCM_PATH = clr;
env.LD_LIBRARY_PATH = lib.makeLibraryPath [ rocm-runtime ];
env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin";
nativeBuildInputs = [ nativeBuildInputs = [
pkg-config pkg-config
cmake cmake
rocm-cmake rocm-cmake
clr clr
clang-tools-extra
]; ];
buildInputs = buildInputs =
[ [
hipblas
hipblas-common
rocblas rocblas
rocmlir rocmlir
clang-ocl
composable_kernel
half half
boost boost
sqlite sqlite
@@ -161,6 +186,11 @@ stdenv.mkDerivation (finalAttrs: {
nlohmann_json nlohmann_json
frugally-deep frugally-deep
roctracer roctracer
rocrand
hipblaslt
]
++ lib.optionals withComposableKernel [
composable_kernel
] ]
++ lib.optionals buildDocs [ ++ lib.optionals buildDocs [
latex latex
@@ -178,15 +208,32 @@ stdenv.mkDerivation (finalAttrs: {
cmakeFlags = cmakeFlags =
[ [
"-DCMAKE_CXX_FLAGS=-Wno-#warnings" # <half> -> <half/half.hpp> "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
"-DUNZIPPER=${bzip2}/bin/bunzip2" "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
"-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}"
"-DMIOPEN_USE_SQLITE_PERFDB=ON"
"-DCMAKE_VERBOSE_MAKEFILE=ON"
"-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
"-DCMAKE_BUILD_TYPE=Release"
# needs to stream to stdout so bzcat rather than bunzip2
"-DUNZIPPER=${bzip2}/bin/bzcat"
"-DCMAKE_C_COMPILER=amdclang"
"-DCMAKE_CXX_COMPILER=amdclang++"
"-DROCM_PATH=${clr}"
"-DHIP_ROOT_DIR=${clr}"
(lib.cmakeBool "MIOPEN_USE_ROCBLAS" true)
(lib.cmakeBool "MIOPEN_USE_HIPBLASLT" true)
(lib.cmakeBool "MIOPEN_USE_COMPOSABLEKERNEL" withComposableKernel)
(lib.cmakeBool "MIOPEN_USE_HIPRTC" true)
(lib.cmakeBool "MIOPEN_USE_COMGR" true)
"-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}"
# Manually define CMAKE_INSTALL_<DIR> # Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838 # See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include" "-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DCMAKE_C_COMPILER=hipcc"
"-DCMAKE_CXX_COMPILER=hipcc"
"-DMIOPEN_BACKEND=HIP" "-DMIOPEN_BACKEND=HIP"
] ]
++ lib.optionals buildTests [ ++ lib.optionals buildTests [
@@ -195,24 +242,29 @@ stdenv.mkDerivation (finalAttrs: {
]; ];
postPatch = '' postPatch = ''
patchShebangs test src/composable_kernel fin utils install_deps.cmake substituteInPlace cmake/ClangTidy.cmake \
--replace-fail 'macro(enable_clang_tidy)' 'macro(enable_clang_tidy)
endmacro()
macro(enable_clang_tidy_unused)' \
--replace-fail 'function(clang_tidy_check TARGET)' 'function(clang_tidy_check TARGET)
return()'
substituteInPlace CMakeLists.txt \ patchShebangs test src/composable_kernel fin utils install_deps.cmake
--replace "unpack_db(\"\''${CMAKE_SOURCE_DIR}/src/kernels/\''${FILE_NAME}.kdb.bz2\")" "" \
--replace "MIOPEN_HIP_COMPILER MATCHES \".*clang\\\\+\\\\+$\"" "true" \
--replace "set(MIOPEN_TIDY_ERRORS ALL)" "" # error: missing required key 'key'
substituteInPlace test/gtest/CMakeLists.txt \ substituteInPlace test/gtest/CMakeLists.txt \
--replace "include(googletest)" "" --replace "include(googletest)" ""
substituteInPlace test/gtest/CMakeLists.txt \
--replace-fail " gtest_main " " ${gtest}/lib/libgtest.so ${gtest}/lib/libgtest_main.so "
ln -sf ${gfx900} src/kernels/gfx900.kdb ln -sf ${gfx900} src/kernels/gfx900.kdb
ln -sf ${gfx906} src/kernels/gfx906.kdb ln -sf ${gfx906} src/kernels/gfx906.kdb
ln -sf ${gfx908} src/kernels/gfx908.kdb ln -sf ${gfx908} src/kernels/gfx908.kdb
ln -sf ${gfx90a} src/kernels/gfx90a.kdb ln -sf ${gfx90a} src/kernels/gfx90a.kdb
ln -sf ${gfx1030} src/kernels/gfx1030.kdb ln -sf ${gfx1030} src/kernels/gfx1030.kdb
mkdir -p build/share/miopen/db/
ln -sf ${gfx900} build/share/miopen/db/gfx900.kdb
ln -sf ${gfx906} build/share/miopen/db/gfx906.kdb
ln -sf ${gfx908} build/share/miopen/db/gfx908.kdb
ln -sf ${gfx90a} build/share/miopen/db/gfx90a.kdb
ln -sf ${gfx1030} build/share/miopen/db/gfx1030.kdb
''; '';
# Unfortunately, it seems like we have to call make on these manually # Unfortunately, it seems like we have to call make on these manually
@@ -249,13 +301,14 @@ stdenv.mkDerivation (finalAttrs: {
) )
} $test/bin/* } $test/bin/*
''; '';
# doCheck = false; # FIXME: clang-tidy really slow :(
requiredSystemFeatures = [ "big-parallel" ]; requiredSystemFeatures = [ "big-parallel" ];
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -264,8 +317,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -0,0 +1,31 @@
From 17f67e0aa31cd2f1c1cb012d3858abf6956acc72 Mon Sep 17 00:00:00 2001
From: "Sv. Lockal" <lockalsash@gmail.com>
Date: Tue, 24 Dec 2024 14:43:10 +0000
Subject: [PATCH] Fix missing isnan definition on libstdc++ >=14 systems
Closes #3441
---
driver/reducecalculation_driver.hpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/driver/reducecalculation_driver.hpp b/driver/reducecalculation_driver.hpp
index 8226b3c953..2001969509 100644
--- a/driver/reducecalculation_driver.hpp
+++ b/driver/reducecalculation_driver.hpp
@@ -33,6 +33,7 @@
#include "random.hpp"
#include <algorithm>
#include <cfloat>
+#include <cmath>
#include <cstdlib>
#include <memory>
#include <miopen/miopen.h>
@@ -77,7 +78,7 @@ int32_t mloReduceCalculationForwardRunHost(miopenTensorDescriptor_t inputDesc,
for(size_t i = 0; i < reduce_size; ++i)
{
Tcheck val = static_cast<Tcheck>(input[input_idx]);
- if(nanPropagation && isnan(val))
+ if(nanPropagation && std::isnan(val))
{
val = 0.0f;
}

View File

@@ -0,0 +1,22 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d0ffaf983..0b9ed0952 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -554,7 +554,7 @@ endif()
function(unpack_db db_bzip2_file)
get_filename_component(__fname ${db_bzip2_file} NAME_WLE)
add_custom_command(OUTPUT ${KERNELS_BINARY_DIR}/${__fname}
- COMMAND ${UNZIPPER} -dc -k ${db_bzip2_file} > ${KERNELS_BINARY_DIR}/${__fname})
+ COMMAND test -e ${KERNELS_BINARY_DIR}/${__fname} || ${UNZIPPER} -dc -k ${db_bzip2_file} > ${KERNELS_BINARY_DIR}/${__fname})
string(REPLACE "." "_" __tname ${__fname})
add_custom_target(generate_${__tname} ALL DEPENDS ${KERNELS_BINARY_DIR}/${__fname})
@@ -563,7 +563,7 @@ function(unpack_db db_bzip2_file)
if(NOT MIOPEN_USE_SQLITE_PERFDB AND __extension STREQUAL ".db")
add_custom_command(OUTPUT ${KERNELS_BINARY_DIR}/${__fname}.txt
DEPENDS sqlite2txt generate_${__tname}
- COMMAND $<TARGET_FILE:sqlite2txt> ${KERNELS_BINARY_DIR}/${__fname} ${KERNELS_BINARY_DIR}/${__fname}.txt
+ COMMAND test -e ${KERNELS_BINARY_DIR}/${__fname}.txt || $<TARGET_FILE:sqlite2txt> ${KERNELS_BINARY_DIR}/${__fname} ${KERNELS_BINARY_DIR}/${__fname}.txt
)
add_custom_target(generate_${__tname}_txt ALL DEPENDS ${KERNELS_BINARY_DIR}/${__fname}.txt)
add_dependencies(generate_kernels generate_${__tname}_txt)

View File

@@ -12,7 +12,6 @@
rocblas, rocblas,
miopen, miopen,
migraphx, migraphx,
clang,
openmp, openmp,
protobuf, protobuf,
qtcreator, qtcreator,
@@ -43,13 +42,13 @@ stdenv.mkDerivation (finalAttrs: {
"cpu" "cpu"
); );
version = "6.0.2"; version = "6.3.1";
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "MIVisionX"; repo = "MIVisionX";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-d32lcJq24MXeIWbNbo6putWaol5kF2io6cz4ZuL+DbE="; hash = "sha256-SisCbUDCAiWQ1Ue7qrtoT6vO/1ztzqji+3cJD6MXUNw=";
}; };
patches = [ patches = [
@@ -98,6 +97,9 @@ stdenv.mkDerivation (finalAttrs: {
"-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include" "-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DCMAKE_INSTALL_PREFIX_PYTHON=lib" "-DCMAKE_INSTALL_PREFIX_PYTHON=lib"
"-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
"-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
"-DOpenMP_omp_LIBRARY=${openmp}/lib"
# "-DAMD_FP16_SUPPORT=ON" `error: typedef redefinition with different types ('__half' vs 'half_float::half')` # "-DAMD_FP16_SUPPORT=ON" `error: typedef redefinition with different types ('__half' vs 'half_float::half')`
] ]
++ lib.optionals (gpuTargets != [ ]) [ ++ lib.optionals (gpuTargets != [ ]) [
@@ -115,37 +117,26 @@ stdenv.mkDerivation (finalAttrs: {
postPatch = '' postPatch = ''
# We need to not use hipcc and define the CXXFLAGS manually due to `undefined hidden symbol: tensorflow:: ...` # We need to not use hipcc and define the CXXFLAGS manually due to `undefined hidden symbol: tensorflow:: ...`
export CXXFLAGS+="--rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode" export CXXFLAGS+=" --rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode"
patchShebangs rocAL/rocAL_pybind/examples # Properly find miopen, fix ffmpeg version detection
# Properly find miopen
substituteInPlace amd_openvx_extensions/CMakeLists.txt \ substituteInPlace amd_openvx_extensions/CMakeLists.txt \
--replace "miopen PATHS \''${ROCM_PATH} QUIET" "miopen PATHS ${miopen} QUIET" \ --replace "miopen PATHS \''${ROCM_PATH} QUIET" "miopen PATHS ${miopen} QUIET" \
--replace "\''${ROCM_PATH}/include/miopen/config.h" "${miopen}/include/miopen/config.h" --replace "\''${ROCM_PATH}/include/miopen/config.h" "${miopen}/include/miopen/config.h"
# Properly find turbojpeg # Properly find turbojpeg
substituteInPlace amd_openvx/cmake/FindTurboJpeg.cmake \ substituteInPlace cmake/FindTurboJpeg.cmake \
--replace "\''${TURBO_JPEG_PATH}/include" "${libjpeg_turbo.dev}/include" \ --replace-fail "\''${TURBO_JPEG_PATH}/include" "${libjpeg_turbo.dev}/include" \
--replace "\''${TURBO_JPEG_PATH}/lib" "${libjpeg_turbo.out}/lib" --replace-fail "\''${TURBO_JPEG_PATH}/lib" "${libjpeg_turbo.out}/lib"
# Fix bad paths
substituteInPlace rocAL/rocAL/rocAL_hip/CMakeLists.txt amd_openvx_extensions/amd_nn/nn_hip/CMakeLists.txt amd_openvx/openvx/hipvx/CMakeLists.txt \
--replace "COMPILER_FOR_HIP \''${ROCM_PATH}/llvm/bin/clang++" "COMPILER_FOR_HIP ${clang}/bin/clang++"
''; '';
postBuild = lib.optionalString buildDocs '' postBuild = lib.optionalString buildDocs ''
python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html
''; '';
postInstall = lib.optionalString (!useOpenCL && !useCPU) ''
patchelf $out/lib/rocal_pybind*.so --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE"
chmod +x $out/lib/rocal_pybind*.so
'';
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -154,8 +145,6 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken = broken = useOpenCL;
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -0,0 +1,42 @@
# MSCCL++ (github.com/microsoft/mscclpp) configured for ROCm via `clr`.
# The source is pinned to a single upstream commit, hence the "unstable" version.
{
  fetchFromGitHub,
  stdenv,
  cmake,
  clr,
  numactl,
  nlohmann_json,
}:
let
  # Single source of truth for the GPU architectures this package is built for.
  gpuArchs = [
    "gfx908"
    "gfx90a"
    "gfx942"
    "gfx1030"
    "gfx1100"
  ];
  # ";"-separated form, passed to both GPU_TARGETS and AMDGPU_TARGETS.
  archsForCMake = builtins.concatStringsSep ";" gpuArchs;
  # Space-separated form, matching the style of the literal replaced in CMakeLists.txt.
  archsForSource = builtins.concatStringsSep " " gpuArchs;
in
stdenv.mkDerivation {
  pname = "mscclpp";
  version = "unstable-2024-12-13";

  src = fetchFromGitHub {
    owner = "microsoft";
    repo = "mscclpp";
    rev = "ee75caf365a27b9ab7521cfdda220b55429e5c37";
    hash = "sha256-/mi9T9T6OIVtJWN3YoEe9az/86rz7BrX537lqaEh3ig=";
  };

  nativeBuildInputs = [ cmake ];

  buildInputs = [
    clr
    numactl
  ];

  # Swap upstream's hard-coded architecture list for ours. --replace-fail aborts
  # the build if the upstream literal is no longer present, so a source bump
  # that changes it is noticed instead of silently ignored.
  postPatch = ''
    substituteInPlace CMakeLists.txt \
      --replace-fail "gfx90a gfx941 gfx942" "${archsForSource}"
  '';

  cmakeFlags = [
    # NOTE(review): presumably needed because no GPU is visible while building — confirm.
    "-DMSCCLPP_BYPASS_GPU_CHECK=ON"
    "-DMSCCLPP_USE_ROCM=ON"
    "-DMSCCLPP_BUILD_TESTS=OFF"
    # Both spellings of the target variable are set to the same list.
    "-DGPU_TARGETS=${archsForCMake}"
    "-DAMDGPU_TARGETS=${archsForCMake}"
    "-DMSCCLPP_BUILD_APPS_NCCL=ON"
    "-DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF"
    # FetchContent settings: log verbosely, try find_package first, and use the
    # nixpkgs nlohmann_json source tree for the `json` dependency.
    "-DFETCHCONTENT_QUIET=OFF"
    "-DFETCHCONTENT_TRY_FIND_PACKAGE_MODE=ALWAYS"
    "-DFETCHCONTENT_SOURCE_DIR_JSON=${nlohmann_json.src}"
  ];

  env = {
    ROCM_PATH = clr;
  };
}

View File

@@ -6,18 +6,35 @@
cmake, cmake,
rocm-cmake, rocm-cmake,
rocm-smi, rocm-smi,
rocm-core,
clr, clr,
mscclpp,
perl, perl,
hipify, hipify,
gtest, gtest,
chrpath, chrpath,
rocprofiler,
rocprofiler-register,
autoPatchelfHook,
buildTests ? false, buildTests ? false,
gpuTargets ? [ ], gpuTargets ? (clr.localGpuTargets or [ ]),
}: }:
let
useAsan = buildTests;
useUbsan = buildTests;
san = lib.optionalString (useAsan || useUbsan) (
"-fno-gpu-sanitize -fsanitize=undefined "
+ (lib.optionalString useAsan "-fsanitize=address -shared-libsan ")
);
in
# Note: we can't properly test or make use of multi-node collective ops.
# https://github.com/NixOS/nixpkgs/issues/366242 tracks the missing kernel support:
# - kfd_peerdirect support is in the out-of-tree amdkfd in ROCm/ROCK-Kernel-Driver
# - infiniband ib_peer_mem support isn't in the mainline kernel but is carried by some distros
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rccl"; pname = "rccl${clr.gpuArchSuffix}";
version = "6.0.2"; version = "6.3.1";
outputs = outputs =
[ [
@@ -27,11 +44,17 @@ stdenv.mkDerivation (finalAttrs: {
"test" "test"
]; ];
patches = [
./fix-mainline-support-and-ub.diff
./enable-mscclpp-on-all-gfx9.diff
./rccl-test-missing-iomanip.diff
];
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "rccl"; repo = "rccl";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-Oyml47yGEB7fALxBcDjqFngS38cnI39sDj94/JV7wE0="; hash = "sha256-61yvFqloOO6qtn0H6XsAPvJ6LKlOeXgTD/xbjCuB3zQ=";
}; };
nativeBuildInputs = [ nativeBuildInputs = [
@@ -40,12 +63,16 @@ stdenv.mkDerivation (finalAttrs: {
clr clr
perl perl
hipify hipify
autoPatchelfHook # ASAN doesn't add rpath without this
]; ];
buildInputs = buildInputs =
[ [
rocm-smi rocm-smi
gtest gtest
rocprofiler
rocprofiler-register
mscclpp
] ]
++ lib.optionals buildTests [ ++ lib.optionals buildTests [
chrpath chrpath
@@ -53,8 +80,17 @@ stdenv.mkDerivation (finalAttrs: {
cmakeFlags = cmakeFlags =
[ [
"-DCMAKE_CXX_COMPILER=hipcc" "-DHIP_CLANG_NUM_PARALLEL_JOBS=4"
"-DCMAKE_BUILD_TYPE=Release"
"-DROCM_PATH=${clr}"
"-DHIP_COMPILER=${clr}/bin/amdclang++"
"-DCMAKE_CXX_COMPILER=${clr}/bin/amdclang++"
"-DROCM_PATCH_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}"
"-DROCM_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}"
"-DBUILD_BFD=OFF" # Can't get it to detect bfd.h "-DBUILD_BFD=OFF" # Can't get it to detect bfd.h
"-DENABLE_MSCCL_KERNEL=ON"
"-DENABLE_MSCCLPP=ON"
"-DMSCCLPP_ROOT=${mscclpp}"
# Manually define CMAKE_INSTALL_<DIR> # Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838 # See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_BINDIR=bin"
@@ -62,32 +98,37 @@ stdenv.mkDerivation (finalAttrs: {
"-DCMAKE_INSTALL_INCLUDEDIR=include" "-DCMAKE_INSTALL_INCLUDEDIR=include"
] ]
++ lib.optionals (gpuTargets != [ ]) [ ++ lib.optionals (gpuTargets != [ ]) [
# AMD can't make up their minds and keep changing which one is used in different projects.
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
"-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
] ]
++ lib.optionals buildTests [ ++ lib.optionals buildTests [
"-DBUILD_TESTS=ON" "-DBUILD_TESTS=ON"
]; ];
# -O2 and -fno-strict-aliasing due to UB issues in RCCL :c
# Reported upstream
env.CFLAGS = "-I${clr}/include -O2 -fno-strict-aliasing ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer";
env.CXXFLAGS = "-I${clr}/include -O2 -fno-strict-aliasing ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer";
env.LDFLAGS = "${san}";
postPatch = '' postPatch = ''
patchShebangs src tools patchShebangs src tools
# Really strange behavior, `#!/usr/bin/env perl` should work...
substituteInPlace CMakeLists.txt \
--replace "\''$ \''${hipify-perl_executable}" "${perl}/bin/perl ${hipify}/bin/hipify-perl" \
--replace-warn "-parallel-jobs=12" "-parallel-jobs=1" \
--replace-warn "-parallel-jobs=16" "-parallel-jobs=1"
''; '';
postInstall = lib.optionalString buildTests '' postInstall =
mkdir -p $test/bin lib.optionalString useAsan ''
mv $out/bin/* $test/bin patchelf --add-needed ${clr}/llvm/lib/linux/libclang_rt.asan-${stdenv.hostPlatform.parsed.cpu.name}.so $out/lib/librccl.so
rmdir $out/bin ''
''; + lib.optionalString buildTests ''
mkdir -p $test/bin
mv $out/bin/* $test/bin
rmdir $out/bin
'';
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -99,8 +140,5 @@ stdenv.mkDerivation (finalAttrs: {
]; ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -0,0 +1,13 @@
diff --git a/src/init.cc b/src/init.cc
index 738f756..1b0e4fc 100644
--- a/src/init.cc
+++ b/src/init.cc
@@ -2049,7 +2049,7 @@ static ncclResult_t ncclCommInitRankFunc(struct ncclAsyncJob* job_) {
if (mscclEnabled() && (comm->topo->mscclEnabled || mscclForceEnabled()) && mscclppCommCompatible(comm)) {
hipDeviceProp_t devProp;
CUDACHECK(hipGetDeviceProperties(&devProp, cudaDev));
- comm->mscclppCompatible = IsArchMatch(devProp.gcnArchName, "gfx94");
+ comm->mscclppCompatible = IsArchMatch(devProp.gcnArchName, "gfx9");
if (comm->mscclppCompatible) {
bool mapContainsId = (mscclpp_uniqueIdMap.count(job->commId) > 0);
auto& mscclppUniqueId = mscclpp_uniqueIdMap[job->commId];

View File

@@ -0,0 +1,178 @@
diff --git a/src/include/bootstrap.h b/src/include/bootstrap.h
index 8c5f081..9922b79 100644
--- a/src/include/bootstrap.h
+++ b/src/include/bootstrap.h
@@ -10,11 +10,13 @@
#include "nccl.h"
#include "comm.h"
+// this is accessed through unaligned ptrs because ncclUniqueId is a typedef of char[128]
struct ncclBootstrapHandle {
uint64_t magic;
union ncclSocketAddress addr;
};
static_assert(sizeof(struct ncclBootstrapHandle) <= sizeof(ncclUniqueId), "Bootstrap handle is too large to fit inside NCCL unique ID");
+static_assert(alignof(struct ncclBootstrapHandle) == alignof(ncclUniqueId), "Bootstrap handle must have same alignment as NCCL unique ID to avoid UB");
ncclResult_t bootstrapNetInit();
ncclResult_t bootstrapCreateRoot(struct ncclBootstrapHandle* handle, bool idFromEnv);
diff --git a/src/misc/rocmwrap.cc b/src/misc/rocmwrap.cc
index b3063d5..464b80d 100644
--- a/src/misc/rocmwrap.cc
+++ b/src/misc/rocmwrap.cc
@@ -131,9 +131,12 @@ static void initOnceFunc() {
//format and store the kernel conf file location
snprintf(kernel_conf_file, sizeof(kernel_conf_file), "/boot/config-%s", utsname.release);
fp = fopen(kernel_conf_file, "r");
- if (fp == NULL) INFO(NCCL_INIT,"Could not open kernel conf file");
+ if (fp == NULL) {
+ INFO(NCCL_INIT,"Could not open kernel conf file, will assume CONFIG_DMABUF_MOVE_NOTIFY and CONFIG_PCI_P2PDMA are enabled");
+ }
//look for kernel_opt1 and kernel_opt2 in the conf file and check
- while (fgets(buf, sizeof(buf), fp) != NULL) {
+ // FIXME: This check is broken, CONFIG_DMABUF_MOVE_NOTIFY could be across a buf boundary.
+ while (fp && fgets(buf, sizeof(buf), fp) != NULL) {
if (strstr(buf, kernel_opt1) != NULL) {
found_opt1 = 1;
INFO(NCCL_INIT,"CONFIG_DMABUF_MOVE_NOTIFY=y in /boot/config-%s", utsname.release);
@@ -143,11 +146,12 @@ static void initOnceFunc() {
INFO(NCCL_INIT,"CONFIG_PCI_P2PDMA=y in /boot/config-%s", utsname.release);
}
}
- if (!found_opt1 || !found_opt2) {
+ if (fp && (!found_opt1 || !found_opt2)) {
dmaBufSupport = 0;
INFO(NCCL_INIT, "CONFIG_DMABUF_MOVE_NOTIFY and CONFIG_PCI_P2PDMA should be set for DMA_BUF in /boot/config-%s", utsname.release);
INFO(NCCL_INIT, "DMA_BUF_SUPPORT Failed due to OS kernel support");
}
+ if (fp) fclose(fp);
if(dmaBufSupport) INFO(NCCL_INIT, "DMA_BUF Support Enabled");
else goto error;
diff --git a/src/nccl.h.in b/src/nccl.h.in
index 1d127b0..6296073 100644
--- a/src/nccl.h.in
+++ b/src/nccl.h.in
@@ -39,7 +39,7 @@ typedef struct ncclComm* ncclComm_t;
#define NCCL_UNIQUE_ID_BYTES 128
/*! @brief Opaque unique id used to initialize communicators
@details The ncclUniqueId must be passed to all participating ranks */
-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; /*!< Opaque array>*/} ncclUniqueId;
+typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; /*!< Opaque array>*/} ncclUniqueId;
/*! @defgroup rccl_result_code Result Codes
@details The various result codes that RCCL API calls may return
diff --git a/src/proxy.cc b/src/proxy.cc
index 50e5437..51bb401 100644
--- a/src/proxy.cc
+++ b/src/proxy.cc
@@ -965,7 +965,11 @@ struct ncclProxyConnectionPool {
static ncclResult_t ncclProxyNewConnection(struct ncclProxyConnectionPool* pool, int* id) {
if (pool->offset == NCCL_PROXY_CONN_POOL_SIZE) {
- NCCLCHECK(ncclRealloc(&pool->pools, pool->banks, pool->banks+1));
+ if (pool->pools) {
+ NCCLCHECK(ncclRealloc(&pool->pools, pool->banks, pool->banks+1));
+ } else {
+ NCCLCHECK(ncclCalloc(&pool->pools, pool->banks+1));
+ }
NCCLCHECK(ncclCalloc(pool->pools+pool->banks, NCCL_PROXY_CONN_POOL_SIZE));
pool->banks++;
pool->offset = 0;
diff --git a/src/transport/net_ib.cc b/src/transport/net_ib.cc
index 6d77784..49762d3 100644
--- a/src/transport/net_ib.cc
+++ b/src/transport/net_ib.cc
@@ -573,7 +573,7 @@ ncclResult_t ncclIbGdrSupport() {
// Requires support from NIC driver modules
// Use ONLY for debugging!
moduleLoaded = 1;
- INFO(NCCL_INIT, "RCCL_FORCE_ENABLE_GDRDMA = 1, so explicitly setting moduleLoaded = 1");
+ INFO(NCCL_INIT, "ncclIbGdrSupport: RCCL_FORCE_ENABLE_GDRDMA = 1, so explicitly setting moduleLoaded = 1");
}
if (moduleLoaded == -1) {
@@ -586,13 +586,14 @@ ncclResult_t ncclIbGdrSupport() {
// or created under a different path like `/sys/kernel/` or `/sys/` (depending on your ib_peer_mem module)
const char* memory_peers_paths[] = {"/sys/kernel/mm/memory_peers/amdkfd/version",
"/sys/kernel/memory_peers/amdkfd/version",
- "/sys/memory_peers/amdkfd/version"};
+ "/sys/memory_peers/amdkfd/version",
+ NULL};
int i = 0;
while (memory_peers_paths[i]) {
if (access(memory_peers_paths[i], F_OK) == 0) {
moduleLoaded = 1;
- INFO(NCCL_INIT,"Found %s", memory_peers_paths[i]);
+ INFO(NCCL_INIT,"ncclIbGdrSupport: Found %s", memory_peers_paths[i]);
break;
} else {
moduleLoaded = 0;
@@ -612,22 +613,23 @@ ncclResult_t ncclIbGdrSupport() {
if (moduleLoaded == 0) {
// Check for `ib_register_peer_memory_client` symbol in `/proc/kallsyms`
// if your system uses native OS ib_peer module
- char buf[256];
- FILE *fp = NULL;
- fp = fopen("/proc/kallsyms", "r");
+ FILE *fp = fopen("/proc/kallsyms", "r");
+ char *line = NULL;
+ size_t len = 0;
if (fp == NULL) {
- INFO(NCCL_INIT,"Could not open /proc/kallsyms");
+ INFO(NCCL_INIT,"ncclIbGdrSupport: Could not open /proc/kallsyms to check for ib_register_peer_memory_client");
} else {
- while (fgets(buf, sizeof(buf), fp) != NULL) {
- if (strstr(buf, "t ib_register_peer_memory_client") != NULL ||
- strstr(buf, "T ib_register_peer_memory_client") != NULL) {
+ while (getline(&line, &len, fp) > 0) {
+ if (line && strstr(line, "ib_register_peer_memory_client") != NULL) {
moduleLoaded = 1;
- INFO(NCCL_INIT,"Found ib_register_peer_memory_client in /proc/kallsyms");
+ INFO(NCCL_INIT,"ncclIbGdrSupport: Found ib_register_peer_memory_client in /proc/kallsyms");
break;
}
}
}
+ if (line) free(line);
+ if (fp) fclose(fp);
}
#else
// Check for the nv_peer_mem module being loaded
@@ -637,7 +639,7 @@ ncclResult_t ncclIbGdrSupport() {
#endif
}
if (moduleLoaded == 0) {
- INFO(NCCL_INIT,"GDRDMA not enabled. Could not find memory_peers directory or peer_memory symbol");
+ INFO(NCCL_INIT,"ncclIbGdrSupport: GDRDMA not enabled. Could not find memory_peers directory or peer_memory symbol");
return ncclSystemError;
}
return ncclSuccess;
diff --git a/tools/ib-test/include/nccl.h b/tools/ib-test/include/nccl.h
index 2c86c33..5801c61 100755
--- a/tools/ib-test/include/nccl.h
+++ b/tools/ib-test/include/nccl.h
@@ -31,7 +31,7 @@ extern "C" {
typedef struct ncclComm* ncclComm_t;
#define NCCL_UNIQUE_ID_BYTES 128
-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
+typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
/* Error type */
typedef enum { ncclSuccess = 0,
diff --git a/tools/topo_expl/include/nccl.h b/tools/topo_expl/include/nccl.h
index 729561b..4e4bdd9 100644
--- a/tools/topo_expl/include/nccl.h
+++ b/tools/topo_expl/include/nccl.h
@@ -35,7 +35,7 @@ typedef struct ncclComm* ncclComm_t;
#define NCCL_COMM_NULL NULL
#define NCCL_UNIQUE_ID_BYTES 128
-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
+typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
/*! @brief Error type */
typedef enum { ncclSuccess = 0,

View File

@@ -0,0 +1,10 @@
--- a/test/common/TestBed.cpp
+++ b/test/common/TestBed.cpp
@@ -4,6 +4,7 @@
* See LICENSE.txt for license information
************************************************************************/
#include <unistd.h>
+#include <iomanip>
#include "TestBed.hpp"
#include <rccl/rccl.h>

View File

@@ -4,6 +4,7 @@
fetchFromGitHub, fetchFromGitHub,
rocmUpdateScript, rocmUpdateScript,
cmake, cmake,
amdsmi,
rocm-smi, rocm-smi,
rocm-runtime, rocm-runtime,
libcap, libcap,
@@ -46,7 +47,7 @@ let
in in
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rdc"; pname = "rdc";
version = "6.0.2"; version = "6.3.1";
outputs = outputs =
[ [
@@ -63,7 +64,7 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm"; owner = "ROCm";
repo = "rdc"; repo = "rdc";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-QugcajxILmDeQiWG5uAUO41Wut45irg2Ynufgn1bmps="; hash = "sha256-sKsti7LeWsxvOmc9h/srsl0OmHkJIRNRiV+8mFVG3/M=";
}; };
nativeBuildInputs = nativeBuildInputs =
@@ -79,6 +80,7 @@ stdenv.mkDerivation (finalAttrs: {
buildInputs = buildInputs =
[ [
amdsmi
rocm-smi rocm-smi
rocm-runtime rocm-runtime
libcap libcap
@@ -126,8 +128,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -136,7 +138,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
# broken = versions.minor finalAttrs.version != versions.minor rocm-smi.version || versionAtLeast finalAttrs.version "7.0.0";
broken = true; # Too many errors, unsure how to fix
}; };
}) })

View File

@@ -11,6 +11,7 @@
rocrand, rocrand,
clr, clr,
git, git,
pkg-config,
openmp, openmp,
openmpi, openmpi,
gtest, gtest,
@@ -22,7 +23,7 @@
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocalution"; pname = "rocalution";
version = "6.0.2"; version = "6.3.1";
outputs = outputs =
[ [
@@ -42,7 +43,7 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm"; owner = "ROCm";
repo = "rocALUTION"; repo = "rocALUTION";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-mrN+CI2mqaMi8oKxui7HAIE2qSn50aNaFipkWwYMtbc="; hash = "sha256-xdZ3HUiRGsreHfJH8RgL/s3jGyC5ABmBKcEfgtqWg8Y=";
}; };
nativeBuildInputs = [ nativeBuildInputs = [
@@ -50,6 +51,7 @@ stdenv.mkDerivation (finalAttrs: {
rocm-cmake rocm-cmake
clr clr
git git
pkg-config
]; ];
buildInputs = buildInputs =
@@ -65,9 +67,12 @@ stdenv.mkDerivation (finalAttrs: {
gtest gtest
]; ];
CXXFLAGS = "-I${openmp.dev}/include";
cmakeFlags = cmakeFlags =
[ [
"-DCMAKE_CXX_COMPILER=hipcc" "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
"-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
"-DOpenMP_omp_LIBRARY=${openmp}/lib"
"-DROCM_PATH=${clr}" "-DROCM_PATH=${clr}"
"-DHIP_ROOT_DIR=${clr}" "-DHIP_ROOT_DIR=${clr}"
"-DSUPPORT_HIP=ON" "-DSUPPORT_HIP=ON"
@@ -82,6 +87,7 @@ stdenv.mkDerivation (finalAttrs: {
] ]
++ lib.optionals (gpuTargets != [ ]) [ ++ lib.optionals (gpuTargets != [ ]) [
"-DAMDGPU_TARGETS=${lib.strings.concatStringsSep ";" gpuTargets}" "-DAMDGPU_TARGETS=${lib.strings.concatStringsSep ";" gpuTargets}"
"-DGPU_TARGETS=${lib.strings.concatStringsSep ";" gpuTargets}"
] ]
++ lib.optionals buildTests [ ++ lib.optionals buildTests [
"-DBUILD_CLIENTS_TESTS=ON" "-DBUILD_CLIENTS_TESTS=ON"
@@ -115,8 +121,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -125,8 +131,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -14,21 +14,24 @@
gtest, gtest,
gfortran, gfortran,
openmp, openmp,
git,
amd-blis, amd-blis,
zstd,
hipblas-common,
hipblaslt,
python3Packages, python3Packages,
rocm-smi,
buildTensile ? true, buildTensile ? true,
buildTests ? false, buildTests ? true,
buildBenchmarks ? false, buildBenchmarks ? true,
tensileLogic ? "asm_full",
tensileCOVersion ? "default",
# https://github.com/ROCm/Tensile/issues/1757 # https://github.com/ROCm/Tensile/issues/1757
# Allows gfx101* users to use rocBLAS normally. # Allows gfx101* users to use rocBLAS normally.
# Turn the below two values to `true` after the fix has been cherry-picked # Turn the below two values to `true` after the fix has been cherry-picked
# into a release. Just backporting that single fix is not enough because it # into a release. Just backporting that single fix is not enough because it
# depends on some previous commits. # depends on some previous commits.
tensileSepArch ? false, tensileSepArch ? true,
tensileLazyLib ? false, tensileLazyLib ? true,
tensileLibFormat ? "msgpack", withHipBlasLt ? true,
# `gfx940`, `gfx941` are not present in this list because they are early # `gfx940`, `gfx941` are not present in this list because they are early
# engineering samples, and all final MI300 hardware are `gfx942`: # engineering samples, and all final MI300 hardware are `gfx942`:
# https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2032791130 # https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2032791130
@@ -37,38 +40,47 @@
# would force all `gfx101*` GPUs to run as `gfx1010`, so `gfx101*` GPUs will # would force all `gfx101*` GPUs to run as `gfx1010`, so `gfx101*` GPUs will
# always try to use `gfx1010` code objects, hence building for `gfx1012` is # always try to use `gfx1010` code objects, hence building for `gfx1012` is
# useless: https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152 # useless: https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152
gpuTargets ? [ gpuTargets ? (
"gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102" clr.localGpuTargets or [
], "gfx900"
"gfx906"
"gfx908"
"gfx90a"
"gfx942"
"gfx1010"
"gfx1030"
"gfx1100"
"gfx1101"
"gfx1102"
]
),
}: }:
let
gpuTargets' = lib.concatStringsSep ";" gpuTargets;
in
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocblas"; pname = "rocblas${clr.gpuArchSuffix}";
version = "6.0.2"; version = "6.3.1";
outputs = outputs = [
[ "out"
"out" ];
]
++ lib.optionals buildTests [
"test"
]
++ lib.optionals buildBenchmarks [
"benchmark"
];
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "rocBLAS"; repo = "rocBLAS";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-G68d/gvBbTdNx8xR3xY+OkBm5Yxq1NFjxby9BbpOcUk="; hash = "sha256-IYcrVcGH4yZDkFZeNOJPfG0qsPS/WiH0fTSUSdo1BH4=";
}; };
nativeBuildInputs = nativeBuildInputs =
[ [
cmake cmake
# no ninja, it buffers console output and nix times out after long periods of no output
rocm-cmake rocm-cmake
clr clr
git
] ]
++ lib.optionals buildTensile [ ++ lib.optionals buildTensile [
tensile tensile
@@ -77,12 +89,17 @@ stdenv.mkDerivation (finalAttrs: {
buildInputs = buildInputs =
[ [
python3 python3
hipblas-common
]
++ lib.optionals withHipBlasLt [
hipblaslt
] ]
++ lib.optionals buildTensile [ ++ lib.optionals buildTensile [
zstd
msgpack msgpack
libxml2 libxml2
python3Packages.msgpack python3Packages.msgpack
python3Packages.joblib python3Packages.zstandard
] ]
++ lib.optionals buildTests [ ++ lib.optionals buildTests [
gtest gtest
@@ -91,38 +108,61 @@ stdenv.mkDerivation (finalAttrs: {
gfortran gfortran
openmp openmp
amd-blis amd-blis
rocm-smi
] ]
++ lib.optionals (buildTensile || buildTests || buildBenchmarks) [ ++ lib.optionals (buildTensile || buildTests || buildBenchmarks) [
python3Packages.pyyaml python3Packages.pyyaml
]; ];
dontStrip = true;
env.CXXFLAGS =
"-O3 -DNDEBUG -I${hipblas-common}/include"
+ lib.optionalString (buildTests || buildBenchmarks) " -I${amd-blis}/include/blis";
# Fails to link tests if we don't add amd-blis libs
env.LDFLAGS = lib.optionalString (
buildTests || buildBenchmarks
) "-Wl,--as-needed -L${amd-blis}/lib -lblis-mt -lcblas";
env.TENSILE_ROCM_ASSEMBLER_PATH = "${stdenv.cc}/bin/clang++";
cmakeFlags = cmakeFlags =
[ [
(lib.cmakeFeature "CMAKE_C_COMPILER" "hipcc") (lib.cmakeFeature "CMAKE_BUILD_TYPE" "Release")
(lib.cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") (lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true)
(lib.cmakeFeature "CMAKE_EXECUTE_PROCESS_COMMAND_ECHO" "STDERR")
(lib.cmakeFeature "CMAKE_Fortran_COMPILER" "${lib.getBin gfortran}/bin/gfortran")
(lib.cmakeFeature "CMAKE_Fortran_COMPILER_AR" "${lib.getBin gfortran}/bin/ar")
(lib.cmakeFeature "CMAKE_Fortran_COMPILER_RANLIB" "${lib.getBin gfortran}/bin/ranlib")
(lib.cmakeFeature "python" "python3") (lib.cmakeFeature "python" "python3")
(lib.cmakeFeature "AMDGPU_TARGETS" (lib.concatStringsSep ";" gpuTargets)) (lib.cmakeFeature "SUPPORTED_TARGETS" gpuTargets')
(lib.cmakeFeature "AMDGPU_TARGETS" gpuTargets')
(lib.cmakeFeature "GPU_TARGETS" gpuTargets')
(lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile) (lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile)
(lib.cmakeBool "ROCM_SYMLINK_LIBS" false) (lib.cmakeBool "ROCM_SYMLINK_LIBS" false)
(lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas") (lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas")
(lib.cmakeBool "BUILD_WITH_HIPBLASLT" withHipBlasLt)
(lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests) (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests)
(lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks) (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks)
# rocblas header files are not installed unless we set this (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildBenchmarks)
(lib.cmakeFeature "CMAKE_INSTALL_INCLUDEDIR" "include") (lib.cmakeBool "BUILD_OFFLOAD_COMPRESS" true)
# Temporarily set variables to work around upstream CMakeLists issue
# Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DCMAKE_INSTALL_LIBDIR=lib"
] ]
++ lib.optionals buildTensile [ ++ lib.optionals buildTensile [
"-DCPACK_SET_DESTDIR=OFF"
"-DLINK_BLIS=ON"
"-DTensile_CODE_OBJECT_VERSION=default"
"-DTensile_LOGIC=asm_full"
"-DTensile_LIBRARY_FORMAT=msgpack"
(lib.cmakeBool "BUILD_WITH_PIP" false) (lib.cmakeBool "BUILD_WITH_PIP" false)
(lib.cmakeFeature "Tensile_LOGIC" tensileLogic)
(lib.cmakeFeature "Tensile_CODE_OBJECT_VERSION" tensileCOVersion)
(lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch) (lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch)
(lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib) (lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib)
(lib.cmakeFeature "Tensile_LIBRARY_FORMAT" tensileLibFormat)
(lib.cmakeBool "Tensile_PRINT_DEBUG" true)
]
++ lib.optionals (buildTests || buildBenchmarks) [
(lib.cmakeFeature "CMAKE_CXX_FLAGS" "-I${amd-blis}/include/blis")
]; ];
passthru.amdgpu_targets = gpuTargets';
patches = [ patches = [
(fetchpatch { (fetchpatch {
name = "Extend-rocBLAS-HIP-ISA-compatibility.patch"; name = "Extend-rocBLAS-HIP-ISA-compatibility.patch";
@@ -135,14 +175,17 @@ stdenv.mkDerivation (finalAttrs: {
postPatch = '' postPatch = ''
substituteInPlace cmake/build-options.cmake \ substituteInPlace cmake/build-options.cmake \
--replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"' --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
substituteInPlace CMakeLists.txt \
--replace-fail "4.42.0" "4.43.0"
''; '';
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
enableParallelBuilding = true;
requiredSystemFeatures = [ "big-parallel" ]; requiredSystemFeatures = [ "big-parallel" ];
meta = with lib; { meta = with lib; {
@@ -151,8 +194,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -1,66 +1,76 @@
{ lib {
, stdenv lib,
, fetchFromGitHub stdenv,
, rocmUpdateScript fetchFromGitHub,
, cmake rocmUpdateScript,
, rocm-cmake cmake,
, git rocm-cmake,
, rocm-comgr git,
, rocm-runtime rocm-comgr,
, hwdata rocm-runtime,
, texliveSmall hwdata,
, doxygen texliveSmall,
, graphviz doxygen,
, buildDocs ? true graphviz,
buildDocs ? true,
}: }:
let let
latex = lib.optionalAttrs buildDocs (texliveSmall.withPackages (ps: with ps; [ latex = lib.optionalAttrs buildDocs (
changepage texliveSmall.withPackages (
latexmk ps: with ps; [
varwidth changepage
multirow latexmk
hanging varwidth
adjustbox multirow
collectbox hanging
stackengine adjustbox
enumitem collectbox
alphalph stackengine
wasysym enumitem
sectsty alphalph
tocloft wasysym
newunicodechar sectsty
etoc tocloft
helvetic newunicodechar
wasy etoc
courier helvetic
])); wasy
in stdenv.mkDerivation (finalAttrs: { courier
]
)
);
in
stdenv.mkDerivation (finalAttrs: {
pname = "rocdbgapi"; pname = "rocdbgapi";
version = "6.0.2"; version = "6.3.1";
outputs = [ outputs =
"out" [
] ++ lib.optionals buildDocs [ "out"
"doc" ]
]; ++ lib.optionals buildDocs [
"doc"
];
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "ROCdbgapi"; repo = "ROCdbgapi";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-+CxaTmxRt/RicqQddqIEHs8vvAPCMKXkWg7kbZvnUsQ="; hash = "sha256-6itfBrWVspobU47aiJAOQoxT8chwrq9scRn0or3bXto=";
}; };
nativeBuildInputs = [ nativeBuildInputs =
cmake [
rocm-cmake cmake
git rocm-cmake
] ++ lib.optionals buildDocs [ git
latex ]
doxygen ++ lib.optionals buildDocs [
graphviz latex
]; doxygen
graphviz
];
buildInputs = [ buildInputs = [
rocm-comgr rocm-comgr
@@ -83,21 +93,15 @@ in stdenv.mkDerivation (finalAttrs: {
make -j$NIX_BUILD_CORES doc make -j$NIX_BUILD_CORES doc
''; '';
postInstall = '' postInstall = lib.optionalString buildDocs ''
substituteInPlace $out/lib/cmake/amd-dbgapi/amd-dbgapi-config.cmake \
--replace "/build/source/build/" ""
substituteInPlace $out/lib/cmake/amd-dbgapi/amd-dbgapi-targets.cmake \
--replace "/build/source/build" "$out"
'' + lib.optionalString buildDocs ''
mv $out/share/html/amd-dbgapi $doc/share/doc/amd-dbgapi/html mv $out/share/html/amd-dbgapi $doc/share/doc/amd-dbgapi/html
rmdir $out/share/html rmdir $out/share/html
''; '';
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -106,6 +110,5 @@ in stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -1,5 +1,4 @@
{ {
rocfft,
lib, lib,
stdenv, stdenv,
fetchFromGitHub, fetchFromGitHub,
@@ -15,18 +14,18 @@
gtest, gtest,
openmp, openmp,
rocrand, rocrand,
gpuTargets ? [ ], gpuTargets ? clr.localGpuTargets or clr.gpuTargets,
}: }:
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocfft"; pname = "rocfft${clr.gpuArchSuffix}";
version = "6.0.2"; version = "6.3.1";
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "rocFFT"; repo = "rocFFT";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-6Gjsy14GeR08VqnNmFhu8EyYDnQ+VZRlg+u9MAAWfHc="; hash = "sha256-RrxdwZ64uC7lQzyJI1eGHX2dmRnW8TfNThnuvuz5XWo=";
}; };
nativeBuildInputs = [ nativeBuildInputs = [
@@ -36,6 +35,8 @@ stdenv.mkDerivation (finalAttrs: {
rocm-cmake rocm-cmake
]; ];
# FIXME: rocfft_aot_helper runs at the end of the build and risks timing the build out
# due to a long period with no terminal output
buildInputs = [ sqlite ]; buildInputs = [ sqlite ];
cmakeFlags = cmakeFlags =
@@ -156,8 +157,8 @@ stdenv.mkDerivation (finalAttrs: {
updateScript = rocmUpdateScript { updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
}; };
@@ -169,8 +170,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -21,13 +21,13 @@
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocgdb"; pname = "rocgdb";
version = "6.0.2"; version = "6.3.1";
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "ROCgdb"; repo = "ROCgdb";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-XeX/k8gfo9HgcUSIjs35C7IqCmFhvBOqQJSOoPF6HK4="; hash = "sha256-P9NbYMrCs0UpnaEIP+bJEM6yPiRHzl0lI0J4+A7/ePc=";
}; };
nativeBuildInputs = [ nativeBuildInputs = [
@@ -91,8 +91,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -101,6 +101,5 @@ stdenv.mkDerivation (finalAttrs: {
license = licenses.gpl3Plus; license = licenses.gpl3Plus;
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken = versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -1,27 +1,31 @@
{ lib {
, stdenv lib,
, fetchFromGitHub stdenv,
, rocmUpdateScript fetchFromGitHub,
, cmake rocmUpdateScript,
rocm-core,
cmake,
}: }:
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocm-cmake"; pname = "rocm-cmake";
version = "6.0.2"; version = "6.3.1";
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "rocm-cmake"; repo = "rocm-cmake";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-qSjWT0KOQ5oDV06tfnKN+H/JzdoOnR9KY0c+SjvDepM="; hash = "sha256-8kEcwqHJF584AteuddP7Ai7n6ltVZJ8a6RsYIWGMs0U=";
}; };
nativeBuildInputs = [ cmake ]; nativeBuildInputs = [ cmake ];
buildInputs = [ rocm-core ];
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -30,6 +34,5 @@ stdenv.mkDerivation (finalAttrs: {
license = licenses.mit; license = licenses.mit;
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.unix; platforms = platforms.unix;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -1,11 +1,13 @@
{ {
lib, lib,
stdenv, stdenv,
fetchFromGitHub, fetchpatch,
rocmUpdateScript,
cmake, cmake,
rocm-cmake, python3,
rocm-merged-llvm,
rocm-device-libs, rocm-device-libs,
zlib,
zstd,
libxml2, libxml2,
}: }:
@@ -20,34 +22,45 @@ let
in in
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocm-comgr"; pname = "rocm-comgr";
version = "6.0.2"; # In-tree with ROCm LLVM
inherit (rocm-merged-llvm) version;
src = rocm-merged-llvm.llvm-src;
src = fetchFromGitHub { sourceRoot = "${finalAttrs.src.name}/amd/comgr";
owner = "ROCm";
repo = "ROCm-CompilerSupport";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-9HuNU/k+kPJMlzqOTM20gm6SAOWJe9tpAZXEj4erdmI=";
};
sourceRoot = "${finalAttrs.src.name}/lib/comgr"; patches = [
# [Comgr] Extend ISA compatibility
(fetchpatch {
sha256 = "sha256-dgow0kwSWM1TnkqWOZDRQrh5nuF8p5jbYyOLCpQsH4k=";
url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/a439e4f37ce71de48d4a979594276e3be0e6278f.patch";
relative = "amd/comgr";
})
# [Comgr] Extend ISA compatibility for CCOB
(fetchpatch {
sha256 = "sha256-6Rwz12Lk4R2JK3olii3cr2Zd0ZLYe7VSpK1YRCOsJWY=";
url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/2d8c459a4d4c0567a7a275b4b54560d88e5c6919.patch";
relative = "amd/comgr";
})
];
nativeBuildInputs = [ nativeBuildInputs = [
cmake cmake
rocm-cmake python3
]; ];
buildInputs = [ buildInputs = [
rocm-device-libs rocm-device-libs
libxml2 libxml2
zlib
zstd
rocm-merged-llvm
]; ];
cmakeFlags = [ "-DLLVM_TARGETS_TO_BUILD=AMDGPU;X86" ]; cmakeFlags = [
"-DCMAKE_VERBOSE_MAKEFILE=ON"
passthru.updateScript = rocmUpdateScript { "-DCMAKE_BUILD_TYPE=Release"
name = finalAttrs.pname; "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}"
owner = finalAttrs.src.owner; ];
repo = finalAttrs.src.repo;
};
meta = with lib; { meta = with lib; {
description = "APIs for compiling and inspecting AMDGPU code objects"; description = "APIs for compiling and inspecting AMDGPU code objects";
@@ -55,8 +68,5 @@ stdenv.mkDerivation (finalAttrs: {
license = licenses.ncsa; license = licenses.ncsa;
maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -4,28 +4,43 @@
fetchFromGitHub, fetchFromGitHub,
rocmUpdateScript, rocmUpdateScript,
cmake, cmake,
writeText,
}: }:
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocm-core"; pname = "rocm-core";
version = "6.0.2"; version = "6.3.1";
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "rocm-core"; repo = "rocm-core";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-sgL1UMt3o01zA8v41dyCG1fAsK/PkTRsHQJOvlNatZ4="; hash = "sha256-UDnPGvgwzwv49CzF+Kt0v95CsxS33BZeqNcKw1K6jRI=";
}; };
nativeBuildInputs = [ cmake ]; nativeBuildInputs = [ cmake ];
cmakeFlags = [ "-DROCM_VERSION=${finalAttrs.version}" ]; # FIXME: What's the correct way to set this?
env.ROCM_LIBPATCH_VERSION = "${lib.versions.major finalAttrs.version}0${lib.versions.minor finalAttrs.version}0${lib.versions.patch finalAttrs.version}";
env.BUILD_ID = "nixos-${finalAttrs.env.ROCM_LIBPATCH_VERSION}";
env.ROCM_BUILD_ID = "release-${finalAttrs.env.BUILD_ID}";
cmakeFlags = [
"-DROCM_LIBPATCH_VERSION=${finalAttrs.env.ROCM_LIBPATCH_VERSION}"
"-DROCM_VERSION=${finalAttrs.version}"
"-DBUILD_ID=${finalAttrs.env.BUILD_ID}"
];
setupHook = writeText "setupHook.sh" ''
export ROCM_LIBPATCH_VERSION="${finalAttrs.env.ROCM_LIBPATCH_VERSION}"
export BUILD_ID="${finalAttrs.env.BUILD_ID}"
export ROCM_BUILD_ID="${finalAttrs.env.ROCM_BUILD_ID}"
'';
passthru.ROCM_LIBPATCH_VERSION = finalAttrs.env.ROCM_LIBPATCH_VERSION;
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
page = "tags?per_page=1"; page = "tags?per_page=4";
filter = ".[0].name | split(\"-\") | .[1]";
}; };
meta = with lib; { meta = with lib; {
@@ -34,8 +49,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -1,7 +1,7 @@
diff --git a/cmake/Packages.cmake b/cmake/Packages.cmake diff --git a/cmake/Packages.cmake b/cmake/Packages.cmake
index 07c60eb..c736b3e 100644 index 07c60eb..c736b3e 100644
--- a/cmake/Packages.cmake --- a/amd/device-libs/cmake/Packages.cmake
+++ b/cmake/Packages.cmake +++ b/amd/device-libs/cmake/Packages.cmake
@@ -12,24 +12,29 @@ set_target_properties(${target} PROPERTIES @@ -12,24 +12,29 @@ set_target_properties(${target} PROPERTIES
IMPORTED_LOCATION \"${target_path}\")") IMPORTED_LOCATION \"${target_path}\")")
endforeach() endforeach()

View File

@@ -1,11 +1,14 @@
{ {
lib, lib,
stdenv, stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake, cmake,
rocm-cmake, ninja,
libxml2, libxml2,
zlib,
zstd,
ncurses,
rocm-merged-llvm,
python3,
}: }:
let let
@@ -19,30 +22,34 @@ let
in in
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocm-device-libs"; pname = "rocm-device-libs";
version = "6.0.2"; # In-tree with ROCm LLVM
inherit (rocm-merged-llvm) version;
src = rocm-merged-llvm.llvm-src;
src = fetchFromGitHub { postPatch = ''
owner = "ROCm"; cd amd/device-libs
repo = "ROCm-Device-Libs"; '';
rev = "rocm-${finalAttrs.version}";
hash = "sha256-7XG7oSkJ3EPWTYGea0I50eB1/DPMD5agmjctxZYTbLQ=";
};
patches = [ ./cmake.patch ]; patches = [ ./cmake.patch ];
nativeBuildInputs = [ nativeBuildInputs = [
cmake cmake
rocm-cmake ninja
python3
]; ];
buildInputs = [ libxml2 ]; buildInputs = [
cmakeFlags = [ "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}" ]; libxml2
zlib
zstd
ncurses
rocm-merged-llvm
];
passthru.updateScript = rocmUpdateScript { cmakeFlags = [
name = finalAttrs.pname; "-DCMAKE_RELEASE_TYPE=Release"
owner = finalAttrs.src.owner; "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}"
repo = finalAttrs.src.repo; ];
};
meta = with lib; { meta = with lib; {
description = "Set of AMD-specific device-side language runtime libraries"; description = "Set of AMD-specific device-side language runtime libraries";
@@ -50,8 +57,5 @@ stdenv.mkDerivation (finalAttrs: {
license = licenses.ncsa; license = licenses.ncsa;
maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -1,23 +1,23 @@
{ lib {
, stdenv lib,
, fetchFromGitHub fetchFromGitHub,
, gitUpdater gitUpdater,
, buildPythonPackage buildPythonPackage,
, setuptools setuptools,
, beautifulsoup4 beautifulsoup4,
, gitpython gitpython,
, pydata-sphinx-theme pydata-sphinx-theme,
, pygithub pygithub,
, sphinx sphinx,
, breathe breathe,
, myst-parser myst-parser,
, sphinx-book-theme sphinx-book-theme,
, sphinx-copybutton sphinx-copybutton,
, sphinx-design sphinx-design,
, sphinx-external-toc sphinx-external-toc,
, sphinx-notfound-page sphinx-notfound-page,
, pyyaml pyyaml,
, fastjsonschema fastjsonschema,
}: }:
# FIXME: Move to rocmPackages_common # FIXME: Move to rocmPackages_common
@@ -59,7 +59,10 @@ buildPythonPackage rec {
meta = with lib; { meta = with lib; {
description = "ROCm Documentation Python package for ReadTheDocs build standardization"; description = "ROCm Documentation Python package for ReadTheDocs build standardization";
homepage = "https://github.com/ROCm/rocm-docs-core"; homepage = "https://github.com/ROCm/rocm-docs-core";
license = with licenses; [ mit cc-by-40 ]; license = with licenses; [
mit
cc-by-40
];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
}; };

View File

@@ -0,0 +1,27 @@
# Merges the listed ROCm packages into one symlinked tree named
# `rocm-path-<clr version>`.
# NOTE(review): presumably meant to be used as a ROCM_PATH-style prefix — confirm with callers.
{
  symlinkJoin,
  linkFarm,
  clr,
  hipblas,
  hipblas-common,
  rocblas,
  rocsolver,
  rocsparse,
  rocm-device-libs,
  rocm-smi,
  llvm,
}:
let
  # Directory containing a single `llvm` entry that links to llvm.clang,
  # so the joined tree gets an `llvm/` subdirectory.
  llvmSubdir = linkFarm "rocm-llvm-subdir" { llvm = llvm.clang; };
in
symlinkJoin {
  name = "rocm-path-" + clr.version;
  paths = [
    clr
    hipblas-common
    hipblas
    rocblas
    rocsolver
    rocsparse
    rocm-device-libs
    rocm-smi
    llvmSubdir
  ];
}

View File

@@ -6,37 +6,40 @@
rocmUpdateScript, rocmUpdateScript,
pkg-config, pkg-config,
cmake, cmake,
ninja,
xxd, xxd,
rocm-device-libs, rocm-device-libs,
rocm-thunk,
elfutils, elfutils,
libdrm, libdrm,
numactl, numactl,
valgrind, valgrind,
libxml2, libxml2,
rocm-merged-llvm,
}: }:
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocm-runtime"; pname = "rocm-runtime";
version = "6.0.2"; version = "6.3.1";
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "ROCR-Runtime"; repo = "ROCR-Runtime";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-xNMG954HI9SOfvYYB/62fhmm9mmR4I10uHP2nqn9EgI="; hash = "sha256-btpiIPV9REMvrmRSUzBIpBO6ehVIMmEmG+H8hqHDxdE=";
}; };
sourceRoot = "${finalAttrs.src.name}/src"; env.CFLAGS = "-I${numactl.dev}/include -I${elfutils.dev}/include -w";
env.CXXFLAGS = "-I${numactl.dev}/include -I${elfutils.dev}/include -w";
nativeBuildInputs = [ nativeBuildInputs = [
pkg-config pkg-config
cmake cmake
ninja
xxd xxd
rocm-merged-llvm
]; ];
buildInputs = [ buildInputs = [
rocm-thunk
elfutils elfutils
libdrm libdrm
numactl numactl
@@ -44,34 +47,56 @@ stdenv.mkDerivation (finalAttrs: {
libxml2 libxml2
]; ];
cmakeFlags = [
"-DBUILD_SHARED_LIBS=ON"
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
];
patches = [ patches = [
# Patches for UB at runtime https://github.com/ROCm/ROCR-Runtime/issues/272
(fetchpatch { (fetchpatch {
name = "extend-isa-compatibility-check.patch"; # [PATCH] hsa-runtime: set underlying type of hsa_region_info_t and hsa_amd_region_info_t to int
url = "https://salsa.debian.org/rocm-team/rocr-runtime/-/raw/076026d43bbee7f816b81fea72f984213a9ff961/debian/patches/0004-extend-isa-compatibility-check.patch"; url = "https://github.com/ROCm/ROCR-Runtime/commit/39a6a168fa07e289a10f6e20e6ead4e303e99ba0.patch";
hash = "sha256-cC030zVGS4kNXwaztv5cwfXfVwOldpLGV9iYgEfPEnY="; hash = "sha256-CshJJDvII1nNyNmt+YjwMwfBHUTlrdsxkhwfgBwO+WE=";
stripLen = 1;
}) })
(fetchpatch {
# [PATCH] rocr: refactor of runtime.cpp based on Coverity
url = "https://github.com/ROCm/ROCR-Runtime/commit/441bd9fe6c7bdb5c4c31f71524ed642786bc923e.patch";
hash = "sha256-7bQXxGkipzgT2aXRxCuh3Sfmo/zc/IOmA0x1zB+fMb0=";
})
(fetchpatch {
# [PATCH] queues: fix UB due to 1 << 31
url = "https://github.com/ROCm/ROCR-Runtime/commit/9b8a0f5dbee1903fa990a7d8accc1c5fbc549636.patch";
hash = "sha256-KlZWjfngH8yKly08iwC+Bzpvp/4dkaTpRIKdFYwRI+U=";
})
(fetchpatch {
# [PATCH] topology: fix UB due to 1 << 31
url = "https://github.com/ROCm/ROCR-Runtime/commit/d1d00bfee386d263e13c2b64fb6ffd1156deda7c.patch";
hash = "sha256-u70WEZaphQ7qTfgQPFATwdKWtHytu7CFH7Pzv1rOM8w=";
})
(fetchpatch {
# [PATCH] kfd_ioctl: fix UB due to 1 << 31
url = "https://github.com/ROCm/ROCR-Runtime/commit/41bfc66aef437a5b349f71105fa4b907cc7e17d5.patch";
hash = "sha256-A7VhPR3eSsmjq2cTBSjBIz9i//WiNjoXm0EsRKtF+ns=";
})
./remove-hsa-aqlprofile-dep.patch
]; ];
postPatch = '' postPatch = ''
patchShebangs image/blit_src/create_hsaco_ascii_file.sh patchShebangs --host image core runtime
patchShebangs core/runtime/trap_handler/create_trap_handler_header.sh
patchShebangs core/runtime/blit_shaders/create_blit_shader_header.sh
substituteInPlace CMakeLists.txt \ substituteInPlace CMakeLists.txt \
--replace 'hsa/include/hsa' 'include/hsa' --replace 'hsa/include/hsa' 'include/hsa'
# We compile clang before rocm-device-libs, so patch it in afterwards export HIP_DEVICE_LIB_PATH="${rocm-device-libs}/amdgcn/bitcode"
# Replace object version: https://github.com/ROCm/ROCR-Runtime/issues/166 (TODO: Remove on LLVM update?)
substituteInPlace image/blit_src/CMakeLists.txt \
--replace '-cl-denorms-are-zero' '-cl-denorms-are-zero --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode' \
--replace '-mcode-object-version=4' '-mcode-object-version=5'
''; '';
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -80,8 +105,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ ncsa ]; license = with licenses; [ ncsa ];
maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -0,0 +1,27 @@
libhsa-amd-aqlprofile64 library is unfree
Bug: https://github.com/ROCm/ROCm/issues/1781
--- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
+++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
@@ -1333,11 +1333,6 @@ hsa_status_t GpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const {
setFlag(HSA_EXTENSION_AMD_PC_SAMPLING);
}
- if (os::LibHandle lib = os::LoadLib(kAqlProfileLib)) {
- os::CloseLib(lib);
- setFlag(HSA_EXTENSION_AMD_AQLPROFILE);
- }
-
setFlag(HSA_EXTENSION_AMD_PROFILER);
break;
--- a/runtime/hsa-runtime/core/runtime/hsa.cpp
+++ b/runtime/hsa-runtime/core/runtime/hsa.cpp
@@ -490,7 +490,7 @@ hsa_status_t hsa_system_get_major_extension_table(uint16_t extension, uint16_t v
return HSA_STATUS_SUCCESS;
}
- if (extension == HSA_EXTENSION_AMD_AQLPROFILE) {
+ if (0) {
if (version_major != hsa_ven_amd_aqlprofile_VERSION_MAJOR) {
debug_print("aqlprofile API incompatible ver %d, current ver %d\n",
version_major, hsa_ven_amd_aqlprofile_VERSION_MAJOR);

View File

@@ -1,20 +1,21 @@
{ lib {
, stdenv lib,
, fetchFromGitHub stdenv,
, rocmUpdateScript fetchFromGitHub,
, cmake rocmUpdateScript,
, wrapPython cmake,
wrapPython,
}: }:
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocm-smi"; pname = "rocm-smi";
version = "6.0.2"; version = "6.3.1";
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "rocm_smi_lib"; repo = "rocm_smi_lib";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-fS52hpTv1WEycwkGZLXjz383WJWzyk8RvJRshEQSG/A="; hash = "sha256-j9pkyUt+p6IkhawIhiTymqDBydxXZunxmdyCyRN0RxE=";
}; };
patches = [ ./cmake.patch ]; patches = [ ./cmake.patch ];
@@ -34,13 +35,14 @@ stdenv.mkDerivation (finalAttrs: {
postInstall = '' postInstall = ''
wrapPythonProgramsIn $out wrapPythonProgramsIn $out
mv $out/libexec/rocm_smi/.rsmiBindingsInit.py-wrapped $out/libexec/rocm_smi/rsmiBindingsInit.py
mv $out/libexec/rocm_smi/.rsmiBindings.py-wrapped $out/libexec/rocm_smi/rsmiBindings.py mv $out/libexec/rocm_smi/.rsmiBindings.py-wrapped $out/libexec/rocm_smi/rsmiBindings.py
''; '';
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -49,6 +51,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
platforms = [ "x86_64-linux" ]; platforms = [ "x86_64-linux" ];
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -0,0 +1,32 @@
{
  clr,
  ollama,
  python3Packages,
  rocmPackages,
  magma-hip,
  emptyDirectory,
  stdenv,
}:
let
  # magma-hip rebuilt against the rocmPackages set passed to this file;
  # handed to torch below.
  rocmMagma = magma-hip.override { inherit rocmPackages; };
in
# Placeholder derivation whose only job is to carry passthru.tests:
# the build itself just creates an empty $out from an empty source tree.
stdenv.mkDerivation {
  name = "rocm-tests";
  src = emptyDirectory;
  nativeBuildInputs = [ clr ];
  postInstall = "mkdir -p $out";

  passthru.tests = {
    # ollama with ROCm acceleration, using this ROCm package set.
    ollama = ollama.override {
      acceleration = "rocm";
      inherit rocmPackages;
    };
    # torch with ROCm enabled and CUDA disabled, using this ROCm package set.
    torch = python3Packages.torch.override {
      cudaSupport = false;
      rocmSupport = true;
      magma-hip = rocmMagma;
      inherit rocmPackages;
    };
  };
}

View File

@@ -1,54 +0,0 @@
# Builds the ROCT-Thunk-Interface sources from the ROCm GitHub organisation
# at the `rocm-<version>` tag, using cmake and pkg-config.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  pkg-config,
  cmake,
  libdrm,
  numactl,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocm-thunk";
  version = "6.0.2";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "ROCT-Thunk-Interface";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-F6Qi+A9DuSx2e4WSfp4cnniKr0CkCZcZqsKwQmmZHhk=";
  };

  nativeBuildInputs = [
    pkg-config
    cmake
  ];

  buildInputs = [
    libdrm
    numactl
  ];

  cmakeFlags = [
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = with lib; {
    description = "Radeon open compute thunk interface";
    homepage = "https://github.com/ROCm/ROCT-Thunk-Interface";
    license = with licenses; [
      bsd2
      mit
    ];
    maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
    platforms = platforms.linux;
    # Broken when the package's minor version differs from the compiler's
    # minor version, or once the package version reaches 7.0.0.
    broken =
      versions.minor finalAttrs.version != versions.minor stdenv.cc.version
      || versionAtLeast finalAttrs.version "7.0.0";
  };
})

View File

@@ -9,24 +9,25 @@
busybox, busybox,
python3, python3,
gnugrep, gnugrep,
clr, # Only for localGpuTargets
# rocminfo requires that the calling user have a password and be in # rocminfo requires that the calling user have a password and be in
# the video group. If we let rocm_agent_enumerator rely upon # the video group. If we let rocm_agent_enumerator rely upon
# rocminfo's output, then it, too, has those requirements. Instead, # rocminfo's output, then it, too, has those requirements. Instead,
# we can specify the GPU targets for this system (e.g. "gfx803" for # we can specify the GPU targets for this system (e.g. "gfx803" for
# Polaris) such that no system call is needed for downstream # Polaris) such that no system call is needed for downstream
# compilers to determine the desired target. # compilers to determine the desired target.
defaultTargets ? [ ], defaultTargets ? (clr.localGpuTargets or [ ]),
}: }:
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
version = "6.0.2"; version = "6.3.1";
pname = "rocminfo"; pname = "rocminfo";
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "rocminfo"; repo = "rocminfo";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
sha256 = "sha256-k0QeCyQcarGbAh4ft8Y7JBK6l2nWxDUc20XoYmtrMMs="; sha256 = "sha256-TL57Mznq5qPorDON0EaINBCoEFMN4dcAmRfRgS//nok=";
}; };
nativeBuildInputs = [ nativeBuildInputs = [
@@ -49,8 +50,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -59,9 +60,5 @@ stdenv.mkDerivation (finalAttrs: {
license = licenses.ncsa; license = licenses.ncsa;
maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
stdenv.hostPlatform.isAarch64
|| versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -7,7 +7,6 @@
cmake, cmake,
rocm-cmake, rocm-cmake,
rocminfo, rocminfo,
ninja,
clr, clr,
git, git,
libxml2, libxml2,
@@ -20,6 +19,13 @@
buildTests ? false, # `argument of type 'NoneType' is not iterable` buildTests ? false, # `argument of type 'NoneType' is not iterable`
}: }:
# FIXME: rocmlir has an entire separate LLVM build in a subdirectory, which is silly.
# It seems to be forked from AMD's own LLVM.
# If possible, reusing the rocmPackages.llvm build would be better.
# We would have to confirm it is compatible with ROCm's tagged LLVM.
# Fairly likely it's not, given AMD's track record of forking their own software
# in incompatible ways in subdirs.
# Theoretically, we could have our MLIR have an output # Theoretically, we could have our MLIR have an output
# with the source and built objects so that we can just # with the source and built objects so that we can just
# use it as the external LLVM repo for this # use it as the external LLVM repo for this
@@ -36,7 +42,7 @@ let
in in
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocmlir${suffix}"; pname = "rocmlir${suffix}";
version = "6.0.2"; version = "6.3.1";
outputs = outputs =
[ [
@@ -50,13 +56,12 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm"; owner = "ROCm";
repo = "rocMLIR"; repo = "rocMLIR";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-AypY0vL8Ij1zLycwpG2EPWWl4utp4ejXpAK0Jj/UvrA="; hash = "sha256-0SQ6uLDRfVfdCX+8a7D6pu6dYlFvX0HFzCDEvlKYfak=";
}; };
nativeBuildInputs = [ nativeBuildInputs = [
cmake cmake
rocm-cmake rocm-cmake
ninja
clr clr
python3Packages.python python3Packages.python
python3Packages.tomli python3Packages.tomli
@@ -75,23 +80,17 @@ stdenv.mkDerivation (finalAttrs: {
]; ];
patches = [ patches = [
(fetchpatch { ./initparamdata-sort-const.patch
name = "fix-TosaToRock-missing-includes.patch";
url = "https://github.com/ROCm/rocMLIR/commit/80b8c94a5dd6ab832733116fe0339c1d6011ab57.patch";
hash = "sha256-przg1AQZTiVbVd/4wA+KlGXu/RISO5n11FBkmUFKRSA=";
})
(fetchpatch {
name = "fix-cmake-depedency-on-transforms.patch";
url = "https://github.com/ROCm/rocMLIR/commit/b85ca4855e0f0214c2fd695e493c884cf08a3472.patch";
hash = "sha256-m108PnwvDAN3xWko+gZMgvCNFl4LXTvC67JHXhFHeBc=";
})
]; ];
cmakeFlags = cmakeFlags =
[ [
"-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}" "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}"
"-DLLVM_ENABLE_ZSTD=ON" "-DCMAKE_BUILD_TYPE=Release"
"-DLLVM_ENABLE_ZLIB=ON" "-DLLVM_USE_LINKER=lld"
"-DLLVM_ENABLE_ZSTD=FORCE_ON"
"-DLLVM_ENABLE_ZLIB=FORCE_ON"
"-DLLVM_ENABLE_LIBCXX=ON"
"-DLLVM_ENABLE_TERMINFO=ON" "-DLLVM_ENABLE_TERMINFO=ON"
"-DROCM_PATH=${clr}" "-DROCM_PATH=${clr}"
# Manually define CMAKE_INSTALL_<DIR> # Manually define CMAKE_INSTALL_<DIR>
@@ -99,9 +98,7 @@ stdenv.mkDerivation (finalAttrs: {
"-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include" "-DCMAKE_INSTALL_INCLUDEDIR=include"
] (lib.cmakeBool "BUILD_FAT_LIBROCKCOMPILER" buildRockCompiler)
++ lib.optionals buildRockCompiler [
"-DBUILD_FAT_LIBROCKCOMPILER=ON"
] ]
++ lib.optionals (!buildRockCompiler) [ ++ lib.optionals (!buildRockCompiler) [
"-DROCM_TEST_CHIPSET=gfx000" "-DROCM_TEST_CHIPSET=gfx000"
@@ -111,6 +108,10 @@ stdenv.mkDerivation (finalAttrs: {
patchShebangs mlir patchShebangs mlir
patchShebangs external/llvm-project/mlir/lib/Dialect/GPU/AmdDeviceLibsIncGen.py patchShebangs external/llvm-project/mlir/lib/Dialect/GPU/AmdDeviceLibsIncGen.py
# Fixes mlir/lib/Analysis/BufferDependencyAnalysis.cpp:41:19: error: redefinition of 'read'
substituteInPlace mlir/lib/Analysis/BufferDependencyAnalysis.cpp \
--replace-fail "enum EffectType { read, write, unknown };" "enum class EffectType { read, write, unknown };"
# remove when no longer required # remove when no longer required
substituteInPlace mlir/test/{e2e/generateE2ETest.py,fusion/e2e/generate-fusion-tests.py} \ substituteInPlace mlir/test/{e2e/generateE2ETest.py,fusion/e2e/generate-fusion-tests.py} \
--replace-fail "\"/opt/rocm/bin" "\"${rocminfo}/bin" --replace-fail "\"/opt/rocm/bin" "\"${rocminfo}/bin"
@@ -150,10 +151,9 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
page = "tags?per_page=2"; page = "tags?per_page=4";
filter = ".[1].name | split(\"-\") | .[1]";
}; };
meta = with lib; { meta = with lib; {
@@ -162,8 +162,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ asl20 ]; license = with licenses; [ asl20 ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -0,0 +1,13 @@
diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h b/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h
index 3f5ee596819a..590d53788822 100644
--- a/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h
+++ b/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h
@@ -209,7 +209,7 @@ private:
size_t original_pos;
int64_t padding_amount;
- bool operator<(const InitParamData &rhs) {
+ bool operator<(const InitParamData &rhs) const {
if (this->padding_amount < rhs.padding_amount) {
return true;
} else if (this->padding_amount == rhs.padding_amount) {

View File

@@ -1,87 +1,91 @@
{ lib {
, fetchpatch lib,
, stdenv stdenv,
, fetchFromGitHub fetchFromGitHub,
, rocmUpdateScript rocmUpdateScript,
, cmake cmake,
, rocm-cmake rocm-cmake,
, clr clr,
, gtest gtest,
, gbenchmark gbenchmark,
, buildTests ? false buildTests ? false,
, buildBenchmarks ? false buildBenchmarks ? false,
, gpuTargets ? [ ] gpuTargets ? [ ],
}: }:
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocprim"; pname = "rocprim";
version = "6.0.2"; version = "6.3.1";
outputs = [ outputs =
"out" [
] ++ lib.optionals buildTests [ "out"
"test" ]
] ++ lib.optionals buildBenchmarks [ ++ lib.optionals buildTests [
"benchmark" "test"
]; ]
++ lib.optionals buildBenchmarks [
"benchmark"
];
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "rocPRIM"; repo = "rocPRIM";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-nWvq26qRPZ6Au1rc5cR74TKArcdUFg7O9djFi8SvMeM="; hash = "sha256-0aHxpBuYIYhI2UER45YhHHL5YcxA+XeXoihcUs2AmCo=";
}; };
patches = [
(fetchpatch {
name = "arch-conversion-marco.patch";
url = "https://salsa.debian.org/rocm-team/rocprim/-/raw/70c8aaee3cf545d92685f4ed9bf8f41e3d4d570c/debian/patches/arch-conversion-macro.patch";
hash = "sha256-oXdmbCArOB5bKE8ozDFrSh4opbO+c4VI6PNhljeUSms=";
})
];
nativeBuildInputs = [ nativeBuildInputs = [
cmake cmake
rocm-cmake rocm-cmake
clr clr
]; ];
buildInputs = lib.optionals buildTests [ buildInputs =
gtest lib.optionals buildTests [
] ++ lib.optionals buildBenchmarks [ gtest
gbenchmark ]
]; ++ lib.optionals buildBenchmarks [
gbenchmark
];
cmakeFlags = [ cmakeFlags =
"-DCMAKE_CXX_COMPILER=hipcc" [
# Manually define CMAKE_INSTALL_<DIR> "-DCMAKE_BUILD_TYPE=Release"
# See: https://github.com/NixOS/nixpkgs/pull/197838 # Manually define CMAKE_INSTALL_<DIR>
"-DCMAKE_INSTALL_BINDIR=bin" # See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_INCLUDEDIR=include" "-DCMAKE_INSTALL_LIBDIR=lib"
] ++ lib.optionals (gpuTargets != [ ]) [ "-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" ]
] ++ lib.optionals buildTests [ ++ lib.optionals (gpuTargets != [ ]) [
"-DBUILD_TEST=ON" "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
] ++ lib.optionals buildBenchmarks [ ]
"-DBUILD_BENCHMARK=ON" ++ lib.optionals buildTests [
]; "-DBUILD_TEST=ON"
]
++ lib.optionals buildBenchmarks [
"-DBUILD_BENCHMARK=ON"
];
postInstall = lib.optionalString buildTests '' postInstall =
mkdir -p $test/bin lib.optionalString buildTests ''
mv $out/bin/test_* $test/bin mkdir -p $test/bin
mv $out/bin/rocprim $test/bin mv $out/bin/test_* $test/bin
'' + lib.optionalString buildBenchmarks '' mv $out/bin/rocprim $test/bin
mkdir -p $benchmark/bin ''
mv $out/bin/benchmark_* $benchmark/bin + lib.optionalString buildBenchmarks ''
'' + lib.optionalString (buildTests || buildBenchmarks) '' mkdir -p $benchmark/bin
rmdir $out/bin mv $out/bin/benchmark_* $benchmark/bin
''; ''
+ lib.optionalString (buildTests || buildBenchmarks) ''
rmdir $out/bin
'';
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -90,6 +94,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -0,0 +1,79 @@
# Derivation for rocprofiler-register, built with CMake from the
# ROCm/rocprofiler-register repository at the tag `rocm-<version>`.
{
  lib,
  stdenv,
  rocm-runtime,
  rocprofiler,
  numactl,
  libpciaccess,
  libxml2,
  elfutils,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  clang,
  clr,
  python3Packages,
  # GPU architectures passed to CMake as GPU_TARGETS; follows clr unless overridden.
  gpuTargets ? clr.gpuTargets,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocprofiler-register";
  version = "6.3.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocprofiler-register";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-UZsCiGnudsbL1v5lKBx7Vz3/HRnGn4f86Pd+qu3ryh0=";
    fetchSubmodules = true;
  };

  nativeBuildInputs = [
    cmake
    clang
    clr
  ];

  buildInputs = [
    numactl
    libpciaccess
    libxml2
    elfutils
    rocm-runtime
    # Merged ROCm tree exposed by the rocprofiler package via passthru.
    rocprofiler.rocmtoolkit-merged
    python3Packages.lxml
    python3Packages.cppheaderparser
    python3Packages.pyyaml
    python3Packages.barectf
    python3Packages.pandas
  ];

  cmakeFlags = [
    "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
    "-DHIP_ROOT_DIR=${clr}"
    "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    # NOTE(review): the ROCPROFILER_* switches mirror the rocprofiler package;
    # confirm that this project's CMake actually consumes them.
    "-DBUILD_TEST=OFF"
    "-DROCPROFILER_BUILD_TESTS=0"
    "-DROCPROFILER_BUILD_SAMPLES=0"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  passthru.updateScript = rocmUpdateScript {
    name = "rocprofiler-register";
    inherit (finalAttrs.src) owner repo;
  };

  meta = with lib; {
    # Description and homepage refer to this project, not to rocprofiler,
    # whose metadata the original file carried over unchanged.
    description = "Helper library for registering ROCm runtime API tables with rocprofiler";
    homepage = "https://github.com/ROCm/rocprofiler-register";
    license = with licenses; [ mit ]; # mitx11
    maintainers = teams.rocm.members;
    platforms = platforms.linux;
  };
})

View File

@@ -4,17 +4,14 @@
fetchFromGitHub, fetchFromGitHub,
rocmUpdateScript, rocmUpdateScript,
symlinkJoin, symlinkJoin,
replaceVars,
cmake, cmake,
clang, clang,
clr, clr,
rocm-core, rocm-core,
rocm-thunk, rocm-runtime,
rocm-device-libs, rocm-device-libs,
roctracer, roctracer,
rocdbgapi, rocdbgapi,
rocm-smi,
hsa-amd-aqlprofile-bin,
numactl, numactl,
libpciaccess, libpciaccess,
libxml2, libxml2,
@@ -22,6 +19,7 @@
mpi, mpi,
systemd, systemd,
gtest, gtest,
git,
python3Packages, python3Packages,
gpuTargets ? clr.gpuTargets, gpuTargets ? clr.gpuTargets,
}: }:
@@ -32,12 +30,10 @@ let
paths = [ paths = [
rocm-core rocm-core
rocm-thunk rocm-runtime
rocm-device-libs rocm-device-libs
roctracer roctracer
rocdbgapi rocdbgapi
rocm-smi
hsa-amd-aqlprofile-bin
clr clr
]; ];
@@ -48,32 +44,27 @@ let
in in
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocprofiler"; pname = "rocprofiler";
version = "6.0.2"; version = "6.3.1";
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "rocprofiler"; repo = "rocprofiler";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-yzgw9g5cHAZpdbU44+1ScZyUcZ2I4GGfjbm9GSqCClk="; hash = "sha256-kLiqKwxpeAkziBq4FRWhJ3IAvxVRcxi3AEEWgcVOfw4=";
fetchSubmodules = true;
}; };
patches = [ patches = [
# These just simply won't build # These just simply won't build
./0000-dont-install-tests-hsaco.patch ./0000-dont-install-tests-hsaco.patch
./optional-aql-in-cmake.patch
# Fix bad paths
(replaceVars ./0001-fix-shell-scripts.patch {
rocmtoolkit_merged = rocmtoolkit-merged;
})
# Fix for missing uint32_t not defined
./0002-include-stdint-in-version.patch
]; ];
nativeBuildInputs = [ nativeBuildInputs = [
cmake cmake
clang clang
clr clr
git
python3Packages.lxml python3Packages.lxml
python3Packages.cppheaderparser python3Packages.cppheaderparser
python3Packages.pyyaml python3Packages.pyyaml
@@ -93,12 +84,20 @@ stdenv.mkDerivation (finalAttrs: {
propagatedBuildInputs = [ rocmtoolkit-merged ]; propagatedBuildInputs = [ rocmtoolkit-merged ];
# HACK: allow building without aqlprofile, probably explodes at runtime if use profiling
env.LDFLAGS = "-z nodefs -Wl,-undefined,dynamic_lookup,--unresolved-symbols=ignore-all";
#HACK: rocprofiler's cmake doesn't add these deps properly
env.CXXFLAGS = "-I${libpciaccess}/include -I${numactl.dev}/include -I${rocmtoolkit-merged}/include -I${elfutils.dev}/include -w";
cmakeFlags = [ cmakeFlags = [
"-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip" "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
"-DHIP_ROOT_DIR=${clr}" "-DHIP_ROOT_DIR=${clr}"
"-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
# Manually define CMAKE_INSTALL_<DIR> # Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838 # See: https://github.com/NixOS/nixpkgs/pull/197838
"-DBUILD_TEST=OFF"
"-DROCPROFILER_BUILD_TESTS=0"
"-DROCPROFILER_BUILD_SAMPLES=0"
"-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include" "-DCMAKE_INSTALL_INCLUDEDIR=include"
@@ -107,6 +106,13 @@ stdenv.mkDerivation (finalAttrs: {
postPatch = '' postPatch = ''
patchShebangs . patchShebangs .
substituteInPlace cmake_modules/rocprofiler_utils.cmake \
--replace-fail 'function(ROCPROFILER_CHECKOUT_GIT_SUBMODULE)' 'function(ROCPROFILER_CHECKOUT_GIT_SUBMODULE)
return()'
substituteInPlace CMakeLists.txt \
--replace-fail 'set(ROCPROFILER_BUILD_TESTS ON)' ""
substituteInPlace tests-v2/featuretests/profiler/CMakeLists.txt \ substituteInPlace tests-v2/featuretests/profiler/CMakeLists.txt \
--replace "--build-id=sha1" "--build-id=sha1 --rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode" --replace "--build-id=sha1" "--build-id=sha1 --rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode"
@@ -115,20 +121,16 @@ stdenv.mkDerivation (finalAttrs: {
''; '';
postInstall = '' postInstall = ''
# Why do these not already have the executable bit set?
chmod +x $out/lib/rocprofiler/librocprof-tool.so
chmod +x $out/share/rocprofiler/tests-v1/test/ocl/SimpleConvolution
# Why do these have the executable bit set? # Why do these have the executable bit set?
chmod -x $out/libexec/rocprofiler/counters/basic_counters.xml chmod -x $out/libexec/rocprofiler/counters/*.xml
chmod -x $out/libexec/rocprofiler/counters/derived_counters.xml
''; '';
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
passthru.rocmtoolkit-merged = rocmtoolkit-merged;
meta = with lib; { meta = with lib; {
description = "Profiling with perf-counters and derived metrics"; description = "Profiling with perf-counters and derived metrics";
@@ -136,8 +138,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; # mitx11 license = with licenses; [ mit ]; # mitx11
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor clr.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -0,0 +1,147 @@
From https://raw.githubusercontent.com/AphidGit/rocm_compile/refs/heads/main/rocprofiler.patch
diff --git a/cmake_modules/rocprofiler_env.cmake b/cmake_modules/rocprofiler_env.cmake
index 7b7c472..0aba3ed 100644
--- a/cmake_modules/rocprofiler_env.cmake
+++ b/cmake_modules/rocprofiler_env.cmake
@@ -36,6 +36,7 @@ if(ROCPROFILER_DEBUG_TRACE)
target_compile_definitions(rocprofiler-build-flags INTERFACE DEBUG_TRACE=1)
endif()
+set(ROCPROFILER_LD_AQLPROFILE false)
# Enable direct loading of AQL-profile HSA extension
if(ROCPROFILER_LD_AQLPROFILE)
target_compile_definitions(rocprofiler-build-flags INTERFACE ROCP_LD_AQLPROFILE=1)
@@ -80,9 +81,3 @@ if("${ROCM_ROOT_DIR}" STREQUAL "")
message(FATAL_ERROR "ROCM_ROOT_DIR is not found.")
endif()
-find_library(
- HSA_AMD_AQLPROFILE_LIBRARY
- NAMES hsa-amd-aqlprofile64
- HINTS ${CMAKE_PREFIX_PATH}
- PATHS ${ROCM_ROOT_DIR}
- PATH_SUFFIXES lib REQUIRED)
diff --git a/src/api/CMakeLists.txt b/src/api/CMakeLists.txt
index 61782f0..16c83bf 100644
--- a/src/api/CMakeLists.txt
+++ b/src/api/CMakeLists.txt
@@ -51,15 +51,6 @@ find_file(
NO_DEFAULT_PATH REQUIRED)
get_filename_component(HSA_RUNTIME_INC_PATH ${HSA_H} DIRECTORY)
-find_library(
- AQLPROFILE_LIB "libhsa-amd-aqlprofile64.so"
- HINTS ${CMAKE_PREFIX_PATH}
- PATHS ${ROCM_PATH}
- PATH_SUFFIXES lib)
-
-if(NOT AQLPROFILE_LIB)
- message(FATAL_ERROR "AQL_PROFILE not installed. Please install hsa-amd-aqlprofile!")
-endif()
# ########################################################################################
# Adding Old Library Files
@@ -247,7 +238,7 @@ target_include_directories(
PUBLIC $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include/rocprofiler>
PRIVATE ${LIB_DIR} ${ROOT_DIR} ${PROJECT_SOURCE_DIR}/include/rocprofiler)
target_link_libraries(
- ${ROCPROFILER_TARGET} PRIVATE ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64 c stdc++
+ ${ROCPROFILER_TARGET} PRIVATE hsa-runtime64::hsa-runtime64 c stdc++
dl rocprofiler::build-flags rocprofiler::memcheck)
get_target_property(ROCPROFILER_LIBRARY_V1_NAME ${ROCPROFILER_TARGET} NAME)
@@ -325,8 +316,7 @@ target_link_options(
-Wl,--no-undefined)
target_link_libraries(
rocprofiler-v2
- PRIVATE ${AQLPROFILE_LIB}
- hsa-runtime64::hsa-runtime64
+ PRIVATE hsa-runtime64::hsa-runtime64
Threads::Threads
atomic
numa
diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp
index 2c47186..6b39634 100644
--- a/src/util/hsa_rsrc_factory.cpp
+++ b/src/util/hsa_rsrc_factory.cpp
@@ -155,17 +155,6 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
if (kern_arg_pool_ == nullptr)
CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR);
- // Get AqlProfile API table
- aqlprofile_api_ = {};
-#ifdef ROCP_LD_AQLPROFILE
- status = LoadAqlProfileLib(&aqlprofile_api_);
-#else
- status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE,
- hsa_ven_amd_aqlprofile_VERSION_MAJOR,
- sizeof(aqlprofile_api_), &aqlprofile_api_);
-#endif
- CHECK_STATUS("aqlprofile API table load failed", status);
-
// Get Loader API table
loader_api_ = {};
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1,
diff --git a/test/util/hsa_rsrc_factory.cpp b/test/util/hsa_rsrc_factory.cpp
index 0a44d18..fab5b75 100644
--- a/test/util/hsa_rsrc_factory.cpp
+++ b/test/util/hsa_rsrc_factory.cpp
@@ -137,17 +137,6 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
if (cpu_pool_ == NULL) CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR);
if (kern_arg_pool_ == NULL) CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR);
- // Get AqlProfile API table
- aqlprofile_api_ = {0};
-#ifdef ROCP_LD_AQLPROFILE
- status = LoadAqlProfileLib(&aqlprofile_api_);
-#else
- status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE,
- hsa_ven_amd_aqlprofile_VERSION_MAJOR,
- sizeof(aqlprofile_api_), &aqlprofile_api_);
-#endif
- CHECK_STATUS("aqlprofile API table load failed", status);
-
// Get Loader API table
loader_api_ = {0};
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1,
diff --git a/tests-v2/unittests/core/CMakeLists.txt b/tests-v2/unittests/core/CMakeLists.txt
index 107cb51..0f6d4bf 100644
--- a/tests-v2/unittests/core/CMakeLists.txt
+++ b/tests-v2/unittests/core/CMakeLists.txt
@@ -235,8 +235,7 @@ set_target_properties(runCoreUnitTests PROPERTIES
INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests")
target_link_libraries(
runCoreUnitTests
- PRIVATE ${AQLPROFILE_LIB}
- test_hsatool_library
+ PRIVATE test_hsatool_library
hsa-runtime64::hsa-runtime64
Threads::Threads
GTest::gtest GTest::gtest_main
@@ -285,4 +284,4 @@ endif()
# for the *_FilePlugin tests
if(NOT EXISTS "${PROJECT_BINARY_DIR}/test-output")
file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/test-output")
-endif()
\ No newline at end of file
+endif()
diff --git a/tests-v2/unittests/profiler/CMakeLists.txt b/tests-v2/unittests/profiler/CMakeLists.txt
index 53180d5..0c4d4a7 100644
--- a/tests-v2/unittests/profiler/CMakeLists.txt
+++ b/tests-v2/unittests/profiler/CMakeLists.txt
@@ -122,7 +122,7 @@ target_compile_definitions(
PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)
target_link_libraries(
- runUnitTests PRIVATE rocprofiler-v2 ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64
+ runUnitTests PRIVATE rocprofiler-v2 hsa-runtime64::hsa-runtime64
GTest::gtest GTest::gtest_main stdc++fs ${PCIACCESS_LIBRARIES} dw elf c dl)
add_dependencies(tests runUnitTests)
@@ -158,4 +158,4 @@ endif()
# for the *_FilePlugin tests
if(NOT EXISTS "${PROJECT_BINARY_DIR}/test-output")
file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/test-output")
-endif()
\ No newline at end of file
+endif()

View File

@@ -12,13 +12,13 @@
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocr-debug-agent"; pname = "rocr-debug-agent";
version = "6.0.2"; version = "6.3.1";
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "rocr_debug_agent"; repo = "rocr_debug_agent";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-8Q800T7mwBy8/rujVNyCQ0ZpZ9uPKKk+Sv9ibpWou/8="; hash = "sha256-HYag5/E72hopDhS9EVcdyGgSvzbCMzKqLC+SIS28Y9M=";
}; };
nativeBuildInputs = [ nativeBuildInputs = [
@@ -45,8 +45,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -55,8 +55,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ ncsa ]; license = with licenses; [ ncsa ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -1,34 +1,38 @@
{ lib {
, stdenv lib,
, fetchFromGitHub stdenv,
, rocmUpdateScript fetchFromGitHub,
, cmake rocmUpdateScript,
, rocm-cmake cmake,
, clr rocm-cmake,
, gtest clr,
, gbenchmark gtest,
, buildTests ? false gbenchmark,
, buildBenchmarks ? false buildTests ? false,
, gpuTargets ? [ ] buildBenchmarks ? false,
gpuTargets ? clr.localGpuTargets or [ ],
}: }:
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocrand"; pname = "rocrand${clr.gpuArchSuffix}";
version = "6.0.2"; version = "6.3.1";
outputs = [ outputs =
"out" [
] ++ lib.optionals buildTests [ "out"
"test" ]
] ++ lib.optionals buildBenchmarks [ ++ lib.optionals buildTests [
"benchmark" "test"
]; ]
++ lib.optionals buildBenchmarks [
"benchmark"
];
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "ROCm"; owner = "ROCm";
repo = "rocRAND"; repo = "rocRAND";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-BBkcYOP+zh3OQTxuSkeiJizwnE9Gr5Jbhx0e8SU/mmU="; hash = "sha256-rrRLPqEw39M+6dtPW8DcnQiSZNwxWNINJ1wjU098Vkk=";
}; };
nativeBuildInputs = [ nativeBuildInputs = [
@@ -37,45 +41,52 @@ stdenv.mkDerivation (finalAttrs: {
clr clr
]; ];
buildInputs = lib.optionals buildTests [ buildInputs =
gtest lib.optionals buildTests [
] ++ lib.optionals buildBenchmarks [ gtest
gbenchmark ]
]; ++ lib.optionals buildBenchmarks [
gbenchmark
];
cmakeFlags = [ cmakeFlags =
"-DCMAKE_C_COMPILER=hipcc" [
"-DCMAKE_CXX_COMPILER=hipcc" "-DHIP_ROOT_DIR=${clr}"
"-DHIP_ROOT_DIR=${clr}" # Manually define CMAKE_INSTALL_<DIR>
# Manually define CMAKE_INSTALL_<DIR> # See: https://github.com/NixOS/nixpkgs/pull/197838
# See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DCMAKE_INSTALL_INCLUDEDIR=include" ]
] ++ lib.optionals (gpuTargets != [ ]) [ ++ lib.optionals (gpuTargets != [ ]) [
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
] ++ lib.optionals buildTests [ ]
"-DBUILD_TEST=ON" ++ lib.optionals buildTests [
] ++ lib.optionals buildBenchmarks [ "-DBUILD_TEST=ON"
"-DBUILD_BENCHMARK=ON" ]
]; ++ lib.optionals buildBenchmarks [
"-DBUILD_BENCHMARK=ON"
];
postInstall = lib.optionalString buildTests '' postInstall =
mkdir -p $test/bin lib.optionalString buildTests ''
mv $out/bin/test_* $test/bin mkdir -p $test/bin
'' + lib.optionalString buildBenchmarks '' mv $out/bin/test_* $test/bin
mkdir -p $benchmark/bin ''
mv $out/bin/benchmark_* $benchmark/bin + lib.optionalString buildBenchmarks ''
'' + lib.optionalString (buildTests || buildBenchmarks) '' mkdir -p $benchmark/bin
rm -r $out/bin/rocRAND mv $out/bin/benchmark_* $benchmark/bin
# Fail if bin/ isn't actually empty ''
rmdir $out/bin + lib.optionalString (buildTests || buildBenchmarks) ''
''; rm -r $out/bin/rocRAND
# Fail if bin/ isn't actually empty
rmdir $out/bin
'';
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -84,6 +95,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -6,6 +6,7 @@
cmake, cmake,
rocm-cmake, rocm-cmake,
rocblas, rocblas,
rocprim,
rocsparse, rocsparse,
clr, clr,
fmt, fmt,
@@ -14,12 +15,25 @@
lapack-reference, lapack-reference,
buildTests ? false, buildTests ? false,
buildBenchmarks ? false, buildBenchmarks ? false,
gpuTargets ? [ ], # gpuTargets = [ "gfx803" "gfx900" "gfx906:xnack-" ] gpuTargets ? (
clr.localGpuTargets or [
"gfx900"
"gfx906"
"gfx908"
"gfx90a"
"gfx942"
"gfx1010"
"gfx1030"
"gfx1100"
"gfx1101"
"gfx1102"
]
),
}: }:
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocsolver"; pname = "rocsolver${clr.gpuArchSuffix}";
version = "6.0.2"; version = "6.3.1";
outputs = outputs =
[ [
@@ -36,12 +50,13 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm"; owner = "ROCm";
repo = "rocSOLVER"; repo = "rocSOLVER";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-tglQpwCSFABRuEDiJrzQVFIdx9p85E2MiUYN0aoTAXo="; hash = "sha256-+sGU+0CB48iolJSyYo+xH36q5LCUp+nKtOYbguzMuhg=";
}; };
nativeBuildInputs = nativeBuildInputs =
[ [
cmake cmake
# no ninja, it buffers console output and nix times out long periods of no output
rocm-cmake rocm-cmake
clr clr
] ]
@@ -51,7 +66,11 @@ stdenv.mkDerivation (finalAttrs: {
buildInputs = buildInputs =
[ [
# FIXME: rocblas and rocsolver can't build in parallel
# but rocsolver doesn't need rocblas' offload builds at build time
# could we build against a rocblas-minimal?
rocblas rocblas
rocprim
rocsparse rocsparse
fmt fmt
] ]
@@ -64,8 +83,9 @@ stdenv.mkDerivation (finalAttrs: {
cmakeFlags = cmakeFlags =
[ [
"-DCMAKE_CXX_COMPILER=hipcc" "-DHIP_CLANG_NUM_PARALLEL_JOBS=4"
"-DCMAKE_CXX_FLAGS=-Wno-switch" # Way too many warnings "-DCMAKE_BUILD_TYPE=Release"
"-DCMAKE_VERBOSE_MAKEFILE=ON"
# Manually define CMAKE_INSTALL_<DIR> # Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838 # See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_BINDIR=bin"
@@ -96,11 +116,11 @@ stdenv.mkDerivation (finalAttrs: {
''; '';
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = "rocsolver";
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner repo;
repo = finalAttrs.src.repo;
}; };
enableParallelBuilding = true;
requiredSystemFeatures = [ "big-parallel" ]; requiredSystemFeatures = [ "big-parallel" ];
meta = with lib; { meta = with lib; {
@@ -111,8 +131,5 @@ stdenv.mkDerivation (finalAttrs: {
platforms = platforms.linux; platforms = platforms.linux;
timeout = 14400; # 4 hours timeout = 14400; # 4 hours
maxSilent = 14400; # 4 hours maxSilent = 14400; # 4 hours
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -15,12 +15,12 @@
python3Packages, python3Packages,
buildTests ? false, buildTests ? false,
buildBenchmarks ? false, # Seems to depend on tests buildBenchmarks ? false, # Seems to depend on tests
gpuTargets ? [ ], gpuTargets ? clr.localGpuTargets or clr.gpuTargets,
}: }:
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocsparse"; pname = "rocsparse";
version = "6.0.2"; version = "6.3.1";
outputs = outputs =
[ [
@@ -37,11 +37,12 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm"; owner = "ROCm";
repo = "rocSPARSE"; repo = "rocSPARSE";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-nTYnEHkTtq0jBeMj4HXpqkJu8LQc+Z6mpjhMP7tJAHQ="; hash = "sha256-vyLfXbnxPZlR6mfbLh1E7S7HdOSHjuhGQcfihAlvvwY=";
}; };
nativeBuildInputs = [ nativeBuildInputs = [
cmake cmake
# no ninja, it buffers console output and nix times out long periods of no output
rocm-cmake rocm-cmake
clr clr
gfortran gfortran
@@ -61,7 +62,7 @@ stdenv.mkDerivation (finalAttrs: {
cmakeFlags = cmakeFlags =
[ [
"-DCMAKE_CXX_COMPILER=hipcc" "-DCMAKE_BUILD_TYPE=Release"
# Manually define CMAKE_INSTALL_<DIR> # Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838 # See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_BINDIR=bin"
@@ -145,8 +146,8 @@ stdenv.mkDerivation (finalAttrs: {
updateScript = rocmUpdateScript { updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
}; };
@@ -156,8 +157,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -15,7 +15,7 @@
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocthrust"; pname = "rocthrust";
version = "6.0.2"; version = "6.3.1";
outputs = outputs =
[ [
@@ -32,7 +32,7 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm"; owner = "ROCm";
repo = "rocThrust"; repo = "rocThrust";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-Zk7FxcedaDUbx9RCX8aWN0xZO/B5cOs/l5MDqZKQpJo="; hash = "sha256-c1+hqP/LipaQ2/lPJo79YBd9H0n0Y7yHkxe0/INE14s=";
}; };
nativeBuildInputs = [ nativeBuildInputs = [
@@ -48,7 +48,6 @@ stdenv.mkDerivation (finalAttrs: {
cmakeFlags = cmakeFlags =
[ [
"-DCMAKE_CXX_COMPILER=hipcc"
"-DHIP_ROOT_DIR=${clr}" "-DHIP_ROOT_DIR=${clr}"
# Manually define CMAKE_INSTALL_<DIR> # Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838 # See: https://github.com/NixOS/nixpkgs/pull/197838
@@ -64,9 +63,6 @@ stdenv.mkDerivation (finalAttrs: {
] ]
++ lib.optionals buildBenchmarks [ ++ lib.optionals buildBenchmarks [
"-DBUILD_BENCHMARKS=ON" "-DBUILD_BENCHMARKS=ON"
]
++ lib.optionals (buildTests || buildBenchmarks) [
"-DCMAKE_CXX_FLAGS=-Wno-deprecated-builtins" # Too much spam
]; ];
postInstall = postInstall =
@@ -84,8 +80,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -94,8 +90,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ asl20 ]; license = with licenses; [ asl20 ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -19,7 +19,7 @@
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "roctracer"; pname = "roctracer";
version = "6.0.2"; version = "6.3.1";
outputs = outputs =
[ [
@@ -36,7 +36,7 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm"; owner = "ROCm";
repo = "roctracer"; repo = "roctracer";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-a6/N6W3JXVI0VZRGxlS3cVENC3VTP1w9UFnd0+EWAuo="; hash = "sha256-GhnF7rqNLQLLB7nzIp0xNqyqBOwj9ZJ+hzzj1EAaXWU=";
}; };
nativeBuildInputs = nativeBuildInputs =
@@ -106,8 +106,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -116,8 +116,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; # mitx11 license = with licenses; [ mit ]; # mitx11
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor clr.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

View File

@@ -14,12 +14,12 @@
buildExtendedTests ? false, buildExtendedTests ? false,
buildBenchmarks ? false, buildBenchmarks ? false,
buildSamples ? false, buildSamples ? false,
gpuTargets ? [ ], # gpuTargets = [ "gfx908:xnack-" "gfx90a:xnack-" "gfx90a:xnack+" ... ] gpuTargets ? [ ],
}: }:
stdenv.mkDerivation (finalAttrs: { stdenv.mkDerivation (finalAttrs: {
pname = "rocwmma"; pname = "rocwmma";
version = "6.0.2"; version = "6.3.1";
outputs = outputs =
[ [
@@ -39,7 +39,7 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm"; owner = "ROCm";
repo = "rocWMMA"; repo = "rocWMMA";
rev = "rocm-${finalAttrs.version}"; rev = "rocm-${finalAttrs.version}";
hash = "sha256-vbC4OuCmEpD38lVq0uXNw86iS4KkL6isOVq6vmlu1oM="; hash = "sha256-kih3hn6QhcMmyj9n8f8eO+RIgKQgWKIuzg8fb0eoRPE=";
}; };
patches = lib.optionals (buildTests || buildBenchmarks) [ patches = lib.optionals (buildTests || buildBenchmarks) [
@@ -64,7 +64,9 @@ stdenv.mkDerivation (finalAttrs: {
cmakeFlags = cmakeFlags =
[ [
"-DCMAKE_CXX_COMPILER=hipcc" "-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
"-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
"-DOpenMP_omp_LIBRARY=${openmp}/lib"
"-DROCWMMA_BUILD_TESTS=${if buildTests || buildBenchmarks then "ON" else "OFF"}" "-DROCWMMA_BUILD_TESTS=${if buildTests || buildBenchmarks then "ON" else "OFF"}"
"-DROCWMMA_BUILD_SAMPLES=${if buildSamples then "ON" else "OFF"}" "-DROCWMMA_BUILD_SAMPLES=${if buildSamples then "ON" else "OFF"}"
# Manually define CMAKE_INSTALL_<DIR> # Manually define CMAKE_INSTALL_<DIR>
@@ -105,8 +107,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript { passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname; name = finalAttrs.pname;
owner = finalAttrs.src.owner; inherit (finalAttrs.src) owner;
repo = finalAttrs.src.repo; inherit (finalAttrs.src) repo;
}; };
meta = with lib; { meta = with lib; {
@@ -115,8 +117,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; license = with licenses; [ mit ];
maintainers = teams.rocm.members; maintainers = teams.rocm.members;
platforms = platforms.linux; platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
}; };
}) })

Some files were not shown because too many files have changed in this diff Show More