From 4aed649810ecea179aa10a9ebfcfba1f7313c067 Mon Sep 17 00:00:00 2001
From: SteelPh0enix
Date: Tue, 2 Sep 2025 15:44:27 +0200
Subject: [PATCH 1/4] nix: Added missing packages and options for ROCm build

---
 .devops/nix/package.nix | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix
index 651a54db4c203..a787943ea379c 100644
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@@ -34,6 +34,7 @@
   rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
   enableCurl ? true,
   useVulkan ? false,
+  buildAllCudaFaQuants ? false,
   llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
 
   # It's necessary to consistently use backendStdenv when building with CUDA support,
@@ -95,6 +96,8 @@ let
     clr
     hipblas
     rocblas
+    llvm.lld
+    llvm.bintools
   ];
 
   vulkanBuildInputs = [
@@ -160,7 +163,8 @@ effectiveStdenv.mkDerivation (finalAttrs: {
   buildInputs =
     optionals effectiveStdenv.isDarwin darwinBuildInputs
     ++ optionals useCuda cudaBuildInputs
-    ++ optionals useMpi [ mpi ]
+    ++ optionals (useMpi && !useRocm) [ mpi ]
+    ++ optionals (useMpi && useRocm) [ rocmPackages.mpi ]
     ++ optionals useRocm rocmBuildInputs
     ++ optionals useBlas [ blas ]
     ++ optionals useVulkan vulkanBuildInputs
@@ -187,10 +191,12 @@ effectiveStdenv.mkDerivation (finalAttrs: {
           builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
         )
       )
+      (cmakeBool "GGML_CUDA_FA_ALL_QUANTS" buildAllCudaFaQuants)
     ]
     ++ optionals useRocm [
       (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
       (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
+      (cmakeBool "GGML_CUDA_FA_ALL_QUANTS" buildAllCudaFaQuants)
     ]
     ++ optionals useMetalKit [
       (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")

From 9aa7ce3ae7e9c0208bd16a9ac9cf2d82069bd3b0 Mon Sep 17 00:00:00 2001
From: SteelPh0enix
Date: Tue, 2 Sep 2025 18:54:29 +0200
Subject: [PATCH 2/4] nix: Renamed CMAKE_HIP_ARCHITECTURES to AMDGPU_TARGETS

---
 .devops/nix/package.nix | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix
index a787943ea379c..025c38386f69b 100644
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@@ -195,7 +195,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
     ]
     ++ optionals useRocm [
       (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
-      (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
+      (cmakeFeature "AMDGPU_TARGETS" rocmGpuTargets)
       (cmakeBool "GGML_CUDA_FA_ALL_QUANTS" buildAllCudaFaQuants)
     ]
     ++ optionals useMetalKit [

From bbb4973b97b0d1dccdaba29a40fb44fb5ae0b4e6 Mon Sep 17 00:00:00 2001
From: SteelPh0enix
Date: Tue, 2 Sep 2025 20:09:10 +0200
Subject: [PATCH 3/4] nix: Added and enabled rocmWmma support along with ROCWMMA_PATH option in CMake

The rocWMMA CMake flags are only emitted when useRocm is set: rocmUseWmma
defaults to true, so guarding on it alone would also pass the HIP-only
flags (and reference rocmPackages.rocwmma) in non-ROCm builds.

GGML_HIP_ROCWMMA_PATH is quoted when added as an include directory so the
path is always passed as a single argument.

---
 .devops/nix/package.nix          | 22 +++++++++++++++-------
 ggml/src/ggml-hip/CMakeLists.txt |  5 ++++-
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix
index 025c38386f69b..f7c0eca5d18e5 100644
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@@ -32,6 +32,7 @@
   useMpi ? false,
   useRocm ? config.rocmSupport,
   rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
+  rocmUseWmma ? true,
   enableCurl ? true,
   useVulkan ? false,
   buildAllCudaFaQuants ? false,
@@ -93,13 +94,16 @@ let
     libcublas
   ];
 
-  rocmBuildInputs = with rocmPackages; [
-    clr
-    hipblas
-    rocblas
-    llvm.lld
-    llvm.bintools
-  ];
+  rocmBuildInputs =
+    with rocmPackages;
+    [
+      clr
+      hipblas
+      rocblas
+      llvm.lld
+      llvm.bintools
+    ]
+    ++ optionals rocmUseWmma [ rocmPackages.rocwmma ];
 
   vulkanBuildInputs = [
     vulkan-headers
@@ -198,6 +202,10 @@ effectiveStdenv.mkDerivation (finalAttrs: {
       (cmakeFeature "AMDGPU_TARGETS" rocmGpuTargets)
       (cmakeBool "GGML_CUDA_FA_ALL_QUANTS" buildAllCudaFaQuants)
     ]
+    ++ optionals (useRocm && rocmUseWmma) [
+      (cmakeBool "GGML_HIP_ROCWMMA_FATTN" rocmUseWmma)
+      (cmakeFeature "GGML_HIP_ROCWMMA_PATH" "${rocmPackages.rocwmma}")
+    ]
     ++ optionals useMetalKit [
       (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
       (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
diff --git a/ggml/src/ggml-hip/CMakeLists.txt b/ggml/src/ggml-hip/CMakeLists.txt
index d327b90cceb25..ea55a3fe088b9 100644
--- a/ggml/src/ggml-hip/CMakeLists.txt
+++ b/ggml/src/ggml-hip/CMakeLists.txt
@@ -39,7 +39,7 @@ endif()
 find_package(hip REQUIRED)
 find_package(hipblas REQUIRED)
 find_package(rocblas REQUIRED)
-if (GGML_HIP_ROCWMMA_FATTN)
+if (GGML_HIP_ROCWMMA_FATTN AND NOT GGML_HIP_ROCWMMA_PATH)
     CHECK_INCLUDE_FILE_CXX("rocwmma/rocwmma.hpp" FOUND_ROCWMMA)
     if (NOT ${FOUND_ROCWMMA})
         message(FATAL_ERROR "rocwmma has not been found")
@@ -111,6 +111,9 @@ endif()
 
 if (GGML_HIP_ROCWMMA_FATTN)
     add_compile_definitions(GGML_HIP_ROCWMMA_FATTN)
+    if (GGML_HIP_ROCWMMA_PATH)
+        target_include_directories(ggml-hip PRIVATE "${GGML_HIP_ROCWMMA_PATH}/include")
+    endif()
 endif()
 
 if (NOT GGML_HIP_MMQ_MFMA)

From e514eac597b8ec26a529f127ec407d8423a75d5d Mon Sep 17 00:00:00 2001
From: SteelPh0enix
Date: Tue, 2 Sep 2025 20:18:55 +0200
Subject: [PATCH 4/4] nix: Added enableUma option

---
 .devops/nix/package.nix | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix
index f7c0eca5d18e5..d506160eb02ee 100644
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@@ -36,6 +36,7 @@
   enableCurl ? true,
   useVulkan ? false,
   buildAllCudaFaQuants ? false,
+  enableUma ? false,
   llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
 
   # It's necessary to consistently use backendStdenv when building with CUDA support,
@@ -196,11 +197,13 @@ effectiveStdenv.mkDerivation (finalAttrs: {
         )
       )
       (cmakeBool "GGML_CUDA_FA_ALL_QUANTS" buildAllCudaFaQuants)
+      (cmakeBool "GGML_CUDA_ENABLE_UNIFIED_MEMORY" enableUma)
     ]
     ++ optionals useRocm [
       (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
       (cmakeFeature "AMDGPU_TARGETS" rocmGpuTargets)
       (cmakeBool "GGML_CUDA_FA_ALL_QUANTS" buildAllCudaFaQuants)
+      (cmakeBool "GGML_CUDA_ENABLE_UNIFIED_MEMORY" enableUma)
     ]
     ++ optionals (useRocm && rocmUseWmma) [
       (cmakeBool "GGML_HIP_ROCWMMA_FATTN" rocmUseWmma)