Skip to content

Commit 7908f8b

Browse files
committed
Add cmake options and fix bad ifdef
1 parent 7ca7306 commit 7908f8b

File tree

3 files changed

+82
-17
lines changed

3 files changed

+82
-17
lines changed

cmake/CMakeLists.txt

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,15 @@ option(onnxruntime_USE_PREINSTALLED_EIGEN "Use pre-installed EIGEN. Need to prov
104104
option(onnxruntime_BUILD_BENCHMARKS "Build ONNXRuntime micro-benchmarks" OFF)
105105
option(onnxruntime_USE_VSINPU "Build with VSINPU support" OFF)
106106

107+
# SIMD instructions in the kernels usually compile fine, and support for them is detected at run time.
108+
# On some platforms, however, even compiling these SIMD-specific instructions can fail,
109+
# so we provide options to exclude each SIMD instruction set from compilation.
110+
option(onnxruntime_DISABLE_SSE4 "Disable compiling kernel with SSE4 instructions" OFF)
111+
option(onnxruntime_DISABLE_AVX "Disable compiling kernel with AVX instructions" OFF)
112+
option(onnxruntime_DISABLE_AVX2 "Disable compiling kernel with AVX2 instructions" OFF)
113+
option(onnxruntime_DISABLE_AVX512 "Disable compiling kernel with AVX512 instructions" OFF)
114+
option(onnxruntime_DISABLE_AMX "Disable compiling kernel with AMX instructions" OFF)
115+
107116
cmake_dependent_option(onnxruntime_USE_FLASH_ATTENTION "Build flash attention kernel for scaled dot product attention" ON "onnxruntime_USE_CUDA" OFF)
108117
cmake_dependent_option(onnxruntime_USE_LEAN_ATTENTION "Build lean attention kernel for scaled dot product attention" ON "onnxruntime_USE_CUDA; NOT WIN32" OFF)
109118
option(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION "Build memory efficient attention kernel for scaled dot product attention" ON)
@@ -976,6 +985,26 @@ if (onnxruntime_FORCE_GENERIC_ALGORITHMS)
976985
add_compile_definitions(FORCE_GENERIC_ALGORITHMS)
977986
endif()
978987

988+
if (onnxruntime_DISABLE_SSE4)
989+
# The SSE4.1 dispatch guard in MLAS platform.cpp is written against ORT_DISABLE_SSE4,
# so the definition needs the ORT_ prefix; a bare DISABLE_SSE4 is never tested there.
add_compile_definitions(ORT_DISABLE_SSE4)
990+
endif()
991+
992+
# Forward the AVX / AVX2 / AVX512 disable options to the compiler as preprocessor definitions.
# NOTE(review): the platform.cpp guards visible for SSE4 and AMX use ORT_-prefixed names
# (ORT_DISABLE_SSE4, ORT_DISABLE_AMX); confirm which macro names the sources test for
# AVX, AVX2 and AVX512 and rename these definitions to match if they are also ORT_-prefixed.
if (onnxruntime_DISABLE_AVX)
993+
add_compile_definitions(DISABLE_AVX)
994+
endif()
995+
996+
if (onnxruntime_DISABLE_AVX2)
997+
add_compile_definitions(DISABLE_AVX2)
998+
endif()
999+
1000+
if (onnxruntime_DISABLE_AVX512)
1001+
add_compile_definitions(DISABLE_AVX512)
1002+
endif()
1003+
1004+
if (onnxruntime_DISABLE_AMX)
1005+
# MLAS platform.cpp wraps its AMX dispatch in `#if !defined(ORT_DISABLE_AMX)`, and
# onnxruntime_mlas.cmake drops the AMX sources when this option is ON. The definition
# must therefore be ORT_DISABLE_AMX; with a bare DISABLE_AMX the dispatch code would
# still be compiled and reference kernels that are no longer built.
add_compile_definitions(ORT_DISABLE_AMX)
1006+
endif()
1007+
9791008
if (onnxruntime_ENABLE_LAZY_TENSOR)
9801009
# To support LazyTensor, ORT needs to call Python function from C/C++.
9811010
# so onnxruntime_ENABLE_PYTHON is required.

cmake/onnxruntime_mlas.cmake

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,11 @@ else()
542542
endif()
543543
set_source_files_properties(${mlas_platform_srcs_sse2} PROPERTIES COMPILE_FLAGS "-msse2")
544544

545+
set(mlas_platform_srcs_sse41
546+
${MLAS_SRC_DIR}/qgemm_kernel_sse41.cpp
547+
)
548+
set_source_files_properties(${mlas_platform_srcs_sse41} PROPERTIES COMPILE_FLAGS "-msse4.1")
549+
545550
set(mlas_platform_srcs_avx
546551
${MLAS_SRC_DIR}/x86_64/DgemmKernelAvx.S
547552
${MLAS_SRC_DIR}/x86_64/SgemmKernelAvx.S
@@ -615,36 +620,65 @@ endif()
615620
)
616621
set_source_files_properties(${mlas_platform_srcs_avx512vnni} PROPERTIES COMPILE_FLAGS "-mfma -mavx512vnni -mavx512bw -mavx512dq -mavx512vl -mavx512f")
617622

623+
set(mlas_platform_srcs_amx
624+
${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmxCommon.S
625+
${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp
626+
${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmx.S
627+
)
628+
set_source_files_properties(${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp PROPERTIES COMPILE_FLAGS "-mavx2 -mavx512bw -mavx512dq -mavx512vl -mavx512f")
629+
set_source_files_properties(${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmx.S PROPERTIES COMPILE_FLAGS "-mavx2 -mavx512bw -mavx512dq -mavx512vl -mavx512f")
630+
618631
set(mlas_platform_srcs
619632
${MLAS_SRC_DIR}/activate_fp16.cpp
620633
${MLAS_SRC_DIR}/dwconv.cpp
621634
${MLAS_SRC_DIR}/dgemm.cpp
622635
${MLAS_SRC_DIR}/pooling_fp16.cpp
623-
${MLAS_SRC_DIR}/qgemm_kernel_avx2.cpp
624636
${mlas_platform_srcs_sse2}
625-
${mlas_platform_srcs_avx}
626-
${mlas_platform_srcs_avx2}
627-
${mlas_platform_srcs_avx512f}
628-
${mlas_platform_srcs_avx512core}
629-
${mlas_platform_srcs_avx512vnni}
630637
)
631638

632-
if (NOT onnxruntime_ORT_MINIMAL_BUILD)
639+
if (NOT onnxruntime_DISABLE_SSE4)
640+
set(mlas_platform_srcs
641+
${mlas_platform_srcs}
642+
${mlas_platform_srcs_sse41}
643+
)
644+
endif()
645+
646+
if (NOT onnxruntime_DISABLE_AVX)
647+
set(mlas_platform_srcs
648+
${mlas_platform_srcs}
649+
${mlas_platform_srcs_avx}
650+
)
651+
endif()
652+
653+
if (NOT onnxruntime_DISABLE_AVX2)
654+
set(mlas_platform_srcs
655+
${mlas_platform_srcs}
656+
${mlas_platform_srcs_avx2}
657+
${MLAS_SRC_DIR}/qgemm_kernel_avx2.cpp
658+
)
659+
endif()
660+
661+
if (NOT onnxruntime_DISABLE_AVX512)
662+
set(mlas_platform_srcs
663+
${mlas_platform_srcs}
664+
${mlas_platform_srcs_avx512f}
665+
${mlas_platform_srcs_avx512core}
666+
${mlas_platform_srcs_avx512vnni}
667+
)
668+
endif()
669+
670+
if (NOT onnxruntime_ORT_MINIMAL_BUILD AND NOT onnxruntime_DISABLE_AVX512)
633671
set(mlas_platform_srcs
634672
${mlas_platform_srcs}
635673
${MLAS_SRC_DIR}/q4gemm_avx512.cpp
636674
)
637675
set_source_files_properties(${MLAS_SRC_DIR}/q4gemm_avx512.cpp PROPERTIES COMPILE_FLAGS "-mfma -mavx512vnni -mavx512bw -mavx512dq -mavx512vl -mavx512f")
638676
endif()
639-
if(NOT APPLE)
677+
if(NOT APPLE AND NOT onnxruntime_DISABLE_AMX)
640678
set(mlas_platform_srcs
641679
${mlas_platform_srcs}
642-
${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmxCommon.S
643-
${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp
644-
${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmx.S
645-
)
646-
set_source_files_properties(${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp PROPERTIES COMPILE_FLAGS "-mavx2 -mavx512bw -mavx512dq -mavx512vl -mavx512f")
647-
set_source_files_properties(${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmx.S PROPERTIES COMPILE_FLAGS "-mavx2 -mavx512bw -mavx512dq -mavx512vl -mavx512f")
680+
${mlas_platform_srcs_amx}
681+
)
648682
endif()
649683

650684
if(ONNXRUNTIME_MLAS_MULTI_ARCH)

onnxruntime/core/mlas/lib/platform.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ Return Value:
320320
this->GemmU8S8Dispatch = &MlasGemmU8S8DispatchSse41;
321321
}
322322

323-
#endif
323+
#endif // defined(_MSC_VER) && !defined(ORT_DISABLE_SSE4)
324324

325325
//
326326
// Check if the processor supports the AVX and OSXSAVE features.
@@ -502,7 +502,7 @@ Return Value:
502502
this->GemmS8U8Kernel = MlasGemmS8U8KernelAvx2Vnni;
503503
}
504504

505-
#if !defined(__APPLE__) && !defined(ORT_DISABLE_AMX)
505+
#if !defined(__APPLE__)
506506
#if (defined(_MSC_VER) && (_MSC_VER >= 1933)) || (defined(__GNUC__) && (__GNUC__ >= 13))
507507
//
508508
// Check if the processor supports AVX NE CONVERT.
@@ -513,6 +513,7 @@ Return Value:
513513
#endif // (defined(_MSC_VER) && (_MSC_VER >= 1933)) || (defined(__GNUC__) && (__GNUC__ >= 13))
514514

515515

516+
#if !defined(ORT_DISABLE_AMX)
516517
//
517518
// Check if the processor supports AMX-TILE and AMX-INT8
518519
// features.
@@ -525,7 +526,8 @@ Return Value:
525526
this->GemmU8S8Dispatch = &MlasGemmU8S8DispatchAmx;
526527
}
527528
}
528-
#endif // !defined(__APPLE__) && !defined(ORT_DISABLE_AMX)
529+
#endif // !defined(ORT_DISABLE_AMX)
530+
#endif // !defined(__APPLE__)
529531

530532
#endif // ORT_MINIMAL_BUILD
531533

0 commit comments

Comments
 (0)