Skip to content

Commit 4568d96

Browse files
committed
Add cmake options and fix bad ifdef
1 parent 9e2b7b3 commit 4568d96

File tree

3 files changed

+82
-17
lines changed

3 files changed

+82
-17
lines changed

cmake/CMakeLists.txt

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,15 @@ option(onnxruntime_USE_PREINSTALLED_EIGEN "Use pre-installed EIGEN. Need to prov
100100
option(onnxruntime_BUILD_BENCHMARKS "Build ONNXRuntime micro-benchmarks" OFF)
101101
option(onnxruntime_USE_VSINPU "Build with VSINPU support" OFF)
102102

103+
# Usually, SIMD instructions in kernels compile fine, and we detect at run time whether they are supported.
104+
# But on some platforms, even compiling these SIMD-specific instructions can fail.
105+
# So we provide options to disable compiling these SIMD instructions.
106+
option(onnxruntime_DISABLE_SSE4 "Disable compiling kernel with SSE4 instructions" OFF)
107+
option(onnxruntime_DISABLE_AVX "Disable compiling kernel with AVX instructions" OFF)
108+
option(onnxruntime_DISABLE_AVX2 "Disable compiling kernel with AVX2 instructions" OFF)
109+
option(onnxruntime_DISABLE_AVX512 "Disable compiling kernel with AVX512 instructions" OFF)
110+
option(onnxruntime_DISABLE_AMX "Disable compiling kernel with AMX instructions" OFF)
111+
103112
cmake_dependent_option(onnxruntime_USE_FLASH_ATTENTION "Build flash attention kernel for scaled dot product attention" ON "onnxruntime_USE_CUDA" OFF)
104113
option(onnxruntime_USE_LEAN_ATTENTION "Build lean attention kernel for scaled dot product attention" OFF)
105114
cmake_dependent_option(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION "Build memory efficient attention kernel for scaled dot product attention" ON "onnxruntime_USE_CUDA" OFF)
@@ -976,6 +985,26 @@ if (onnxruntime_FORCE_GENERIC_ALGORITHMS)
976985
add_compile_definitions(FORCE_GENERIC_ALGORITHMS)
977986
endif()
978987

988+
if (onnxruntime_DISABLE_SSE4)
989+
add_compile_definitions(ORT_DISABLE_SSE4)  # ORT_ prefix: must match the ORT_DISABLE_SSE4 check referenced in onnxruntime/core/mlas/lib/platform.cpp
990+
endif()
991+
992+
if (onnxruntime_DISABLE_AVX)
993+
add_compile_definitions(DISABLE_AVX)  # NOTE(review): platform.cpp guards use the ORT_DISABLE_* prefix (e.g. ORT_DISABLE_AMX) - confirm which macro name the sources test
994+
endif()
995+
996+
if (onnxruntime_DISABLE_AVX2)
997+
add_compile_definitions(DISABLE_AVX2)  # NOTE(review): platform.cpp guards use the ORT_DISABLE_* prefix (e.g. ORT_DISABLE_AMX) - confirm which macro name the sources test
998+
endif()
999+
1000+
if (onnxruntime_DISABLE_AVX512)
1001+
add_compile_definitions(DISABLE_AVX512)  # NOTE(review): platform.cpp guards use the ORT_DISABLE_* prefix (e.g. ORT_DISABLE_AMX) - confirm which macro name the sources test
1002+
endif()
1003+
1004+
if (onnxruntime_DISABLE_AMX)
1005+
add_compile_definitions(ORT_DISABLE_AMX)  # ORT_ prefix: platform.cpp tests !defined(ORT_DISABLE_AMX); without it the AMX dispatch is still compiled while the AMX sources are excluded
1006+
endif()
1007+
9791008
if (onnxruntime_ENABLE_LAZY_TENSOR)
9801009
# To support LazyTensor, ORT needs to call Python function from C/C++.
9811010
# so onnxruntime_ENABLE_PYTHON is required.

cmake/onnxruntime_mlas.cmake

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,11 @@ else()
620620
endif()
621621
set_source_files_properties(${mlas_platform_srcs_sse2} PROPERTIES COMPILE_FLAGS "-msse2")
622622

623+
set(mlas_platform_srcs_sse41
624+
${MLAS_SRC_DIR}/qgemm_kernel_sse41.cpp
625+
)
626+
set_source_files_properties(${mlas_platform_srcs_sse41} PROPERTIES COMPILE_FLAGS "-msse4.1")
627+
623628
set(mlas_platform_srcs_avx
624629
${MLAS_SRC_DIR}/x86_64/DgemmKernelAvx.S
625630
${MLAS_SRC_DIR}/x86_64/SgemmKernelAvx.S
@@ -697,36 +702,65 @@ endif()
697702
)
698703
set_source_files_properties(${mlas_platform_srcs_avx512vnni} PROPERTIES COMPILE_FLAGS "-mfma -mavx512vnni -mavx512bw -mavx512dq -mavx512vl -mavx512f")
699704

705+
set(mlas_platform_srcs_amx
706+
${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmxCommon.S
707+
${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp
708+
${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmx.S
709+
)
710+
set_source_files_properties(${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp PROPERTIES COMPILE_FLAGS "-mavx2 -mavx512bw -mavx512dq -mavx512vl -mavx512f")
711+
set_source_files_properties(${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmx.S PROPERTIES COMPILE_FLAGS "-mavx2 -mavx512bw -mavx512dq -mavx512vl -mavx512f")
712+
700713
set(mlas_platform_srcs
701714
${MLAS_SRC_DIR}/activate_fp16.cpp
702715
${MLAS_SRC_DIR}/dwconv.cpp
703716
${MLAS_SRC_DIR}/dgemm.cpp
704717
${MLAS_SRC_DIR}/pooling_fp16.cpp
705-
${MLAS_SRC_DIR}/qgemm_kernel_avx2.cpp
706718
${mlas_platform_srcs_sse2}
707-
${mlas_platform_srcs_avx}
708-
${mlas_platform_srcs_avx2}
709-
${mlas_platform_srcs_avx512f}
710-
${mlas_platform_srcs_avx512core}
711-
${mlas_platform_srcs_avx512vnni}
712719
)
713720

714-
if (NOT onnxruntime_ORT_MINIMAL_BUILD)
721+
if (NOT onnxruntime_DISABLE_SSE4)
722+
set(mlas_platform_srcs
723+
${mlas_platform_srcs}
724+
${mlas_platform_srcs_sse41}
725+
)
726+
endif()
727+
728+
if (NOT onnxruntime_DISABLE_AVX)
729+
set(mlas_platform_srcs
730+
${mlas_platform_srcs}
731+
${mlas_platform_srcs_avx}
732+
)
733+
endif()
734+
735+
if (NOT onnxruntime_DISABLE_AVX2)
736+
set(mlas_platform_srcs
737+
${mlas_platform_srcs}
738+
${mlas_platform_srcs_avx2}
739+
${MLAS_SRC_DIR}/qgemm_kernel_avx2.cpp
740+
)
741+
endif()
742+
743+
if (NOT onnxruntime_DISABLE_AVX512)
744+
set(mlas_platform_srcs
745+
${mlas_platform_srcs}
746+
${mlas_platform_srcs_avx512f}
747+
${mlas_platform_srcs_avx512core}
748+
${mlas_platform_srcs_avx512vnni}
749+
)
750+
endif()
751+
752+
if (NOT onnxruntime_ORT_MINIMAL_BUILD AND NOT onnxruntime_DISABLE_AVX512)
715753
set(mlas_platform_srcs
716754
${mlas_platform_srcs}
717755
${MLAS_SRC_DIR}/q4gemm_avx512.cpp
718756
)
719757
set_source_files_properties(${MLAS_SRC_DIR}/q4gemm_avx512.cpp PROPERTIES COMPILE_FLAGS "-mfma -mavx512vnni -mavx512bw -mavx512dq -mavx512vl -mavx512f")
720758
endif()
721-
if(NOT APPLE)
759+
if(NOT APPLE AND NOT onnxruntime_DISABLE_AMX)
722760
set(mlas_platform_srcs
723761
${mlas_platform_srcs}
724-
${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmxCommon.S
725-
${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp
726-
${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmx.S
727-
)
728-
set_source_files_properties(${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp PROPERTIES COMPILE_FLAGS "-mavx2 -mavx512bw -mavx512dq -mavx512vl -mavx512f")
729-
set_source_files_properties(${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmx.S PROPERTIES COMPILE_FLAGS "-mavx2 -mavx512bw -mavx512dq -mavx512vl -mavx512f")
762+
${mlas_platform_srcs_amx}
763+
)
730764
endif()
731765

732766
if(onnxruntime_ENABLE_CONVSYMKERNELAVX2_SAT_CHECKER)

onnxruntime/core/mlas/lib/platform.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ Return Value:
326326
this->GemmU8S8Dispatch = &MlasGemmU8S8DispatchSse41;
327327
}
328328

329-
#endif
329+
#endif // defined(_MSC_VER) && !defined(ORT_DISABLE_SSE4)
330330

331331
//
332332
// Check if the processor supports the AVX and OSXSAVE features.
@@ -513,7 +513,7 @@ Return Value:
513513
this->GemmS8U8Kernel = MlasGemmS8U8KernelAvx2Vnni;
514514
}
515515

516-
#if !defined(__APPLE__) && !defined(ORT_DISABLE_AMX)
516+
#if !defined(__APPLE__)
517517
#if (defined(_MSC_VER) && (_MSC_VER >= 1933)) || (defined(__GNUC__) && (__GNUC__ >= 13))
518518
//
519519
// Check if the processor supports AVX NE CONVERT.
@@ -524,6 +524,7 @@ Return Value:
524524
#endif // (defined(_MSC_VER) && (_MSC_VER >= 1933)) || (defined(__GNUC__) && (__GNUC__ >= 13))
525525

526526

527+
#if !defined(ORT_DISABLE_AMX)
527528
//
528529
// Check if the processor supports AMX-TILE and AMX-INT8
529530
// features.
@@ -536,7 +537,8 @@ Return Value:
536537
this->GemmU8S8Dispatch = &MlasGemmU8S8DispatchAmx;
537538
}
538539
}
539-
#endif // !defined(__APPLE__) && !defined(ORT_DISABLE_AMX)
540+
#endif // !defined(ORT_DISABLE_AMX)
541+
#endif // !defined(__APPLE__)
540542

541543
#endif // ORT_MINIMAL_BUILD
542544

0 commit comments

Comments
 (0)