|
93 | 93 | }
|
94 | 94 | }},
|
95 | 95 | {
|
| 96 | + "gemm_bf16_splat_dp2_mlir": { |
| 97 | + "bf16_dp2_3x1024_omp_2_mlir": { |
| 98 | + "type": "IR-GEN", |
| 99 | + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=0" ], |
| 100 | + "environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
| 101 | + "flags": [ "-n", "100", "-run-args='--disable-vnni-packing --def-parallel --parallel-task-grid=8,16'" ], |
| 102 | + "extensions": [ "(avx2)" ] |
| 103 | + }, |
| 104 | + "bf16_dp2_3x1024_omp_4_mlir": { |
| 105 | + "type": "IR-GEN", |
| 106 | + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=0" ], |
| 107 | + "environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
| 108 | + "flags": [ "-n", "100", "-run-args='--disable-vnni-packing --def-parallel --parallel-task-grid=8,8'" ], |
| 109 | + "extensions": [ "(avx2)" ] |
| 110 | + }, |
| 111 | + "bf16_dp2_3x1024_omp_8_mlir": { |
| 112 | + "type": "IR-GEN", |
| 113 | + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=0" ], |
| 114 | + "environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
| 115 | + "flags": [ "-n", "100", "-run-args='--disable-vnni-packing --def-parallel --parallel-task-grid=4,8'" ], |
| 116 | + "extensions": [ "(avx2)" ] |
| 117 | + }, |
| 118 | + "bf16_dp2_3x1024_omp_16_mlir": { |
| 119 | + "type": "IR-GEN", |
| 120 | + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=0" ], |
| 121 | + "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
| 122 | + "flags": [ "-n", "100", "-run-args='--disable-vnni-packing --def-parallel --parallel-task-grid=2,8'" ], |
| 123 | + "extensions": [ "(avx2)" ] |
| 124 | + } |
| 125 | + }}, |
| 126 | + { |
| 127 | + "gemm_bf16_splat_dp2_mlir_vector_kernel_avx512": { |
| 128 | + "bf16_dp2_3x1024_omp_2_mlir": { |
| 129 | + "type": "IR-GEN", |
| 130 | + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=0" ], |
| 131 | + "environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
| 132 | + "flags": [ "-n", "100", "-run-args='--disable-vnni-packing --def-parallel --parallel-task-grid=8,16 --vector-to-kernels --registerBlocking=8,32,2'" ], |
| 133 | + "extensions": [ "avx512_bf16" ] |
| 134 | + }, |
| 135 | + "bf16_dp2_3x1024_omp_4_mlir": { |
| 136 | + "type": "IR-GEN", |
| 137 | + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=0" ], |
| 138 | + "environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
| 139 | + "flags": [ "-n", "100", "-run-args='--disable-vnni-packing --def-parallel --parallel-task-grid=8,8 --vector-to-kernels --registerBlocking=8,32,2'" ], |
| 140 | + "extensions": [ "avx512_bf16" ] |
| 141 | + }, |
| 142 | + "bf16_dp2_3x1024_omp_8_mlir": { |
| 143 | + "type": "IR-GEN", |
| 144 | + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=0" ], |
| 145 | + "environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
| 146 | + "flags": [ "-n", "100", "-run-args='--disable-vnni-packing --def-parallel --parallel-task-grid=4,8 --vector-to-kernels --registerBlocking=8,32,2'" ], |
| 147 | + "extensions": [ "avx512_bf16" ] |
| 148 | + }, |
| 149 | + "bf16_dp2_3x1024_omp_16_mlir": { |
| 150 | + "type": "IR-GEN", |
| 151 | + "benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=0" ], |
| 152 | + "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
| 153 | + "flags": [ "-n", "100", "-run-args='--disable-vnni-packing --def-parallel --parallel-task-grid=2,8 --vector-to-kernels --registerBlocking=8,32,2'" ], |
| 154 | + "extensions": [ "avx512_bf16" ] |
| 155 | + } |
| 156 | + }}, |
| 157 | + { |
96 | 158 | "mlp_bf16_dp2_mlir": {
|
97 | 159 | "bf16_dp2_3x1024_omp_2_mlir": {
|
98 | 160 | "type": "IR-GEN",
|
|
186 | 248 | }
|
187 | 249 | }},
|
188 | 250 | {
|
| 251 | + "mlp_bf16_splat_dp2_mlir": { |
| 252 | + "bf16_dp2_3x1024_omp_2_mlir": { |
| 253 | + "type": "IR-GEN", |
| 254 | + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=0" ], |
| 255 | + "environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
| 256 | + "flags": [ "-n", "100", "-run-args='--disable-vnni-packing --def-parallel --parallel-task-grid=8,16'" ], |
| 257 | + "extensions": [ "(avx2)" ] |
| 258 | + }, |
| 259 | + "bf16_dp2_3x1024_omp_4_mlir": { |
| 260 | + "type": "IR-GEN", |
| 261 | + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=0" ], |
| 262 | + "environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
| 263 | + "flags": [ "-n", "100", "-run-args='--disable-vnni-packing --def-parallel --parallel-task-grid=8,8'" ], |
| 264 | + "extensions": [ "(avx2)" ] |
| 265 | + }, |
| 266 | + "bf16_dp2_3x1024_omp_8_mlir": { |
| 267 | + "type": "IR-GEN", |
| 268 | + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=0" ], |
| 269 | + "environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
| 270 | + "flags": [ "-n", "100", "-run-args='--disable-vnni-packing --def-parallel --parallel-task-grid=4,8'" ], |
| 271 | + "extensions": [ "(avx2)" ] |
| 272 | + }, |
| 273 | + "bf16_dp2_3x1024_omp_16_mlir": { |
| 274 | + "type": "IR-GEN", |
| 275 | + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=0" ], |
| 276 | + "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
| 277 | + "flags": [ "-n", "100", "-run-args='--disable-vnni-packing --def-parallel --parallel-task-grid=2,8'" ], |
| 278 | + "extensions": [ "(avx2)" ] |
| 279 | + } |
| 280 | + }}, |
| 281 | + { |
| 282 | + "mlp_bf16_splat_dp2_mlir_vector_kernel_avx512": { |
| 283 | + "bf16_dp2_3x1024_omp_2_mlir": { |
| 284 | + "type": "IR-GEN", |
| 285 | + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=0" ], |
| 286 | + "environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
| 287 | + "flags": [ "-n", "100", "-run-args='--disable-vnni-packing --def-parallel --parallel-task-grid=8,16 --vector-to-kernels --registerBlocking=8,32,2'" ], |
| 288 | + "extensions": [ "avx512_bf16" ] |
| 289 | + }, |
| 290 | + "bf16_dp2_3x1024_omp_4_mlir": { |
| 291 | + "type": "IR-GEN", |
| 292 | + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=0" ], |
| 293 | + "environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
| 294 | + "flags": [ "-n", "100", "-run-args='--disable-vnni-packing --def-parallel --parallel-task-grid=8,8 --vector-to-kernels --registerBlocking=8,32,2'" ], |
| 295 | + "extensions": [ "avx512_bf16" ] |
| 296 | + }, |
| 297 | + "bf16_dp2_3x1024_omp_8_mlir": { |
| 298 | + "type": "IR-GEN", |
| 299 | + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=0" ], |
| 300 | + "environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
| 301 | + "flags": [ "-n", "100", "-run-args='--disable-vnni-packing --def-parallel --parallel-task-grid=4,8 --vector-to-kernels --registerBlocking=8,32,2'" ], |
| 302 | + "extensions": [ "avx512_bf16" ] |
| 303 | + }, |
| 304 | + "bf16_dp2_3x1024_omp_16_mlir": { |
| 305 | + "type": "IR-GEN", |
| 306 | + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=0" ], |
| 307 | + "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" }, |
| 308 | + "flags": [ "-n", "100", "-run-args='--disable-vnni-packing --def-parallel --parallel-task-grid=2,8 --vector-to-kernels --registerBlocking=8,32,2'" ], |
| 309 | + "extensions": [ "avx512_bf16" ] |
| 310 | + } |
| 311 | + }}, |
| 312 | + { |
189 | 313 | "gemm_bf16_dp4_mlir": {
|
190 | 314 | "bf16_dp4_3x1024_omp_2_mlir": {
|
191 | 315 | "type": "IR-GEN",
|
|
0 commit comments