|
189 | 189 | "gemm_fp32_mlir_vector_kernel_32_sve": {
|
190 | 190 | "fp32_3x1024_omp_2_mlir": {
|
191 | 191 | "type": "IR-GEN",
|
192 |
| - "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
| 192 | + "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=512 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
193 | 193 | "environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
|
194 | 194 | "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16 --vector-to-kernels --registerBlocking=4,32,1 -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256'" ],
|
195 | 195 | "extensions": [ "asimd" ]
|
196 | 196 | },
|
197 | 197 | "fp32_3x1024_omp_4_mlir": {
|
198 | 198 | "type": "IR-GEN",
|
199 |
| - "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
| 199 | + "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=512 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
200 | 200 | "environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
|
201 | 201 | "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8 --vector-to-kernels --registerBlocking=4,32,1 -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256'" ],
|
202 | 202 | "extensions": [ "asimd" ]
|
203 | 203 | },
|
204 | 204 | "fp32_3x1024_omp_8_mlir": {
|
205 | 205 | "type": "IR-GEN",
|
206 |
| - "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
| 206 | + "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=512 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
207 | 207 | "environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
|
208 | 208 | "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8 --vector-to-kernels --registerBlocking=4,32,1 -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256'" ],
|
209 | 209 | "extensions": [ "asimd" ]
|
210 | 210 | },
|
211 | 211 | "fp32_3x1024_omp_16_mlir": {
|
212 | 212 | "type": "IR-GEN",
|
213 |
| - "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
| 213 | + "benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=512 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
214 | 214 | "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
|
215 | 215 | "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8 --vector-to-kernels --registerBlocking=4,32,1 -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256'" ],
|
216 | 216 | "extensions": [ "asimd" ]
|
|
220 | 220 | "mlp_fp32_mlir_vector_kernel_32_sve": {
|
221 | 221 | "fp32_3x1024_omp_2_mlir": {
|
222 | 222 | "type": "IR-GEN",
|
223 |
| - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
| 223 | + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=512 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
224 | 224 | "environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
|
225 | 225 | "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16 --vector-to-kernels --registerBlocking=4,32,1 -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256'" ],
|
226 | 226 | "extensions": [ "asimd" ]
|
227 | 227 | },
|
228 | 228 | "fp32_3x1024_omp_4_mlir": {
|
229 | 229 | "type": "IR-GEN",
|
230 |
| - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
| 230 | + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=512 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
231 | 231 | "environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
|
232 | 232 | "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8 --vector-to-kernels --registerBlocking=4,32,1 -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256'" ],
|
233 | 233 | "extensions": [ "asimd" ]
|
234 | 234 | },
|
235 | 235 | "fp32_3x1024_omp_8_mlir": {
|
236 | 236 | "type": "IR-GEN",
|
237 |
| - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
| 237 | + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=512 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
238 | 238 | "environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
|
239 | 239 | "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8 --vector-to-kernels --registerBlocking=4,32,1 -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256'" ],
|
240 | 240 | "extensions": [ "asimd" ]
|
241 | 241 | },
|
242 | 242 | "fp32_3x1024_omp_16_mlir": {
|
243 | 243 | "type": "IR-GEN",
|
244 |
| - "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
| 244 | + "benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=512 --layers=1024,1024,1024,1024 --tiles=32,32,32" ], |
245 | 245 | "environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
|
246 | 246 | "flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8 --vector-to-kernels --registerBlocking=4,32,1 -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256'" ],
|
247 | 247 | "extensions": [ "asimd" ]
|
|
0 commit comments