Skip to content

Commit 4159105

Browse files
author
Raghuveer Devulapalli
committed
Update benchmarks to use x86simdsort shared library
1 parent 3899385 commit 4159105

12 files changed

+190
-517
lines changed

benchmarks/bench-qsort-common.h renamed to benchmarks/bench-all.cpp

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,24 @@
1-
#ifndef AVX512_BENCH_COMMON
2-
#define AVX512_BENCH_COMMON
3-
4-
#include "avx512-16bit-qsort.hpp"
5-
#include "avx512-32bit-qsort.hpp"
6-
#include "avx512-64bit-argsort.hpp"
7-
#include "avx512-64bit-qsort.hpp"
8-
1+
#include "x86simdsort.h"
92
#include "rand_array.h"
103
#include <benchmark/benchmark.h>
114

5+
#ifdef __FLT16_MAX__
/*
 * Specialization of get_uniform_rand_array for _Float16, compiled only when
 * the toolchain defines __FLT16_MAX__.
 *
 * Returns a vector of `arrsize` elements. Each element is rand() / RAND_MAX
 * evaluated in float and then narrowed to _Float16, so values fall in [0, 1].
 * `max` and `min` are accepted but deliberately ignored (presumably to keep
 * the signature identical to the primary template -- confirm against
 * rand_array.h).
 */
template <>
std::vector<_Float16> get_uniform_rand_array(
        int64_t arrsize,
        _Float16 max,
        _Float16 min)
{
    (void)(max);
    (void)(min);
    std::vector<_Float16> arr;
    // A non-positive size still yields an empty vector; only reserve when
    // there is something to store so a negative value never reaches reserve().
    if (arrsize > 0) { arr.reserve(static_cast<size_t>(arrsize)); }
    // The counter uses the same 64-bit type as arrsize so the comparison
    // cannot overflow a plain int on very large requests.
    for (int64_t jj = 0; jj < arrsize; ++jj) {
        const float sample
                = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
        arr.push_back(static_cast<_Float16>(sample));
    }
    return arr;
}
#endif
21+
1222
#define MY_BENCHMARK_CAPTURE(func, T, test_case_name, ...) \
1323
BENCHMARK_PRIVATE_DECLARE(func) \
1424
= (::benchmark::internal::RegisterBenchmarkInternal( \
@@ -18,7 +28,7 @@
1828
func<T>(st, __VA_ARGS__); \
1929
})))
2030

21-
#define BENCH(func, type) \
31+
#define BENCH_SORT(func, type) \
2232
MY_BENCHMARK_CAPTURE(func, type, smallrandom_128, 128, std::string("random")); \
2333
MY_BENCHMARK_CAPTURE(func, type, smallrandom_256, 256, std::string("random")); \
2434
MY_BENCHMARK_CAPTURE(func, type, smallrandom_512, 512, std::string("random")); \
@@ -37,4 +47,14 @@
3747
MY_BENCHMARK_CAPTURE( \
3848
func, type, reverse_10k, 10000, std::string("reverse"));
3949

40-
#endif
50+
// Registers `func<type>` four times through MY_BENCHMARK_CAPTURE, passing the
// argument pair (10000, k) for k = 10, 100, 1000 and 5000. The partial-sort
// and qselect benchmarks read the first value as the array size and the
// second as k.
// The final line carries no trailing backslash, so the macro ends here and
// cannot swallow whatever line is later placed directly after it.
#define BENCH_PARTIAL(func, type) \
    MY_BENCHMARK_CAPTURE(func, type, k10, 10000, 10); \
    MY_BENCHMARK_CAPTURE(func, type, k100, 10000, 100); \
    MY_BENCHMARK_CAPTURE(func, type, k1000, 10000, 1000); \
    MY_BENCHMARK_CAPTURE(func, type, k5000, 10000, 5000);
55+
56+
#include "bench-argsort.hpp"
57+
#include "bench-partial-qsort.hpp"
58+
#include "bench-qselect.hpp"
59+
#include "bench-qsort.hpp"
60+

benchmarks/bench-argsort.hpp

Lines changed: 19 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
#include "bench-qsort-common.h"
2-
31
template <typename T>
42
std::vector<int64_t> stdargsort(const std::vector<T> &array)
53
{
@@ -16,77 +14,42 @@ std::vector<int64_t> stdargsort(const std::vector<T> &array)
1614
}
1715

1816
template <typename T, class... Args>
19-
static void stdargsort(benchmark::State &state, Args &&...args)
17+
static void scalarargsort(benchmark::State &state, Args &&...args)
2018
{
19+
// get args
2120
auto args_tuple = std::make_tuple(std::move(args)...);
22-
// Perform setup here
23-
size_t ARRSIZE = std::get<0>(args_tuple);
24-
std::vector<T> arr;
25-
std::vector<int64_t> inx;
26-
21+
size_t arrsize = std::get<0>(args_tuple);
2722
std::string arrtype = std::get<1>(args_tuple);
28-
if (arrtype == "random") { arr = get_uniform_rand_array<T>(ARRSIZE); }
29-
else if (arrtype == "sorted") {
30-
arr = get_uniform_rand_array<T>(ARRSIZE);
31-
std::sort(arr.begin(), arr.end());
32-
}
33-
else if (arrtype == "constant") {
34-
T temp = get_uniform_rand_array<T>(1)[0];
35-
for (size_t ii = 0; ii < ARRSIZE; ++ii) {
36-
arr.push_back(temp);
37-
}
38-
}
39-
else if (arrtype == "reverse") {
40-
arr = get_uniform_rand_array<T>(ARRSIZE);
41-
std::sort(arr.begin(), arr.end());
42-
std::reverse(arr.begin(), arr.end());
43-
}
44-
45-
/* call avx512 quicksort */
23+
// set up array
24+
std::vector<T> arr = get_array<T>(arrtype, arrsize);
25+
std::vector<T> arr_bkp = arr;
26+
std::vector<int64_t> inx;
27+
// benchmark
4628
for (auto _ : state) {
4729
inx = stdargsort(arr);
4830
}
4931
}
5032

5133
template <typename T, class... Args>
52-
static void avx512argsort(benchmark::State &state, Args &&...args)
34+
static void simdargsort(benchmark::State &state, Args &&...args)
5335
{
36+
// get args
5437
auto args_tuple = std::make_tuple(std::move(args)...);
55-
if (!__builtin_cpu_supports("avx512bw")) {
56-
state.SkipWithMessage("Requires AVX512 BW ISA");
57-
}
58-
// Perform setup here
59-
size_t ARRSIZE = std::get<0>(args_tuple);
60-
std::vector<T> arr;
61-
std::vector<int64_t> inx;
62-
38+
size_t arrsize = std::get<0>(args_tuple);
6339
std::string arrtype = std::get<1>(args_tuple);
64-
if (arrtype == "random") { arr = get_uniform_rand_array<T>(ARRSIZE); }
65-
else if (arrtype == "sorted") {
66-
arr = get_uniform_rand_array<T>(ARRSIZE);
67-
std::sort(arr.begin(), arr.end());
68-
}
69-
else if (arrtype == "constant") {
70-
T temp = get_uniform_rand_array<T>(1)[0];
71-
for (size_t ii = 0; ii < ARRSIZE; ++ii) {
72-
arr.push_back(temp);
73-
}
74-
}
75-
else if (arrtype == "reverse") {
76-
arr = get_uniform_rand_array<T>(ARRSIZE);
77-
std::sort(arr.begin(), arr.end());
78-
std::reverse(arr.begin(), arr.end());
79-
}
80-
81-
/* call avx512 quicksort */
40+
// set up array
41+
std::vector<T> arr = get_array<T>(arrtype, arrsize);
42+
std::vector<T> arr_bkp = arr;
43+
std::vector<int64_t> inx;
44+
// benchmark
8245
for (auto _ : state) {
83-
inx = avx512_argsort<T>(arr.data(), ARRSIZE);
46+
inx = x86simdsort::argsort(arr.data(), arrsize);
8447
}
8548
}
8649

8750
// Registers both argsort benchmarks for `type`. After this commit these are
// simdargsort and scalarargsort, each expanded over the BENCH_SORT case list
// (previously avx512argsort and stdargsort via BENCH).
#define BENCH_BOTH(type) \
88-
BENCH(avx512argsort, type) \
89-
BENCH(stdargsort, type)
51+
BENCH_SORT(simdargsort, type) \
52+
BENCH_SORT(scalarargsort, type)
9053

9154
BENCH_BOTH(int64_t)
9255
BENCH_BOTH(uint64_t)

benchmarks/bench-partial-qsort.hpp

Lines changed: 28 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,34 @@
1-
#include "bench-qsort-common.h"
2-
3-
template <typename T>
4-
static void avx512_partial_qsort(benchmark::State &state)
1+
template <typename T, class... Args>
2+
static void simdpartialsort(benchmark::State &state, Args &&...args)
53
{
6-
if (!__builtin_cpu_supports("avx512bw")) {
7-
state.SkipWithMessage("Requires AVX512 BW ISA");
8-
}
9-
if ((sizeof(T) == 2) && (!__builtin_cpu_supports("avx512vbmi2"))) {
10-
state.SkipWithMessage("Requires AVX512 VBMI2 ISA");
11-
}
124
// Perform setup here
13-
int64_t K = state.range(0);
14-
size_t ARRSIZE = 10000;
5+
auto args_tuple = std::make_tuple(std::move(args)...);
6+
int64_t ARRSIZE = std::get<0>(args_tuple);
7+
int64_t k = std::get<1>(args_tuple);
158
std::vector<T> arr;
169
std::vector<T> arr_bkp;
1710

1811
/* Initialize elements */
1912
arr = get_uniform_rand_array<T>(ARRSIZE);
2013
arr_bkp = arr;
2114

22-
/* call avx512_partial_qsort */
15+
/* call simdpartialsort */
2316
for (auto _ : state) {
24-
avx512_partial_qsort<T>(arr.data(), K, ARRSIZE);
17+
x86simdsort::partial_qsort<T>(arr.data(), k, ARRSIZE);
2518

2619
state.PauseTiming();
2720
arr = arr_bkp;
2821
state.ResumeTiming();
2922
}
3023
}
3124

32-
template <typename T>
33-
static void stdpartialsort(benchmark::State &state)
25+
template <typename T, class... Args>
26+
static void scalarpartialsort(benchmark::State &state, Args &&...args)
3427
{
3528
// Perform setup here
36-
int64_t K = state.range(0);
37-
size_t ARRSIZE = 10000;
29+
auto args_tuple = std::make_tuple(std::move(args)...);
30+
int64_t ARRSIZE = std::get<0>(args_tuple);
31+
int64_t k = std::get<1>(args_tuple);
3832
std::vector<T> arr;
3933
std::vector<T> arr_bkp;
4034

@@ -44,59 +38,26 @@ static void stdpartialsort(benchmark::State &state)
4438

4539
/* call std::partial_sort */
4640
for (auto _ : state) {
47-
std::partial_sort(arr.begin(), arr.begin() + K, arr.end());
41+
std::partial_sort(arr.begin(), arr.begin() + k, arr.end());
4842

4943
state.PauseTiming();
5044
arr = arr_bkp;
5145
state.ResumeTiming();
5246
}
5347
}
5448

55-
// Register the function as a benchmark
56-
BENCHMARK(avx512_partial_qsort<float>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
57-
BENCHMARK(stdpartialsort<float>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
58-
BENCHMARK(avx512_partial_qsort<uint32_t>)
59-
->Arg(10)
60-
->Arg(100)
61-
->Arg(1000)
62-
->Arg(5000);
63-
BENCHMARK(stdpartialsort<uint32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
64-
BENCHMARK(avx512_partial_qsort<int32_t>)
65-
->Arg(10)
66-
->Arg(100)
67-
->Arg(1000)
68-
->Arg(5000);
69-
BENCHMARK(stdpartialsort<int32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
70-
71-
BENCHMARK(avx512_partial_qsort<double>)
72-
->Arg(10)
73-
->Arg(100)
74-
->Arg(1000)
75-
->Arg(5000);
76-
BENCHMARK(stdpartialsort<double>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
77-
BENCHMARK(avx512_partial_qsort<uint64_t>)
78-
->Arg(10)
79-
->Arg(100)
80-
->Arg(1000)
81-
->Arg(5000);
82-
BENCHMARK(stdpartialsort<uint64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
83-
BENCHMARK(avx512_partial_qsort<int64_t>)
84-
->Arg(10)
85-
->Arg(100)
86-
->Arg(1000)
87-
->Arg(5000);
88-
BENCHMARK(stdpartialsort<int64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
89-
90-
//BENCHMARK(avx512_partial_qsort<float16>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
91-
BENCHMARK(avx512_partial_qsort<uint16_t>)
92-
->Arg(10)
93-
->Arg(100)
94-
->Arg(1000)
95-
->Arg(5000);
96-
BENCHMARK(stdpartialsort<uint16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
97-
BENCHMARK(avx512_partial_qsort<int16_t>)
98-
->Arg(10)
99-
->Arg(100)
100-
->Arg(1000)
101-
->Arg(5000);
102-
BENCHMARK(stdpartialsort<int16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
49+
// Registers the simdpartialsort (x86simdsort::partial_qsort) and
// scalarpartialsort (std::partial_sort) benchmarks for `type`, each over the
// BENCH_PARTIAL set of k values. The invocations below instantiate the pair
// for the 16/32/64-bit integer types, float and double, plus _Float16 when the
// compiler defines __FLT16_MAX__.
#define BENCH_BOTH_PARTIAL(type) \
50+
BENCH_PARTIAL(simdpartialsort, type) \
51+
BENCH_PARTIAL(scalarpartialsort, type)
52+
53+
BENCH_BOTH_PARTIAL(uint64_t)
54+
BENCH_BOTH_PARTIAL(int64_t)
55+
BENCH_BOTH_PARTIAL(uint32_t)
56+
BENCH_BOTH_PARTIAL(int32_t)
57+
BENCH_BOTH_PARTIAL(uint16_t)
58+
BENCH_BOTH_PARTIAL(int16_t)
59+
BENCH_BOTH_PARTIAL(float)
60+
BENCH_BOTH_PARTIAL(double)
61+
#ifdef __FLT16_MAX__
62+
BENCH_BOTH_PARTIAL(_Float16)
63+
#endif

benchmarks/bench-qselect.hpp

Lines changed: 27 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,10 @@
1-
#include "bench-qsort-common.h"
2-
3-
template <typename T>
4-
static void avx512_qselect(benchmark::State &state)
1+
template <typename T, class... Args>
2+
static void simdqselect(benchmark::State &state, Args &&...args)
53
{
6-
if (!__builtin_cpu_supports("avx512bw")) {
7-
state.SkipWithMessage("Requires AVX512 BW ISA");
8-
}
9-
if ((sizeof(T) == 2) && (!__builtin_cpu_supports("avx512vbmi2"))) {
10-
state.SkipWithMessage("Requires AVX512 VBMI2 ISA");
11-
}
124
// Perform setup here
13-
int64_t K = state.range(0);
14-
size_t ARRSIZE = 10000;
5+
auto args_tuple = std::make_tuple(std::move(args)...);
6+
int64_t ARRSIZE = std::get<0>(args_tuple);
7+
int64_t k = std::get<1>(args_tuple);
158
std::vector<T> arr;
169
std::vector<T> arr_bkp;
1710

@@ -21,20 +14,21 @@ static void avx512_qselect(benchmark::State &state)
2114

2215
/* call x86simdsort quickselect */
2316
for (auto _ : state) {
24-
avx512_qselect<T>(arr.data(), K, ARRSIZE);
17+
x86simdsort::qselect<T>(arr.data(), k, ARRSIZE);
2518

2619
state.PauseTiming();
2720
arr = arr_bkp;
2821
state.ResumeTiming();
2922
}
3023
}
3124

32-
template <typename T>
33-
static void stdnthelement(benchmark::State &state)
25+
template <typename T, class... Args>
26+
static void scalarqselect(benchmark::State &state, Args &&...args)
3427
{
3528
// Perform setup here
36-
int64_t K = state.range(0);
37-
size_t ARRSIZE = 10000;
29+
auto args_tuple = std::make_tuple(std::move(args)...);
30+
int64_t ARRSIZE = std::get<0>(args_tuple);
31+
int64_t k = std::get<1>(args_tuple);
3832
std::vector<T> arr;
3933
std::vector<T> arr_bkp;
4034

@@ -44,31 +38,26 @@ static void stdnthelement(benchmark::State &state)
4438

4539
/* call std::nth_element */
4640
for (auto _ : state) {
47-
std::nth_element(arr.begin(), arr.begin() + K, arr.end());
41+
std::nth_element(arr.begin(), arr.begin() + k, arr.end());
4842

4943
state.PauseTiming();
5044
arr = arr_bkp;
5145
state.ResumeTiming();
5246
}
5347
}
5448

55-
// Register the function as a benchmark
56-
BENCHMARK(avx512_qselect<float>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
57-
BENCHMARK(stdnthelement<float>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
58-
BENCHMARK(avx512_qselect<uint32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
59-
BENCHMARK(stdnthelement<uint32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
60-
BENCHMARK(avx512_qselect<int32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
61-
BENCHMARK(stdnthelement<int32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
62-
63-
BENCHMARK(avx512_qselect<double>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
64-
BENCHMARK(stdnthelement<double>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
65-
BENCHMARK(avx512_qselect<uint64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
66-
BENCHMARK(stdnthelement<uint64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
67-
BENCHMARK(avx512_qselect<int64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
68-
BENCHMARK(stdnthelement<int64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
69-
70-
//BENCHMARK(avx512_qselect<float16>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
71-
BENCHMARK(avx512_qselect<uint16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
72-
BENCHMARK(stdnthelement<uint16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
73-
BENCHMARK(avx512_qselect<int16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
74-
BENCHMARK(stdnthelement<int16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
49+
// Registers the simdqselect (x86simdsort::qselect) and scalarqselect
// (std::nth_element) benchmarks for `type`, each over the BENCH_PARTIAL set of
// k values. The invocations below instantiate the pair for the 16/32/64-bit
// integer types, float and double, plus _Float16 when the compiler defines
// __FLT16_MAX__.
#define BENCH_BOTH_QSELECT(type) \
50+
BENCH_PARTIAL(simdqselect, type) \
51+
BENCH_PARTIAL(scalarqselect, type)
52+
53+
BENCH_BOTH_QSELECT(uint64_t)
54+
BENCH_BOTH_QSELECT(int64_t)
55+
BENCH_BOTH_QSELECT(uint32_t)
56+
BENCH_BOTH_QSELECT(int32_t)
57+
BENCH_BOTH_QSELECT(uint16_t)
58+
BENCH_BOTH_QSELECT(int16_t)
59+
BENCH_BOTH_QSELECT(float)
60+
BENCH_BOTH_QSELECT(double)
61+
#ifdef __FLT16_MAX__
62+
BENCH_BOTH_QSELECT(_Float16)
63+
#endif

benchmarks/bench-qsort.cpp

Lines changed: 0 additions & 4 deletions
This file was deleted.

0 commit comments

Comments
 (0)