Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build-numpy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ on:
jobs:
NumPyMultiarrayTests:

runs-on: ubuntu-latest
runs-on: intel-ubuntu-latest

steps:
- name: Checkout x86-simd-sort
Expand Down
105 changes: 70 additions & 35 deletions .github/workflows/c-cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,17 @@ on:
branches: [ "main" ]

jobs:
ICX:
SKL:

runs-on: ubuntu-latest
runs-on: intel-ubuntu-latest

steps:
- uses: actions/checkout@v3

- name: Install dependencies
run: |
sudo apt update
sudo apt -y install g++-10 libgtest-dev meson curl git cmake
sudo apt -y install g++-13 libgtest-dev meson curl git cmake

- name: Install google benchmarks
run: |
Expand All @@ -29,33 +29,33 @@ jobs:

- name: Install Intel SDE
run: |
curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/732268/sde-external-9.7.0-2022-05-09-lin.tar.xz
curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/784319/sde-external-9.24.0-2023-07-13-lin.tar.xz
mkdir /tmp/sde && tar -xvf /tmp/sde.tar.xz -C /tmp/sde/
sudo mv /tmp/sde/* /opt/sde && sudo ln -s /opt/sde/sde64 /usr/bin/sde

- name: Build
env:
CXX: g++-10
CXX: g++-13
run: |
make clean
meson setup --warnlevel 2 --werror --buildtype plain builddir
meson setup --warnlevel 2 --werror --buildtype release builddir
cd builddir
ninja

- name: Run test suite on ICX
run: sde -icx -- ./builddir/testexe
- name: Run test suite on SKL
run: sde -skl -- ./builddir/testexe

SPR:
SKX:

runs-on: ubuntu-latest
runs-on: intel-ubuntu-latest

steps:
- uses: actions/checkout@v3

- name: Install dependencies
run: |
sudo apt update
sudo apt -y install g++-12 libgtest-dev meson curl git cmake
sudo apt -y install g++-13 libgtest-dev meson curl git cmake

- name: Install google benchmarks
run: |
Expand All @@ -67,58 +67,93 @@ jobs:

- name: Install Intel SDE
run: |
curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/732268/sde-external-9.7.0-2022-05-09-lin.tar.xz
curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/784319/sde-external-9.24.0-2023-07-13-lin.tar.xz
mkdir /tmp/sde && tar -xvf /tmp/sde.tar.xz -C /tmp/sde/
sudo mv /tmp/sde/* /opt/sde && sudo ln -s /opt/sde/sde64 /usr/bin/sde

- name: Build
env:
CXX: g++-12
CXX: g++-13
run: |
make clean
meson setup --warnlevel 2 --werror --buildtype plain builddir
meson setup --warnlevel 2 --werror --buildtype release builddir
cd builddir
ninja

- name: Run _Float16 test suite on SPR
run: sde -spr -- ./builddir/testexe --gtest_filter="*float16*"
- name: Run test suite on SKX
run: sde -skx -- ./builddir/testexe

compare-benchmarks-with-main:
if: ${{ false }} # disable for now
TGL:

runs-on: ubuntu-latest
runs-on: intel-ubuntu-latest

steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
path: x86-simd-sort

- name: Specify branch name
working-directory: ${{ github.workspace }}/x86-simd-sort
run: git switch -c pr-branch
- name: Install dependencies
run: |
sudo apt update
sudo apt -y install g++-13 libgtest-dev meson curl git cmake

- name: Install google benchmarks
run: |
git clone https://github.com/google/benchmark.git
cd benchmark
cmake -E make_directory "build"
cmake -E chdir "build" cmake -DBENCHMARK_ENABLE_GTEST_TESTS=OFF -DBENCHMARK_ENABLE_TESTING=OFF -DCMAKE_BUILD_TYPE=Release ../
sudo cmake --build "build" --config Release --target install

- name: Install Intel SDE
run: |
curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/784319/sde-external-9.24.0-2023-07-13-lin.tar.xz
mkdir /tmp/sde && tar -xvf /tmp/sde.tar.xz -C /tmp/sde/
sudo mv /tmp/sde/* /opt/sde && sudo ln -s /opt/sde/sde64 /usr/bin/sde

- name: Build
env:
CXX: g++-13
run: |
make clean
meson setup --warnlevel 2 --werror --buildtype release builddir
cd builddir
ninja
- name: Run test suite on TGL
run: sde -tgl -- ./builddir/testexe

SPR:

- uses: actions/setup-python@v4
with:
python-version: '3.9'
runs-on: intel-ubuntu-latest

steps:
- uses: actions/checkout@v3

- name: Install dependencies
run: |
sudo apt update
sudo apt -y install g++-12 libgtest-dev meson curl git cmake
sudo apt -y install g++-13 libgtest-dev meson curl git cmake

- name: Install google benchmarks
run: |
git clone https://github.com/google/benchmark.git
cd benchmark
pip3 install -r tools/requirements.txt
cmake -E make_directory "build"
cmake -E chdir "build" cmake -DBENCHMARK_ENABLE_GTEST_TESTS=OFF -DBENCHMARK_ENABLE_TESTING=OFF -DCMAKE_BUILD_TYPE=Release ../
sudo cmake --build "build" --config Release --target install

- name: Run bench-compare
working-directory: ${{ github.workspace }}/x86-simd-sort
- name: Install Intel SDE
run: |
curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/784319/sde-external-9.24.0-2023-07-13-lin.tar.xz
mkdir /tmp/sde && tar -xvf /tmp/sde.tar.xz -C /tmp/sde/
sudo mv /tmp/sde/* /opt/sde && sudo ln -s /opt/sde/sde64 /usr/bin/sde

- name: Build
env:
CXX: g++-12
GBENCH: ${{ github.workspace }}/benchmark
run: bash -x scripts/branch-compare.sh avx
CXX: g++-13
run: |
make clean
meson setup --warnlevel 2 --werror --buildtype release builddir
cd builddir
ninja

- name: Run test suite on SPR
run: sde -spr -- ./builddir/testexe
7 changes: 6 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,14 @@ benchexe: $(BENCHOBJS) $(UTILOBJS)

.PHONY: meson
meson:
meson setup --warnlevel 2 --werror --buildtype plain builddir
meson setup --warnlevel 2 --werror --buildtype release builddir
cd builddir && ninja

.PHONY: mesondebug
mesondebug:
meson setup --warnlevel 2 --werror --buildtype debug debug
cd debug && ninja

.PHONY: clean
clean:
$(RM) -rf $(TESTOBJS) $(BENCHOBJS) $(UTILOBJS) testexe benchexe builddir
2 changes: 1 addition & 1 deletion _clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ KeepEmptyLinesAtTheStartOfBlocks: true
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
NamespaceIndentation: Inner
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
Expand Down
34 changes: 20 additions & 14 deletions benchmarks/bench-qsort-common.h → benchmarks/bench-all.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,5 @@
#ifndef AVX512_BENCH_COMMON
#define AVX512_BENCH_COMMON

#include "avx512-16bit-qsort.hpp"
#include "avx512-32bit-qsort.hpp"
#include "avx512-64bit-argsort.hpp"
#include "avx512-64bit-qsort.hpp"

#include "rand_array.h"
#include "x86simdsort.h"
#include <benchmark/benchmark.h>

#define MY_BENCHMARK_CAPTURE(func, T, test_case_name, ...) \
Expand All @@ -18,11 +11,15 @@
func<T>(st, __VA_ARGS__); \
})))

#define BENCH(func, type) \
MY_BENCHMARK_CAPTURE(func, type, smallrandom_128, 128, std::string("random")); \
MY_BENCHMARK_CAPTURE(func, type, smallrandom_256, 256, std::string("random")); \
MY_BENCHMARK_CAPTURE(func, type, smallrandom_512, 512, std::string("random")); \
MY_BENCHMARK_CAPTURE(func, type, smallrandom_1k, 1024, std::string("random")); \
#define BENCH_SORT(func, type) \
MY_BENCHMARK_CAPTURE( \
func, type, smallrandom_128, 128, std::string("random")); \
MY_BENCHMARK_CAPTURE( \
func, type, smallrandom_256, 256, std::string("random")); \
MY_BENCHMARK_CAPTURE( \
func, type, smallrandom_512, 512, std::string("random")); \
MY_BENCHMARK_CAPTURE( \
func, type, smallrandom_1k, 1024, std::string("random")); \
MY_BENCHMARK_CAPTURE(func, type, random_5k, 5000, std::string("random")); \
MY_BENCHMARK_CAPTURE( \
func, type, random_100k, 100000, std::string("random")); \
Expand All @@ -37,4 +34,13 @@
MY_BENCHMARK_CAPTURE( \
func, type, reverse_10k, 10000, std::string("reverse"));

#endif
#define BENCH_PARTIAL(func, type) \
MY_BENCHMARK_CAPTURE(func, type, k10, 10000, 10); \
MY_BENCHMARK_CAPTURE(func, type, k100, 10000, 100); \
MY_BENCHMARK_CAPTURE(func, type, k1000, 10000, 1000); \
MY_BENCHMARK_CAPTURE(func, type, k5000, 10000, 5000);

#include "bench-argsort.hpp"
#include "bench-partial-qsort.hpp"
#include "bench-qselect.hpp"
#include "bench-qsort.hpp"
79 changes: 20 additions & 59 deletions benchmarks/bench-argsort.hpp
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
#include "bench-qsort-common.h"

template <typename T>
std::vector<int64_t> stdargsort(const std::vector<T> &array)
std::vector<size_t> stdargsort(const std::vector<T> &array)
{
std::vector<int64_t> indices(array.size());
std::vector<size_t> indices(array.size());
std::iota(indices.begin(), indices.end(), 0);
std::sort(indices.begin(),
indices.end(),
[&array](int64_t left, int64_t right) -> bool {
[&array](size_t left, size_t right) -> bool {
// sort indices according to corresponding array element
return array[left] < array[right];
});
Expand All @@ -16,77 +14,40 @@ std::vector<int64_t> stdargsort(const std::vector<T> &array)
}

template <typename T, class... Args>
static void stdargsort(benchmark::State &state, Args &&...args)
static void scalarargsort(benchmark::State &state, Args &&...args)
{
// get args
auto args_tuple = std::make_tuple(std::move(args)...);
// Perform setup here
size_t ARRSIZE = std::get<0>(args_tuple);
std::vector<T> arr;
std::vector<int64_t> inx;

size_t arrsize = std::get<0>(args_tuple);
std::string arrtype = std::get<1>(args_tuple);
if (arrtype == "random") { arr = get_uniform_rand_array<T>(ARRSIZE); }
else if (arrtype == "sorted") {
arr = get_uniform_rand_array<T>(ARRSIZE);
std::sort(arr.begin(), arr.end());
}
else if (arrtype == "constant") {
T temp = get_uniform_rand_array<T>(1)[0];
for (size_t ii = 0; ii < ARRSIZE; ++ii) {
arr.push_back(temp);
}
}
else if (arrtype == "reverse") {
arr = get_uniform_rand_array<T>(ARRSIZE);
std::sort(arr.begin(), arr.end());
std::reverse(arr.begin(), arr.end());
}

/* call avx512 quicksort */
// set up array
std::vector<T> arr = get_array<T>(arrtype, arrsize);
std::vector<size_t> inx;
// benchmark
for (auto _ : state) {
inx = stdargsort(arr);
}
}

template <typename T, class... Args>
static void avx512argsort(benchmark::State &state, Args &&...args)
static void simdargsort(benchmark::State &state, Args &&...args)
{
// get args
auto args_tuple = std::make_tuple(std::move(args)...);
if (!__builtin_cpu_supports("avx512bw")) {
state.SkipWithMessage("Requires AVX512 BW ISA");
}
// Perform setup here
size_t ARRSIZE = std::get<0>(args_tuple);
std::vector<T> arr;
std::vector<int64_t> inx;

size_t arrsize = std::get<0>(args_tuple);
std::string arrtype = std::get<1>(args_tuple);
if (arrtype == "random") { arr = get_uniform_rand_array<T>(ARRSIZE); }
else if (arrtype == "sorted") {
arr = get_uniform_rand_array<T>(ARRSIZE);
std::sort(arr.begin(), arr.end());
}
else if (arrtype == "constant") {
T temp = get_uniform_rand_array<T>(1)[0];
for (size_t ii = 0; ii < ARRSIZE; ++ii) {
arr.push_back(temp);
}
}
else if (arrtype == "reverse") {
arr = get_uniform_rand_array<T>(ARRSIZE);
std::sort(arr.begin(), arr.end());
std::reverse(arr.begin(), arr.end());
}

/* call avx512 quicksort */
// set up array
std::vector<T> arr = get_array<T>(arrtype, arrsize);
std::vector<size_t> inx;
// benchmark
for (auto _ : state) {
inx = avx512_argsort<T>(arr.data(), ARRSIZE);
inx = x86simdsort::argsort(arr.data(), arrsize);
}
}

#define BENCH_BOTH(type) \
BENCH(avx512argsort, type) \
BENCH(stdargsort, type)
BENCH_SORT(simdargsort, type) \
BENCH_SORT(scalarargsort, type)

BENCH_BOTH(int64_t)
BENCH_BOTH(uint64_t)
Expand Down
Loading