Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 0 additions & 78 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,88 +1,10 @@
# When unset, discover g++. Prioritise the latest version on the path.
# "Unset" means CXX expands to nothing, or its origin is `default` (make's
# built-in value) or `undefined`: $(filter-out) drops those two origins and
# $(and) yields the empty string as soon as either operand is empty.
ifeq (, $(and $(strip $(CXX)), $(filter-out default undefined, $(origin CXX))))
# Candidates are listed newest first and `head -n 1` keeps the first path
# printed (this relies on `which` reporting matches in argument order).
# `override` lets the assignment win even over an empty CXX passed on the
# command line.
override CXX := $(shell which g++-13 g++-12 g++-11 g++-10 g++-9 g++-8 g++ 2>/dev/null | head -n 1)
# An empty result means none of the candidates was found; stop with a hint.
ifeq (, $(strip $(CXX)))
$(error Could not locate the g++ compiler. Please manually specify its path using the CXX variable)
endif
endif

# Default optimisation level and target architecture. MARCHFLAG may be
# reassigned further down if the compiler rejects it.
OPTIMFLAG := -O3
MARCHFLAG := -march=sapphirerapids

# Hand the selected compiler down to any child processes.
export CXX

# CXXFLAGS is appended to with recursive expansion, so the variables referenced
# here are resolved when the flags are used, not when these lines are read.
# The include paths use `override` so they survive a CXXFLAGS given on the
# command line.
CXXFLAGS += $(OPTIMFLAG) $(MARCHFLAG)
override CXXFLAGS += -I$(SRCDIR) -I$(UTILSDIR)

# pkg-config queries for GoogleTest and Google Benchmark. The backticks are
# stored verbatim and only evaluated by the shell when a recipe runs.
GTESTCFLAGS   := `pkg-config --cflags gtest_main`
GTESTLDFLAGS  := `pkg-config --static --libs gtest_main`
GBENCHCFLAGS  := `pkg-config --cflags benchmark`
GBENCHLDFLAGS := `pkg-config --static --libs benchmark`

# Project layout.
SRCDIR   := ./src
TESTDIR  := ./tests
BENCHDIR := ./benchmarks
UTILSDIR := ./utils

# Header files found in each directory (both .hpp and .h).
SRCS      := $(wildcard $(SRCDIR)/*.hpp $(SRCDIR)/*.h)
UTILSRCS  := $(wildcard $(UTILSDIR)/*.hpp $(UTILSDIR)/*.h)
TESTSRCS  := $(wildcard $(TESTDIR)/*.hpp $(TESTDIR)/*.h)
BENCHSRCS := $(wildcard $(BENCHDIR)/*.hpp $(BENCHDIR)/*.h)

# Translation units that get compiled into objects.
UTILS  := $(wildcard $(UTILSDIR)/*.cpp)
TESTS  := $(wildcard $(TESTDIR)/*.cpp)
BENCHS := $(wildcard $(BENCHDIR)/*.cpp)

# $(call test_cxx_flag,FLAGS): compile an empty C++ translation unit
# (/dev/null) with FLAGS, discarding the object file and any diagnostics, and
# expand to the compiler's exit status (0 when the flags were accepted).
test_cxx_flag = $(shell 2>/dev/null $(CXX) -o /dev/null $(1) -c -x c++ /dev/null; echo $$?)

# Compiling AVX512-FP16 instructions wasn't possible until GCC 12.
# Any non-zero status counts as "unsupported", not just 1, so a probe that
# fails in some other way cannot be mistaken for success.
ifneq ($(call test_cxx_flag,-mavx512fp16),0)
BENCHS_SKIP += bench-qsortfp16.cpp
TESTS_SKIP += test-qsortfp16.cpp
endif

# Sapphire Rapids was otherwise supported from GCC 11. Downgrade if required.
ifneq ($(call test_cxx_flag,$(MARCHFLAG)),0)
MARCHFLAG := -march=icelake-client
endif

# Object files to build: every .cpp found, minus the sources listed in the
# *_SKIP variables, with the .cpp suffix swapped for .o.
BENCHOBJS := $(addsuffix .o,$(basename $(filter-out $(addprefix $(BENCHDIR)/,$(BENCHS_SKIP)),$(BENCHS))))
TESTOBJS  := $(addsuffix .o,$(basename $(filter-out $(addprefix $(TESTDIR)/,$(TESTS_SKIP)),$(TESTS))))
UTILOBJS  := $(addsuffix .o,$(basename $(UTILS)))

# Empty rule for headers: keeps make from trying to work out how to generate
# the .hpp files (.cpp and .h have equivalent rules by default).
%.hpp:

# Entry points that are commands rather than files.
.PHONY: all test bench
.DEFAULT_GOAL := all

# `all` builds both executables; `test` and `bench` build one each.
all: test bench
test: testexe
bench: benchexe

# No explicit compile recipe is given for the objects below; these lines only
# add prerequisites and flags, so a change to any listed header rebuilds them.
$(UTILOBJS): $(UTILSRCS)

# Test objects depend on every test, utility and library header, and are
# compiled with the GoogleTest include flags on top of CXXFLAGS.
$(TESTOBJS): $(TESTSRCS) $(UTILSRCS) $(SRCS)
$(TESTDIR)/%.o: override CXXFLAGS += $(GTESTCFLAGS)

# Link the test runner. The recipe line must begin with a hard TAB.
testexe: $(TESTOBJS) $(UTILOBJS)
	$(CXX) $(CXXFLAGS) $^ $(LDLIBS) $(LDFLAGS) -lgtest_main $(GTESTLDFLAGS) -o $@

# Benchmark objects mirror the test objects, using the Google Benchmark flags.
$(BENCHOBJS): $(BENCHSRCS) $(UTILSRCS) $(SRCS)
$(BENCHDIR)/%.o: override CXXFLAGS += $(GBENCHCFLAGS)

# Link the benchmark runner. The recipe line must begin with a hard TAB.
benchexe: $(BENCHOBJS) $(UTILOBJS)
	$(CXX) $(CXXFLAGS) $^ $(LDLIBS) $(LDFLAGS) -lbenchmark_main $(GBENCHLDFLAGS) -o $@

# Configure a release build with meson in ./builddir, then build it with ninja.
# Each recipe line runs in its own shell, hence `cd ... && ninja` on one line.
.PHONY: meson
meson:
	meson setup --warnlevel 2 --werror --buildtype release builddir
	cd builddir && ninja

# Same as `meson`, but a debug build configured in ./debug.
.PHONY: mesondebug
mesondebug:
	meson setup --warnlevel 2 --werror --buildtype debug debug
	cd debug && ninja

# Remove everything this Makefile produces: object files, both executables and
# the meson build trees (`builddir` from the meson target, `debug` from the
# mesondebug target).
.PHONY: clean
clean:
	$(RM) -rf $(TESTOBJS) $(BENCHOBJS) $(UTILOBJS) testexe benchexe builddir debug
6 changes: 3 additions & 3 deletions lib/x86simdsort-avx2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

#define DEFINE_ALL_METHODS(type) \
template <> \
void qsort(type *arr, size_t arrsize) \
void qsort(type *arr, size_t arrsize, bool hasnan) \
{ \
avx2_qsort(arr, arrsize); \
avx2_qsort(arr, arrsize, hasnan); \
} \
template <> \
void qselect(type *arr, size_t k, size_t arrsize, bool hasnan) \
Expand All @@ -24,5 +24,5 @@ namespace avx2 {
DEFINE_ALL_METHODS(uint32_t)
DEFINE_ALL_METHODS(int32_t)
DEFINE_ALL_METHODS(float)
} // namespace avx512
} // namespace avx2
} // namespace xss
8 changes: 4 additions & 4 deletions lib/x86simdsort-icl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
namespace xss {
namespace avx512 {
template <>
void qsort(uint16_t *arr, size_t size)
void qsort(uint16_t *arr, size_t size, bool hasnan)
{
avx512_qsort(arr, size);
avx512_qsort(arr, size, hasnan);
}
template <>
void qselect(uint16_t *arr, size_t k, size_t arrsize, bool hasnan)
Expand All @@ -20,9 +20,9 @@ namespace avx512 {
avx512_partial_qsort(arr, k, arrsize, hasnan);
}
template <>
void qsort(int16_t *arr, size_t size)
void qsort(int16_t *arr, size_t size, bool hasnan)
{
avx512_qsort(arr, size);
avx512_qsort(arr, size, hasnan);
}
template <>
void qselect(int16_t *arr, size_t k, size_t arrsize, bool hasnan)
Expand Down
21 changes: 12 additions & 9 deletions lib/x86simdsort-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace xss {
namespace avx512 {
// quicksort
template <typename T>
XSS_HIDE_SYMBOL void qsort(T *arr, size_t arrsize);
XSS_HIDE_SYMBOL void qsort(T *arr, size_t arrsize, bool hasnan = false);
// quickselect
template <typename T>
XSS_HIDE_SYMBOL void
Expand All @@ -19,16 +19,17 @@ namespace avx512 {
partial_qsort(T *arr, size_t k, size_t arrsize, bool hasnan = false);
// argsort
template <typename T>
XSS_HIDE_SYMBOL std::vector<size_t> argsort(T *arr, size_t arrsize);
XSS_HIDE_SYMBOL std::vector<size_t>
argsort(T *arr, size_t arrsize, bool hasnan = false);
// argselect
template <typename T>
XSS_HIDE_SYMBOL std::vector<size_t>
argselect(T *arr, size_t k, size_t arrsize);
argselect(T *arr, size_t k, size_t arrsize, bool hasnan = false);
} // namespace avx512
namespace avx2 {
// quicksort
template <typename T>
XSS_HIDE_SYMBOL void qsort(T *arr, size_t arrsize);
XSS_HIDE_SYMBOL void qsort(T *arr, size_t arrsize, bool hasnan = false);
// quickselect
template <typename T>
XSS_HIDE_SYMBOL void
Expand All @@ -39,16 +40,17 @@ namespace avx2 {
partial_qsort(T *arr, size_t k, size_t arrsize, bool hasnan = false);
// argsort
template <typename T>
XSS_HIDE_SYMBOL std::vector<size_t> argsort(T *arr, size_t arrsize);
XSS_HIDE_SYMBOL std::vector<size_t>
argsort(T *arr, size_t arrsize, bool hasnan = false);
// argselect
template <typename T>
XSS_HIDE_SYMBOL std::vector<size_t>
argselect(T *arr, size_t k, size_t arrsize);
argselect(T *arr, size_t k, size_t arrsize, bool hasnan = false);
} // namespace avx2
namespace scalar {
// quicksort
template <typename T>
XSS_HIDE_SYMBOL void qsort(T *arr, size_t arrsize);
XSS_HIDE_SYMBOL void qsort(T *arr, size_t arrsize, bool hasnan = false);
// quickselect
template <typename T>
XSS_HIDE_SYMBOL void
Expand All @@ -59,11 +61,12 @@ namespace scalar {
partial_qsort(T *arr, size_t k, size_t arrsize, bool hasnan = false);
// argsort
template <typename T>
XSS_HIDE_SYMBOL std::vector<size_t> argsort(T *arr, size_t arrsize);
XSS_HIDE_SYMBOL std::vector<size_t>
argsort(T *arr, size_t arrsize, bool hasnan = false);
// argselect
template <typename T>
XSS_HIDE_SYMBOL std::vector<size_t>
argselect(T *arr, size_t k, size_t arrsize);
argselect(T *arr, size_t k, size_t arrsize, bool hasnan = false);
} // namespace scalar
} // namespace xss
#endif
15 changes: 11 additions & 4 deletions lib/x86simdsort-scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,14 @@
namespace xss {
namespace scalar {
template <typename T>
void qsort(T *arr, size_t arrsize)
void qsort(T *arr, size_t arrsize, bool hasnan)
{
std::sort(arr, arr + arrsize, compare<T, std::less<T>>());
if (hasnan) {
std::sort(arr, arr + arrsize, compare<T, std::less<T>>());
}
else {
std::sort(arr, arr + arrsize);
}
}
template <typename T>
void qselect(T *arr, size_t k, size_t arrsize, bool hasnan)
Expand All @@ -32,16 +37,18 @@ namespace scalar {
}
}
template <typename T>
std::vector<size_t> argsort(T *arr, size_t arrsize)
std::vector<size_t> argsort(T *arr, size_t arrsize, bool hasnan)
{
UNUSED(hasnan);
std::vector<size_t> arg(arrsize);
std::iota(arg.begin(), arg.end(), 0);
std::sort(arg.begin(), arg.end(), compare_arg<T, std::less<T>>(arr));
return arg;
}
template <typename T>
std::vector<size_t> argselect(T *arr, size_t k, size_t arrsize)
std::vector<size_t> argselect(T *arr, size_t k, size_t arrsize, bool hasnan)
{
UNUSED(hasnan);
std::vector<size_t> arg(arrsize);
std::iota(arg.begin(), arg.end(), 0);
std::nth_element(arg.begin(),
Expand Down
13 changes: 7 additions & 6 deletions lib/x86simdsort-skx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

#define DEFINE_ALL_METHODS(type) \
template <> \
void qsort(type *arr, size_t arrsize) \
void qsort(type *arr, size_t arrsize, bool hasnan) \
{ \
avx512_qsort(arr, arrsize); \
avx512_qsort(arr, arrsize, hasnan); \
} \
template <> \
void qselect(type *arr, size_t k, size_t arrsize, bool hasnan) \
Expand All @@ -21,14 +21,15 @@
avx512_partial_qsort(arr, k, arrsize, hasnan); \
} \
template <> \
std::vector<size_t> argsort(type *arr, size_t arrsize) \
std::vector<size_t> argsort(type *arr, size_t arrsize, bool hasnan) \
{ \
return avx512_argsort(arr, arrsize); \
return avx512_argsort(arr, arrsize, hasnan); \
} \
template <> \
std::vector<size_t> argselect(type *arr, size_t k, size_t arrsize) \
std::vector<size_t> argselect( \
type *arr, size_t k, size_t arrsize, bool hasnan) \
{ \
return avx512_argselect(arr, k, arrsize); \
return avx512_argselect(arr, k, arrsize, hasnan); \
}

namespace xss {
Expand Down
4 changes: 2 additions & 2 deletions lib/x86simdsort-spr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
namespace xss {
namespace avx512 {
template <>
void qsort(_Float16 *arr, size_t size)
void qsort(_Float16 *arr, size_t size, bool hasnan)
{
avx512_qsort(arr, size);
avx512_qsort(arr, size, hasnan);
}
template <>
void qselect(_Float16 *arr, size_t k, size_t arrsize, bool hasnan)
Expand Down
19 changes: 10 additions & 9 deletions lib/x86simdsort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,11 @@ dispatch_requested(std::string_view cpurequested,
#define CAT(a, b) CAT_(a, b)

#define DECLARE_INTERNAL_qsort(TYPE) \
static void (*internal_qsort##TYPE)(TYPE *, size_t) = NULL; \
static void (*internal_qsort##TYPE)(TYPE *, size_t, bool) = NULL; \
template <> \
void qsort(TYPE *arr, size_t arrsize) \
void qsort(TYPE *arr, size_t arrsize, bool hasnan) \
{ \
(*internal_qsort##TYPE)(arr, arrsize); \
(*internal_qsort##TYPE)(arr, arrsize, hasnan); \
}

#define DECLARE_INTERNAL_qselect(TYPE) \
Expand All @@ -81,22 +81,23 @@ dispatch_requested(std::string_view cpurequested,
}

#define DECLARE_INTERNAL_argsort(TYPE) \
static std::vector<size_t> (*internal_argsort##TYPE)(TYPE *, size_t) \
static std::vector<size_t> (*internal_argsort##TYPE)(TYPE *, size_t, bool) \
= NULL; \
template <> \
std::vector<size_t> argsort(TYPE *arr, size_t arrsize) \
std::vector<size_t> argsort(TYPE *arr, size_t arrsize, bool hasnan) \
{ \
return (*internal_argsort##TYPE)(arr, arrsize); \
return (*internal_argsort##TYPE)(arr, arrsize, hasnan); \
}

#define DECLARE_INTERNAL_argselect(TYPE) \
static std::vector<size_t> (*internal_argselect##TYPE)( \
TYPE *, size_t, size_t) \
TYPE *, size_t, size_t, bool) \
= NULL; \
template <> \
std::vector<size_t> argselect(TYPE *arr, size_t k, size_t arrsize) \
std::vector<size_t> argselect( \
TYPE *arr, size_t k, size_t arrsize, bool hasnan) \
{ \
return (*internal_argselect##TYPE)(arr, k, arrsize); \
return (*internal_argselect##TYPE)(arr, k, arrsize, hasnan); \
}

/* runtime dispatch mechanism */
Expand Down
14 changes: 11 additions & 3 deletions lib/x86simdsort.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,33 @@

#define XSS_EXPORT_SYMBOL __attribute__((visibility("default")))
#define XSS_HIDE_SYMBOL __attribute__((visibility("hidden")))
#define UNUSED(x) (void)(x)

namespace x86simdsort {

// quicksort
template <typename T>
XSS_EXPORT_SYMBOL void qsort(T *arr, size_t arrsize);
XSS_EXPORT_SYMBOL void qsort(T *arr, size_t arrsize, bool hasnan = false);

// quickselect
template <typename T>
XSS_EXPORT_SYMBOL void
qselect(T *arr, size_t k, size_t arrsize, bool hasnan = false);

// partial sort
template <typename T>
XSS_EXPORT_SYMBOL void
partial_qsort(T *arr, size_t k, size_t arrsize, bool hasnan = false);

// argsort
template <typename T>
XSS_EXPORT_SYMBOL std::vector<size_t> argsort(T *arr, size_t arrsize);
XSS_EXPORT_SYMBOL std::vector<size_t>
argsort(T *arr, size_t arrsize, bool hasnan = false);

// argselect
template <typename T>
XSS_EXPORT_SYMBOL std::vector<size_t>
argselect(T *arr, size_t k, size_t arrsize);
argselect(T *arr, size_t k, size_t arrsize, bool hasnan = false);

} // namespace x86simdsort
#endif
12 changes: 7 additions & 5 deletions src/avx512-16bit-qsort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -519,12 +519,14 @@ bool is_a_nan<uint16_t>(uint16_t elem)
}

X86_SIMD_SORT_INLINE
void avx512_qsort_fp16(uint16_t *arr, arrsize_t arrsize)
void avx512_qsort_fp16(uint16_t *arr, arrsize_t arrsize, bool hasnan = false)
{
if (arrsize > 1) {
arrsize_t nan_count
= replace_nan_with_inf<zmm_vector<float16>, uint16_t>(arr,
arrsize);
arrsize_t nan_count = 0;
if (UNLIKELY(hasnan)) {
nan_count = replace_nan_with_inf<zmm_vector<float16>, uint16_t>(
arr, arrsize);
}
qsort_<zmm_vector<float16>, uint16_t>(
arr, 0, arrsize - 1, 2 * (arrsize_t)log2(arrsize));
replace_inf_with_nan(arr, arrsize, nan_count);
Expand All @@ -535,7 +537,7 @@ X86_SIMD_SORT_INLINE
void avx512_qselect_fp16(uint16_t *arr,
arrsize_t k,
arrsize_t arrsize,
bool hasnan = true)
bool hasnan = false)
{
arrsize_t indx_last_elem = arrsize - 1;
if (UNLIKELY(hasnan)) {
Expand Down
Loading