Skip to content

Commit 5224f60

Browse files
committed
Some style changes and commenting, plus limits thread count to 8 by default
1 parent 5c81fb1 commit 5224f60

File tree

1 file changed

+20
-5
lines changed

1 file changed

+20
-5
lines changed

src/xss-common-keyvaluesort.hpp

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@
1111
#include "xss-common-qsort.h"
1212
#include "xss-network-keyvaluesort.hpp"
1313

14+
#if defined(XSS_USE_OPENMP) && defined(_OPENMP)
15+
#define XSS_COMPILE_OPENMP
16+
#include <omp.h>
17+
#endif
18+
1419
/*
1520
* Partition one ZMM register based on the pivot and return the index of the
1621
* last element that is less than or equal to the pivot.
@@ -393,7 +398,7 @@ X86_SIMD_SORT_INLINE void kvsort_(type1_t *keys,
393398
arrsize_t pivot_index = kvpartition_unrolled<vtype1, vtype2, 4>(
394399
keys, indexes, left, right + 1, pivot, &smallest, &biggest);
395400

396-
#if defined(XSS_USE_OPENMP) && defined(_OPENMP)
401+
#ifdef XSS_COMPILE_OPENMP
397402
if (pivot != smallest) {
398403
bool parallel_left = (pivot_index - left) > task_threshold;
399404
if (parallel_left) {
@@ -534,18 +539,28 @@ X86_SIMD_SORT_INLINE void xss_qsort_kv(
534539
UNUSED(hasnan);
535540
}
536541

537-
#if defined(XSS_USE_OPENMP) && defined(_OPENMP)
542+
#ifdef XSS_COMPILE_OPENMP
543+
538544
bool use_parallel = arrsize > 10000;
539-
arrsize_t task_threshold = std::max((arrsize_t)10000, arrsize / 100);
545+
540546
if (use_parallel) {
541-
#pragma omp parallel
547+
// This thread limit was determined experimentally; it may be better for it to be the number of physical cores on the system
548+
constexpr int thread_limit = 8;
549+
int thread_count = std::min(thread_limit, omp_get_max_threads());
550+
arrsize_t task_threshold
551+
= std::max((arrsize_t)10000, arrsize / 100);
552+
553+
// We use omp parallel and then omp single to set up the threads that will run the omp task calls in kvsort_
554+
// The omp single prevents multiple threads from running the initial kvsort_ simultaneously and causing problems
555+
// Note that we do not use the if(...) clause built into OpenMP, because it causes a performance regression for small arrays
556+
#pragma omp parallel num_threads(thread_count)
542557
#pragma omp single
543558
kvsort_<keytype, valtype>(
544559
keys, indexes, 0, arrsize - 1, maxiters, task_threshold);
545560
}
546561
else {
547562
kvsort_<keytype, valtype>(
548-
keys, indexes, 0, arrsize - 1, maxiters, task_threshold);
563+
keys, indexes, 0, arrsize - 1, maxiters, 0);
549564
}
550565
#else
551566
kvsort_<keytype, valtype>(keys, indexes, 0, arrsize - 1, maxiters, 0);

0 commit comments

Comments
 (0)