Skip to content

Commit 6e9a93c

Browse files
author
Raghuveer Devulapalli
committed
Median: use median of 8*8 elements
1 parent ab29478 commit 6e9a93c

File tree

1 file changed

+16
-12
lines changed

1 file changed

+16
-12
lines changed

src/avx512-64bit-common.h

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -384,21 +384,25 @@ X86_SIMD_SORT_INLINE type_t get_pivot_64bit(type_t *arr,
384384
const int64_t left,
385385
const int64_t right)
386386
{
387-
// median of 8
387+
// median of 8x8 elements: 8 vectors of 8 elements each, loaded at a stride of (right - left) / 8
388388
int64_t size = (right - left) / 8;
389389
using zmm_t = typename vtype::zmm_t;
390-
__m512i rand_index = _mm512_set_epi64(left + size,
391-
left + 2 * size,
392-
left + 3 * size,
393-
left + 4 * size,
394-
left + 5 * size,
395-
left + 6 * size,
396-
left + 7 * size,
397-
left + 8 * size);
398-
zmm_t rand_vec = vtype::template i64gather<sizeof(type_t)>(rand_index, arr);
390+
zmm_t v[8];
391+
for (int64_t ii = 0; ii < 8; ++ii) {
392+
v[ii] = vtype::loadu(arr + left + ii*size);
393+
}
394+
COEX<vtype>(v[0], v[1]); COEX<vtype>(v[2], v[3]); /* step 1 */
395+
COEX<vtype>(v[4], v[5]); COEX<vtype>(v[6], v[7]);
396+
COEX<vtype>(v[0], v[2]); COEX<vtype>(v[1], v[3]); /* step 2 */
397+
COEX<vtype>(v[4], v[6]); COEX<vtype>(v[5], v[7]);
398+
COEX<vtype>(v[0], v[4]); COEX<vtype>(v[1], v[2]); /* step 3 */
399+
COEX<vtype>(v[5], v[6]); COEX<vtype>(v[3], v[7]);
400+
COEX<vtype>(v[1], v[5]); COEX<vtype>(v[2], v[6]); /* step 4 */
401+
COEX<vtype>(v[3], v[5]); COEX<vtype>(v[2], v[4]); /* step 5 */
402+
COEX<vtype>(v[3], v[4]); /* step 6 */
399403
// pivot will never be a NaN, since there are no NaNs!
400-
zmm_t sort = sort_zmm_64bit<vtype>(rand_vec);
404+
zmm_t sort = sort_zmm_64bit<vtype>(v[3]);
401405
return ((type_t *)&sort)[4];
402406
}
403407

404-
#endif
408+
#endif

0 commit comments

Comments
 (0)