Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions .github/workflows/build-numpy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,57 @@ jobs:
python -c "import numpy; numpy.show_config()" &&
python -m pytest $NUMPY_SITE/numpy/_core/tests/test_multiarray.py

NumPy-SPR-baseline:
  # Builds NumPy `main` with the x86-simd-sort sources from this checkout
  # swapped into NumPy's tree and the CPU baseline set to avx512_spr.
  # This job only builds; it does not run any tests.

  runs-on: intel-ubuntu-latest

  steps:
  - name: Checkout x86-simd-sort
    uses: actions/checkout@v3
    with:
      fetch-depth: 0
      path: x86-simd-sort

  # Give the checked-out commit a named local branch so the later
  # "Update x86-simd-sort" step can fetch it and check it out as
  # temp/pr-branch.
  - name: Specify branch name
    working-directory: ${{ github.workspace }}/x86-simd-sort
    run: git switch -c pr-branch

  - name: Install build dependencies
    run: |
      sudo apt update
      sudo apt -y install g++-12 gcc-12 git

  - name: Checkout NumPy main
    uses: actions/checkout@v3
    with:
      repository: numpy/numpy
      submodules: recursive
      fetch-depth: 0
      ref: main
      path: numpy

  - uses: actions/setup-python@v4
    with:
      python-version: '3.11'

  - name: Install NumPy dependencies
    working-directory: ${{ github.workspace }}/numpy
    run: |
      pip install -r build_requirements.txt
      pip install -r test_requirements.txt

  # Point the x86-simd-sort copy inside NumPy's tree (presumably a git
  # submodule, given `submodules: recursive` above) at the local checkout
  # by adding it as a temporary remote and checking out its pr-branch.
  - name: Update x86-simd-sort
    working-directory: ${{ github.workspace }}/numpy
    run: |
      cd numpy/_core/src/npysort/x86-simd-sort
      git remote add temp ${{ github.workspace }}/x86-simd-sort
      git fetch temp
      git checkout temp/pr-branch

  # CC/CXX select the gcc-12 toolchain installed above.
  - name: Build NumPy with cpu baseline SPR
    working-directory: ${{ github.workspace }}/numpy
    env:
      CXX: g++-12
      CC: gcc-12
    run: |
      spin build -- -Dallow-noblas=true -Dcpu-baseline=avx512_spr
31 changes: 15 additions & 16 deletions src/avx512-16bit-qsort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,8 @@ struct zmm_vector<uint16_t> {
};

template <>
bool comparison_func<zmm_vector<float16>>(const uint16_t &a, const uint16_t &b)
X86_SIMD_SORT_INLINE_ONLY bool
comparison_func<zmm_vector<float16>>(const uint16_t &a, const uint16_t &b)
{
uint16_t signa = a & 0x8000, signb = b & 0x8000;
uint16_t expa = a & 0x7c00, expb = b & 0x7c00;
Expand Down Expand Up @@ -493,8 +494,8 @@ bool comparison_func<zmm_vector<float16>>(const uint16_t &a, const uint16_t &b)
}

template <>
arrsize_t replace_nan_with_inf<zmm_vector<float16>>(uint16_t *arr,
arrsize_t arrsize)
X86_SIMD_SORT_INLINE_ONLY arrsize_t
replace_nan_with_inf<zmm_vector<float16>>(uint16_t *arr, arrsize_t arrsize)
{
arrsize_t nan_count = 0;
__mmask16 loadmask = 0xFFFF;
Expand All @@ -513,13 +514,13 @@ arrsize_t replace_nan_with_inf<zmm_vector<float16>>(uint16_t *arr,
}

/*
 * Specialization of is_a_nan for raw 16-bit patterns: returns true when every
 * bit of the 0x7c00 field is set and at least one of the low ten bits
 * (0x03ff) is non-zero.
 * NOTE(review): those masks match the exponent/mantissa layout of an IEEE-754
 * half-precision value, so elem is presumably a float16 bit pattern - confirm.
 * NOTE(review): two signature lines appear below; this looks like a diff
 * rendering in which the bare `bool` line is the removed one and the
 * X86_SIMD_SORT_INLINE_ONLY line is its replacement - confirm against the file.
 */
template <>
bool is_a_nan<uint16_t>(uint16_t elem)
X86_SIMD_SORT_INLINE_ONLY bool is_a_nan<uint16_t>(uint16_t elem)
{
return ((elem & 0x7c00u) == 0x7c00u) && ((elem & 0x03ffu) != 0);
}

X86_SIMD_SORT_INLINE
void avx512_qsort_fp16(uint16_t *arr, arrsize_t arrsize, bool hasnan = false)
X86_SIMD_SORT_INLINE void
avx512_qsort_fp16(uint16_t *arr, arrsize_t arrsize, bool hasnan = false)
{
if (arrsize > 1) {
arrsize_t nan_count = 0;
Expand All @@ -533,11 +534,10 @@ void avx512_qsort_fp16(uint16_t *arr, arrsize_t arrsize, bool hasnan = false)
}
}

X86_SIMD_SORT_INLINE
void avx512_qselect_fp16(uint16_t *arr,
arrsize_t k,
arrsize_t arrsize,
bool hasnan = false)
X86_SIMD_SORT_INLINE void avx512_qselect_fp16(uint16_t *arr,
arrsize_t k,
arrsize_t arrsize,
bool hasnan = false)
{
arrsize_t indx_last_elem = arrsize - 1;
if (UNLIKELY(hasnan)) {
Expand All @@ -549,11 +549,10 @@ void avx512_qselect_fp16(uint16_t *arr,
}
}

X86_SIMD_SORT_INLINE
void avx512_partial_qsort_fp16(uint16_t *arr,
arrsize_t k,
arrsize_t arrsize,
bool hasnan = false)
X86_SIMD_SORT_INLINE void avx512_partial_qsort_fp16(uint16_t *arr,
arrsize_t k,
arrsize_t arrsize,
bool hasnan = false)
{
avx512_qselect_fp16(arr, k - 1, arrsize, hasnan);
avx512_qsort_fp16(arr, k - 1);
Expand Down
12 changes: 7 additions & 5 deletions src/avx512-64bit-argsort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -657,9 +657,8 @@ avx512_argsort(T *arr, arrsize_t *arg, arrsize_t arrsize, bool hasnan = false)
}

template <typename T>
X86_SIMD_SORT_INLINE std::vector<arrsize_t> avx512_argsort(T *arr,
arrsize_t arrsize,
bool hasnan = false)
X86_SIMD_SORT_INLINE std::vector<arrsize_t>
avx512_argsort(T *arr, arrsize_t arrsize, bool hasnan = false)
{
std::vector<arrsize_t> indices(arrsize);
std::iota(indices.begin(), indices.end(), 0);
Expand All @@ -669,8 +668,11 @@ X86_SIMD_SORT_INLINE std::vector<arrsize_t> avx512_argsort(T *arr,

/* argselect methods for 32-bit and 64-bit dtypes */
template <typename T>
X86_SIMD_SORT_INLINE void
avx512_argselect(T *arr, arrsize_t *arg, arrsize_t k, arrsize_t arrsize, bool hasnan = false)
X86_SIMD_SORT_INLINE void avx512_argselect(T *arr,
arrsize_t *arg,
arrsize_t k,
arrsize_t arrsize,
bool hasnan = false)
{
using vectype = typename std::conditional<sizeof(T) == sizeof(int32_t),
ymm_vector<T>,
Expand Down
17 changes: 9 additions & 8 deletions src/avx512fp16-16bit-qsort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,13 +160,14 @@ struct zmm_vector<_Float16> {
};

/*
 * Specialization of is_a_nan for _Float16: returns true when elem compares
 * unequal to itself, which for floating-point values is the case only for NaN.
 * NOTE(review): two signature lines appear below; this looks like a diff
 * rendering in which the bare `bool` line is the removed one and the
 * X86_SIMD_SORT_INLINE_ONLY line is its replacement - confirm against the file.
 */
template <>
bool is_a_nan<_Float16>(_Float16 elem)
X86_SIMD_SORT_INLINE_ONLY bool is_a_nan<_Float16>(_Float16 elem)
{
return elem != elem;
}

template <>
void replace_inf_with_nan(_Float16 *arr, arrsize_t size, arrsize_t nan_count)
X86_SIMD_SORT_INLINE_ONLY void
replace_inf_with_nan(_Float16 *arr, arrsize_t size, arrsize_t nan_count)
{
Fp16Bits val;
val.i_ = 0x7c01;
Expand All @@ -177,7 +178,8 @@ void replace_inf_with_nan(_Float16 *arr, arrsize_t size, arrsize_t nan_count)
}
/* Specialized template function for _Float16 qsort_*/
template <>
void avx512_qsort(_Float16 *arr, arrsize_t arrsize, bool hasnan)
X86_SIMD_SORT_INLINE_ONLY void
avx512_qsort(_Float16 *arr, arrsize_t arrsize, bool hasnan)
{
if (arrsize > 1) {
arrsize_t nan_count = 0;
Expand All @@ -192,7 +194,8 @@ void avx512_qsort(_Float16 *arr, arrsize_t arrsize, bool hasnan)
}

template <>
void avx512_qselect(_Float16 *arr, arrsize_t k, arrsize_t arrsize, bool hasnan)
X86_SIMD_SORT_INLINE_ONLY void
avx512_qselect(_Float16 *arr, arrsize_t k, arrsize_t arrsize, bool hasnan)
{
arrsize_t indx_last_elem = arrsize - 1;
if (UNLIKELY(hasnan)) {
Expand All @@ -204,10 +207,8 @@ void avx512_qselect(_Float16 *arr, arrsize_t k, arrsize_t arrsize, bool hasnan)
}
}
template <>
void avx512_partial_qsort(_Float16 *arr,
arrsize_t k,
arrsize_t arrsize,
bool hasnan)
X86_SIMD_SORT_INLINE_ONLY void
avx512_partial_qsort(_Float16 *arr, arrsize_t k, arrsize_t arrsize, bool hasnan)
{
avx512_qselect(arr, k - 1, arrsize, hasnan);
avx512_qsort(arr, k - 1, hasnan);
Expand Down
4 changes: 4 additions & 0 deletions src/xss-common-includes.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@

/* Compiler-specific macros */
#ifdef _MSC_VER
#define X86_SIMD_SORT_INLINE_ONLY inline
#define X86_SIMD_SORT_INLINE static inline
#define X86_SIMD_SORT_FINLINE static __forceinline
#define LIKELY(x) (x)
Expand All @@ -47,14 +48,17 @@
* Force inline in cygwin to work around a compiler bug. See
* https://github.com/numpy/numpy/pull/22315#issuecomment-1267757584
*/
#define X86_SIMD_SORT_INLINE_ONLY inline

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes the comment above misplaced as it applies to the define below

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, good point. Will fix the order :)

#define X86_SIMD_SORT_INLINE static __attribute__((always_inline))
#define X86_SIMD_SORT_FINLINE static __attribute__((always_inline))
#elif defined(__GNUC__)
#define X86_SIMD_SORT_INLINE_ONLY inline
#define X86_SIMD_SORT_INLINE static inline
#define X86_SIMD_SORT_FINLINE static inline __attribute__((always_inline))
#define LIKELY(x) __builtin_expect((x), 1)
#define UNLIKELY(x) __builtin_expect((x), 0)
#else
#define X86_SIMD_SORT_INLINE_ONLY
#define X86_SIMD_SORT_INLINE static
#define X86_SIMD_SORT_FINLINE static
#define LIKELY(x) (x)
Expand Down