Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 42 additions & 43 deletions stl/src/vector_algorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2884,7 +2884,7 @@ namespace {

do {
const __m256i _Data = _mm256_loadu_si256(static_cast<const __m256i*>(_First));
int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand));
unsigned int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand));

if constexpr (_Pred == _Predicate::_Not_equal) {
_Bingo ^= 0xFFFF'FFFF;
Expand All @@ -2903,7 +2903,7 @@ namespace {
const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size);
const __m256i _Data = _mm256_maskload_epi32(static_cast<const int*>(_First), _Tail_mask);
const __m256i _Cmp = _Traits::_Cmp_avx(_Data, _Comparand);
int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask));
unsigned int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask));

if constexpr (_Pred == _Predicate::_Not_equal) {
_Bingo ^= (1 << _Avx_tail_size) - 1;
Expand All @@ -2928,7 +2928,7 @@ namespace {

do {
const __m128i _Data = _mm_loadu_si128(static_cast<const __m128i*>(_First));
int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand));
unsigned int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand));

if constexpr (_Pred == _Predicate::_Not_equal) {
_Bingo ^= 0xFFFF;
Expand Down Expand Up @@ -2975,7 +2975,7 @@ namespace {
do {
_Rewind_bytes(_Last, 32);
const __m256i _Data = _mm256_loadu_si256(static_cast<const __m256i*>(_Last));
int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand));
unsigned int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand));

if constexpr (_Pred == _Predicate::_Not_equal) {
_Bingo ^= 0xFFFF'FFFF;
Expand All @@ -2993,7 +2993,7 @@ namespace {
const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size);
const __m256i _Data = _mm256_maskload_epi32(static_cast<const int*>(_Last), _Tail_mask);
const __m256i _Cmp = _Traits::_Cmp_avx(_Data, _Comparand);
int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask));
unsigned int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask));

if constexpr (_Pred == _Predicate::_Not_equal) {
_Bingo ^= (1 << _Avx_tail_size) - 1;
Expand All @@ -3017,7 +3017,7 @@ namespace {
do {
_Rewind_bytes(_Last, 16);
const __m128i _Data = _mm_loadu_si128(static_cast<const __m128i*>(_Last));
int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand));
unsigned int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand));

if constexpr (_Pred == _Predicate::_Not_equal) {
_Bingo ^= 0xFFFF;
Expand Down Expand Up @@ -3081,9 +3081,9 @@ namespace {
const void* _Next = _First;
_Advance_bytes(_Next, sizeof(_Ty));

const __m256i _Data = _mm256_loadu_si256(static_cast<const __m256i*>(_First));
const __m256i _Comparand = _mm256_loadu_si256(static_cast<const __m256i*>(_Next));
const int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand));
const __m256i _Data = _mm256_loadu_si256(static_cast<const __m256i*>(_First));
const __m256i _Comparand = _mm256_loadu_si256(static_cast<const __m256i*>(_Next));
const unsigned int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand));

if (_Bingo != 0) {
const unsigned long _Offset = _tzcnt_u32(_Bingo);
Expand All @@ -3098,11 +3098,11 @@ namespace {
const void* _Next = _First;
_Advance_bytes(_Next, sizeof(_Ty));

const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size);
const __m256i _Data = _mm256_maskload_epi32(static_cast<const int*>(_First), _Tail_mask);
const __m256i _Comparand = _mm256_maskload_epi32(static_cast<const int*>(_Next), _Tail_mask);
const __m256i _Cmp = _Traits::_Cmp_avx(_Data, _Comparand);
const int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask));
const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size);
const __m256i _Data = _mm256_maskload_epi32(static_cast<const int*>(_First), _Tail_mask);
const __m256i _Comparand = _mm256_maskload_epi32(static_cast<const int*>(_Next), _Tail_mask);
const __m256i _Cmp = _Traits::_Cmp_avx(_Data, _Comparand);
const unsigned int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask));

if (_Bingo != 0) {
const unsigned long _Offset = _tzcnt_u32(_Bingo);
Expand All @@ -3124,9 +3124,9 @@ namespace {
const void* _Next = _First;
_Advance_bytes(_Next, sizeof(_Ty));

const __m128i _Data = _mm_loadu_si128(static_cast<const __m128i*>(_First));
const __m128i _Comparand = _mm_loadu_si128(static_cast<const __m128i*>(_Next));
const int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand));
const __m128i _Data = _mm_loadu_si128(static_cast<const __m128i*>(_First));
const __m128i _Comparand = _mm_loadu_si128(static_cast<const __m128i*>(_Next));
const unsigned int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand));

if (_Bingo != 0) {
unsigned long _Offset;
Expand Down Expand Up @@ -3183,8 +3183,8 @@ namespace {
do {
const __m256i _Data = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(_First));

const __m256i _Cmp = _Traits::_Cmp_avx(_Comparand, _Data);
const auto _Mask = static_cast<uint32_t>(_mm256_movemask_epi8(_Cmp));
const __m256i _Cmp = _Traits::_Cmp_avx(_Comparand, _Data);
const uint32_t _Mask = _mm256_movemask_epi8(_Cmp);

uint64_t _MskX = uint64_t{_Carry} | (uint64_t{_Mask} << 32);

Expand Down Expand Up @@ -3585,11 +3585,11 @@ namespace {
}

if (const size_t _Avx_tail_size = _Size_bytes & 0x1C; _Avx_tail_size != 0) {
const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size);
const __m256i _Data = _mm256_maskload_epi32(static_cast<const int*>(_First), _Tail_mask);
const __m256i _Mask = _mm256_and_si256(_Traits::_Cmp_avx(_Data, _Comparand), _Tail_mask);
const int _Bingo = _mm256_movemask_epi8(_Mask);
const size_t _Tail_count = __popcnt(_Bingo); // Assume available with SSE4.2
const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size);
const __m256i _Data = _mm256_maskload_epi32(static_cast<const int*>(_First), _Tail_mask);
const __m256i _Mask = _mm256_and_si256(_Traits::_Cmp_avx(_Data, _Comparand), _Tail_mask);
const unsigned int _Bingo = _mm256_movemask_epi8(_Mask);
const size_t _Tail_count = __popcnt(_Bingo); // Assume available with SSE4.2
_Result += _Tail_count / sizeof(_Ty);
_Advance_bytes(_First, _Avx_tail_size);
}
Expand Down Expand Up @@ -4292,8 +4292,8 @@ namespace {
_Found = _mm_and_si128(_Found, _Found_part);
}

const int _Bingo = _mm_cvtsi128_si32(_Found);
int _Found_pos = _Found_pos_init;
const unsigned int _Bingo = _mm_cvtsi128_si32(_Found);
int _Found_pos = _Found_pos_init;

if (_Bingo != 0) {
unsigned long _Tmp;
Expand Down Expand Up @@ -4478,7 +4478,7 @@ namespace {
}
}

if (const int _Bingo = _mm256_movemask_epi8(_Eq); _Bingo != 0) {
if (const uint32_t _Bingo = _mm256_movemask_epi8(_Eq); _Bingo != 0) {
const unsigned long _Offset = _tzcnt_u32(_Bingo);
_Advance_bytes(_First1, _Offset);
return _First1;
Expand All @@ -4497,7 +4497,7 @@ namespace {
}
}

if (const int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Eq, _Tail_mask)); _Bingo != 0) {
if (const uint32_t _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Eq, _Tail_mask)); _Bingo != 0) {
const unsigned long _Offset = _tzcnt_u32(_Bingo);
_Advance_bytes(_First1, _Offset);
return _First1;
Expand Down Expand Up @@ -4832,8 +4832,8 @@ namespace {
_Advance_bytes(_Cur_needle, 16);
}

const int _Bingo = _mm_cvtsi128_si32(_Found);
int _Found_pos = _Not_found;
const unsigned int _Bingo = _mm_cvtsi128_si32(_Found);
int _Found_pos = _Not_found;

if (_Bingo != 0) {
unsigned long _Tmp;
Expand Down Expand Up @@ -5310,7 +5310,7 @@ namespace {

#pragma warning(push)
#pragma warning(disable : 4324) // structure was padded due to alignment specifier
const auto _Check_first = [=, &_Mid1](long _Match) noexcept {
const auto _Check_first = [=, &_Mid1](unsigned long _Match) noexcept {
while (_Match != 0) {
const unsigned int _Pos = _Traits::_Bsr(_Match);

Expand All @@ -5331,7 +5331,7 @@ namespace {
return false;
};

const auto _Check = [=, &_Mid1](long _Match) noexcept {
const auto _Check = [=, &_Mid1](unsigned long _Match) noexcept {
while (_Match != 0) {
const unsigned int _Pos = _Traits::_Bsr(_Match);

Expand Down Expand Up @@ -5397,7 +5397,7 @@ namespace {

#pragma warning(push)
#pragma warning(disable : 4324) // structure was padded due to alignment specifier
const auto _Check = [=, &_Mid1](long _Match) noexcept {
const auto _Check = [=, &_Mid1](unsigned long _Match) noexcept {
while (_Match != 0) {
const unsigned int _Pos = _Traits::_Bsr(_Match);

Expand Down Expand Up @@ -5696,7 +5696,7 @@ namespace {
#pragma warning(push)
#pragma warning(disable : 4324) // structure was padded due to alignment specifier
const auto _Check_unfit = [=, &_Mid1](const unsigned int _Match) noexcept {
long _Unfit_match = _Match & _Needle_unfit_mask;
unsigned long _Unfit_match = _Match & _Needle_unfit_mask;
while (_Unfit_match != 0) {
const void* _Tmp1 = _Mid1;
unsigned long _Match_last_pos;
Expand All @@ -5716,7 +5716,7 @@ namespace {
return true;
}

_bittestandreset(&_Unfit_match, _Match_last_pos);
_Unfit_match ^= 1 << _Match_last_pos;
}

return false;
Expand Down Expand Up @@ -5773,7 +5773,7 @@ namespace {

#pragma warning(push)
#pragma warning(disable : 4324) // structure was padded due to alignment specifier
const auto _Check = [=, &_Mid1](long _Match) noexcept {
const auto _Check = [=, &_Mid1](unsigned long _Match) noexcept {
while (_Match != 0) {
const void* _Tmp1 = _Mid1;
unsigned long _Match_last_pos;
Expand Down Expand Up @@ -5805,7 +5805,7 @@ namespace {
}
}

_bittestandreset(&_Match, _Match_last_pos);
_Match ^= 1 << _Match_last_pos;
}

return false;
Expand Down Expand Up @@ -7025,7 +7025,8 @@ namespace {
using _Traits_2_sse = void;
#else // ^^^ defined(_M_ARM64EC) / !defined(_M_ARM64EC) vvv
struct _Traits_avx {
using _Vec = __m256i;
using _Guard = _Zeroupper_on_exit;
using _Vec = __m256i;

static __m256i _Load(const void* _Src) noexcept {
return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(_Src));
Expand All @@ -7045,7 +7046,8 @@ namespace {
};

struct _Traits_sse {
using _Vec = __m128i;
using _Guard = char;
using _Vec = __m128i;

static __m128i _Load(const void* _Src) noexcept {
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(_Src));
Expand Down Expand Up @@ -7174,6 +7176,7 @@ namespace {
template <class _Traits, class _Elem>
bool _Impl(void* const _Dest, const _Elem* const _Src, const size_t _Size_bytes, const size_t _Size_bits,
const size_t _Size_chars, const _Elem _Elem0, const _Elem _Elem1) noexcept {
[[maybe_unused]] typename _Traits::_Guard _Guard; // TRANSITION, DevCom-10331414
const auto _Dx0 = _Traits::_Set(_Elem0);
const auto _Dx1 = _Traits::_Set(_Elem1);

Expand All @@ -7190,14 +7193,12 @@ namespace {

// Convert characters to bits
if (!_Loop<_Traits>(_Src, _Src + _Size_convert, _Dx0, _Dx1, _Out)) {
_Traits::_Exit_vectorized(); // TRANSITION, DevCom-10331414
return false;
}

// Verify remaining characters, if any
if (_Size_convert != _Size_chars
&& !_Loop<_Traits>(_Src + _Size_convert, _Src + _Size_chars, _Dx0, _Dx1, [](_Traits::_Vec) {})) {
_Traits::_Exit_vectorized(); // TRANSITION, DevCom-10331414
return false;
}

Expand All @@ -7206,8 +7207,6 @@ namespace {
memset(_Dst_words, 0, _Byte_length(_Dst_words, _Dst_words_end));
}

_Traits::_Exit_vectorized(); // TRANSITION, DevCom-10331414

return true;
}
#endif // ^^^ !defined(_M_ARM64EC) ^^^
Expand Down