@@ -2884,7 +2884,7 @@ namespace {
2884
2884
2885
2885
do {
2886
2886
const __m256i _Data = _mm256_loadu_si256 (static_cast <const __m256i*>(_First));
2887
- int _Bingo = _mm256_movemask_epi8 (_Traits::_Cmp_avx (_Data, _Comparand));
2887
+ unsigned int _Bingo = _mm256_movemask_epi8 (_Traits::_Cmp_avx (_Data, _Comparand));
2888
2888
2889
2889
if constexpr (_Pred == _Predicate::_Not_equal) {
2890
2890
_Bingo ^= 0xFFFF'FFFF ;
@@ -2903,7 +2903,7 @@ namespace {
2903
2903
const __m256i _Tail_mask = _Avx2_tail_mask_32 (_Avx_tail_size);
2904
2904
const __m256i _Data = _mm256_maskload_epi32 (static_cast <const int *>(_First), _Tail_mask);
2905
2905
const __m256i _Cmp = _Traits::_Cmp_avx (_Data, _Comparand);
2906
- int _Bingo = _mm256_movemask_epi8 (_mm256_and_si256 (_Cmp, _Tail_mask));
2906
+ unsigned int _Bingo = _mm256_movemask_epi8 (_mm256_and_si256 (_Cmp, _Tail_mask));
2907
2907
2908
2908
if constexpr (_Pred == _Predicate::_Not_equal) {
2909
2909
_Bingo ^= (1 << _Avx_tail_size) - 1 ;
@@ -2928,7 +2928,7 @@ namespace {
2928
2928
2929
2929
do {
2930
2930
const __m128i _Data = _mm_loadu_si128 (static_cast <const __m128i*>(_First));
2931
- int _Bingo = _mm_movemask_epi8 (_Traits::_Cmp_sse (_Data, _Comparand));
2931
+ unsigned int _Bingo = _mm_movemask_epi8 (_Traits::_Cmp_sse (_Data, _Comparand));
2932
2932
2933
2933
if constexpr (_Pred == _Predicate::_Not_equal) {
2934
2934
_Bingo ^= 0xFFFF ;
@@ -2975,7 +2975,7 @@ namespace {
2975
2975
do {
2976
2976
_Rewind_bytes (_Last, 32 );
2977
2977
const __m256i _Data = _mm256_loadu_si256 (static_cast <const __m256i*>(_Last));
2978
- int _Bingo = _mm256_movemask_epi8 (_Traits::_Cmp_avx (_Data, _Comparand));
2978
+ unsigned int _Bingo = _mm256_movemask_epi8 (_Traits::_Cmp_avx (_Data, _Comparand));
2979
2979
2980
2980
if constexpr (_Pred == _Predicate::_Not_equal) {
2981
2981
_Bingo ^= 0xFFFF'FFFF ;
@@ -2993,7 +2993,7 @@ namespace {
2993
2993
const __m256i _Tail_mask = _Avx2_tail_mask_32 (_Avx_tail_size);
2994
2994
const __m256i _Data = _mm256_maskload_epi32 (static_cast <const int *>(_Last), _Tail_mask);
2995
2995
const __m256i _Cmp = _Traits::_Cmp_avx (_Data, _Comparand);
2996
- int _Bingo = _mm256_movemask_epi8 (_mm256_and_si256 (_Cmp, _Tail_mask));
2996
+ unsigned int _Bingo = _mm256_movemask_epi8 (_mm256_and_si256 (_Cmp, _Tail_mask));
2997
2997
2998
2998
if constexpr (_Pred == _Predicate::_Not_equal) {
2999
2999
_Bingo ^= (1 << _Avx_tail_size) - 1 ;
@@ -3017,7 +3017,7 @@ namespace {
3017
3017
do {
3018
3018
_Rewind_bytes (_Last, 16 );
3019
3019
const __m128i _Data = _mm_loadu_si128 (static_cast <const __m128i*>(_Last));
3020
- int _Bingo = _mm_movemask_epi8 (_Traits::_Cmp_sse (_Data, _Comparand));
3020
+ unsigned int _Bingo = _mm_movemask_epi8 (_Traits::_Cmp_sse (_Data, _Comparand));
3021
3021
3022
3022
if constexpr (_Pred == _Predicate::_Not_equal) {
3023
3023
_Bingo ^= 0xFFFF ;
@@ -3081,9 +3081,9 @@ namespace {
3081
3081
const void * _Next = _First;
3082
3082
_Advance_bytes (_Next, sizeof (_Ty));
3083
3083
3084
- const __m256i _Data = _mm256_loadu_si256 (static_cast <const __m256i*>(_First));
3085
- const __m256i _Comparand = _mm256_loadu_si256 (static_cast <const __m256i*>(_Next));
3086
- const int _Bingo = _mm256_movemask_epi8 (_Traits::_Cmp_avx (_Data, _Comparand));
3084
+ const __m256i _Data = _mm256_loadu_si256 (static_cast <const __m256i*>(_First));
3085
+ const __m256i _Comparand = _mm256_loadu_si256 (static_cast <const __m256i*>(_Next));
3086
+ const unsigned int _Bingo = _mm256_movemask_epi8 (_Traits::_Cmp_avx (_Data, _Comparand));
3087
3087
3088
3088
if (_Bingo != 0 ) {
3089
3089
const unsigned long _Offset = _tzcnt_u32 (_Bingo);
@@ -3098,11 +3098,11 @@ namespace {
3098
3098
const void * _Next = _First;
3099
3099
_Advance_bytes (_Next, sizeof (_Ty));
3100
3100
3101
- const __m256i _Tail_mask = _Avx2_tail_mask_32 (_Avx_tail_size);
3102
- const __m256i _Data = _mm256_maskload_epi32 (static_cast <const int *>(_First), _Tail_mask);
3103
- const __m256i _Comparand = _mm256_maskload_epi32 (static_cast <const int *>(_Next), _Tail_mask);
3104
- const __m256i _Cmp = _Traits::_Cmp_avx (_Data, _Comparand);
3105
- const int _Bingo = _mm256_movemask_epi8 (_mm256_and_si256 (_Cmp, _Tail_mask));
3101
+ const __m256i _Tail_mask = _Avx2_tail_mask_32 (_Avx_tail_size);
3102
+ const __m256i _Data = _mm256_maskload_epi32 (static_cast <const int *>(_First), _Tail_mask);
3103
+ const __m256i _Comparand = _mm256_maskload_epi32 (static_cast <const int *>(_Next), _Tail_mask);
3104
+ const __m256i _Cmp = _Traits::_Cmp_avx (_Data, _Comparand);
3105
+ const unsigned int _Bingo = _mm256_movemask_epi8 (_mm256_and_si256 (_Cmp, _Tail_mask));
3106
3106
3107
3107
if (_Bingo != 0 ) {
3108
3108
const unsigned long _Offset = _tzcnt_u32 (_Bingo);
@@ -3124,9 +3124,9 @@ namespace {
3124
3124
const void * _Next = _First;
3125
3125
_Advance_bytes (_Next, sizeof (_Ty));
3126
3126
3127
- const __m128i _Data = _mm_loadu_si128 (static_cast <const __m128i*>(_First));
3128
- const __m128i _Comparand = _mm_loadu_si128 (static_cast <const __m128i*>(_Next));
3129
- const int _Bingo = _mm_movemask_epi8 (_Traits::_Cmp_sse (_Data, _Comparand));
3127
+ const __m128i _Data = _mm_loadu_si128 (static_cast <const __m128i*>(_First));
3128
+ const __m128i _Comparand = _mm_loadu_si128 (static_cast <const __m128i*>(_Next));
3129
+ const unsigned int _Bingo = _mm_movemask_epi8 (_Traits::_Cmp_sse (_Data, _Comparand));
3130
3130
3131
3131
if (_Bingo != 0 ) {
3132
3132
unsigned long _Offset;
@@ -3183,8 +3183,8 @@ namespace {
3183
3183
do {
3184
3184
const __m256i _Data = _mm256_loadu_si256 (reinterpret_cast <const __m256i*>(_First));
3185
3185
3186
- const __m256i _Cmp = _Traits::_Cmp_avx (_Comparand, _Data);
3187
- const auto _Mask = static_cast < uint32_t >( _mm256_movemask_epi8 (_Cmp) );
3186
+ const __m256i _Cmp = _Traits::_Cmp_avx (_Comparand, _Data);
3187
+ const uint32_t _Mask = _mm256_movemask_epi8 (_Cmp);
3188
3188
3189
3189
uint64_t _MskX = uint64_t {_Carry} | (uint64_t {_Mask} << 32 );
3190
3190
@@ -3585,11 +3585,11 @@ namespace {
3585
3585
}
3586
3586
3587
3587
if (const size_t _Avx_tail_size = _Size_bytes & 0x1C ; _Avx_tail_size != 0 ) {
3588
- const __m256i _Tail_mask = _Avx2_tail_mask_32 (_Avx_tail_size);
3589
- const __m256i _Data = _mm256_maskload_epi32 (static_cast <const int *>(_First), _Tail_mask);
3590
- const __m256i _Mask = _mm256_and_si256 (_Traits::_Cmp_avx (_Data, _Comparand), _Tail_mask);
3591
- const int _Bingo = _mm256_movemask_epi8 (_Mask);
3592
- const size_t _Tail_count = __popcnt (_Bingo); // Assume available with SSE4.2
3588
+ const __m256i _Tail_mask = _Avx2_tail_mask_32 (_Avx_tail_size);
3589
+ const __m256i _Data = _mm256_maskload_epi32 (static_cast <const int *>(_First), _Tail_mask);
3590
+ const __m256i _Mask = _mm256_and_si256 (_Traits::_Cmp_avx (_Data, _Comparand), _Tail_mask);
3591
+ const unsigned int _Bingo = _mm256_movemask_epi8 (_Mask);
3592
+ const size_t _Tail_count = __popcnt (_Bingo); // Assume available with SSE4.2
3593
3593
_Result += _Tail_count / sizeof (_Ty);
3594
3594
_Advance_bytes (_First, _Avx_tail_size);
3595
3595
}
@@ -4292,8 +4292,8 @@ namespace {
4292
4292
_Found = _mm_and_si128 (_Found, _Found_part);
4293
4293
}
4294
4294
4295
- const int _Bingo = _mm_cvtsi128_si32 (_Found);
4296
- int _Found_pos = _Found_pos_init;
4295
+ const unsigned int _Bingo = _mm_cvtsi128_si32 (_Found);
4296
+ int _Found_pos = _Found_pos_init;
4297
4297
4298
4298
if (_Bingo != 0 ) {
4299
4299
unsigned long _Tmp;
@@ -4478,7 +4478,7 @@ namespace {
4478
4478
}
4479
4479
}
4480
4480
4481
- if (const int _Bingo = _mm256_movemask_epi8 (_Eq); _Bingo != 0 ) {
4481
+ if (const uint32_t _Bingo = _mm256_movemask_epi8 (_Eq); _Bingo != 0 ) {
4482
4482
const unsigned long _Offset = _tzcnt_u32 (_Bingo);
4483
4483
_Advance_bytes (_First1, _Offset);
4484
4484
return _First1;
@@ -4497,7 +4497,7 @@ namespace {
4497
4497
}
4498
4498
}
4499
4499
4500
- if (const int _Bingo = _mm256_movemask_epi8 (_mm256_and_si256 (_Eq, _Tail_mask)); _Bingo != 0 ) {
4500
+ if (const uint32_t _Bingo = _mm256_movemask_epi8 (_mm256_and_si256 (_Eq, _Tail_mask)); _Bingo != 0 ) {
4501
4501
const unsigned long _Offset = _tzcnt_u32 (_Bingo);
4502
4502
_Advance_bytes (_First1, _Offset);
4503
4503
return _First1;
@@ -4832,8 +4832,8 @@ namespace {
4832
4832
_Advance_bytes (_Cur_needle, 16 );
4833
4833
}
4834
4834
4835
- const int _Bingo = _mm_cvtsi128_si32 (_Found);
4836
- int _Found_pos = _Not_found;
4835
+ const unsigned int _Bingo = _mm_cvtsi128_si32 (_Found);
4836
+ int _Found_pos = _Not_found;
4837
4837
4838
4838
if (_Bingo != 0 ) {
4839
4839
unsigned long _Tmp;
@@ -5310,7 +5310,7 @@ namespace {
5310
5310
5311
5311
#pragma warning(push)
5312
5312
#pragma warning(disable : 4324) // structure was padded due to alignment specifier
5313
- const auto _Check_first = [=, &_Mid1](long _Match) noexcept {
5313
+ const auto _Check_first = [=, &_Mid1](unsigned long _Match) noexcept {
5314
5314
while (_Match != 0 ) {
5315
5315
const unsigned int _Pos = _Traits::_Bsr (_Match);
5316
5316
@@ -5331,7 +5331,7 @@ namespace {
5331
5331
return false ;
5332
5332
};
5333
5333
5334
- const auto _Check = [=, &_Mid1](long _Match) noexcept {
5334
+ const auto _Check = [=, &_Mid1](unsigned long _Match) noexcept {
5335
5335
while (_Match != 0 ) {
5336
5336
const unsigned int _Pos = _Traits::_Bsr (_Match);
5337
5337
@@ -5397,7 +5397,7 @@ namespace {
5397
5397
5398
5398
#pragma warning(push)
5399
5399
#pragma warning(disable : 4324) // structure was padded due to alignment specifier
5400
- const auto _Check = [=, &_Mid1](long _Match) noexcept {
5400
+ const auto _Check = [=, &_Mid1](unsigned long _Match) noexcept {
5401
5401
while (_Match != 0 ) {
5402
5402
const unsigned int _Pos = _Traits::_Bsr (_Match);
5403
5403
@@ -5696,7 +5696,7 @@ namespace {
5696
5696
#pragma warning(push)
5697
5697
#pragma warning(disable : 4324) // structure was padded due to alignment specifier
5698
5698
const auto _Check_unfit = [=, &_Mid1](const unsigned int _Match) noexcept {
5699
- long _Unfit_match = _Match & _Needle_unfit_mask;
5699
+ unsigned long _Unfit_match = _Match & _Needle_unfit_mask;
5700
5700
while (_Unfit_match != 0 ) {
5701
5701
const void * _Tmp1 = _Mid1;
5702
5702
unsigned long _Match_last_pos;
@@ -5716,7 +5716,7 @@ namespace {
5716
5716
return true ;
5717
5717
}
5718
5718
5719
- _bittestandreset (& _Unfit_match, _Match_last_pos) ;
5719
+ _Unfit_match ^= 1 << _Match_last_pos;
5720
5720
}
5721
5721
5722
5722
return false ;
@@ -5773,7 +5773,7 @@ namespace {
5773
5773
5774
5774
#pragma warning(push)
5775
5775
#pragma warning(disable : 4324) // structure was padded due to alignment specifier
5776
- const auto _Check = [=, &_Mid1](long _Match) noexcept {
5776
+ const auto _Check = [=, &_Mid1](unsigned long _Match) noexcept {
5777
5777
while (_Match != 0 ) {
5778
5778
const void * _Tmp1 = _Mid1;
5779
5779
unsigned long _Match_last_pos;
@@ -5805,7 +5805,7 @@ namespace {
5805
5805
}
5806
5806
}
5807
5807
5808
- _bittestandreset (& _Match, _Match_last_pos) ;
5808
+ _Match ^= 1 << _Match_last_pos;
5809
5809
}
5810
5810
5811
5811
return false ;
@@ -7025,7 +7025,8 @@ namespace {
7025
7025
using _Traits_2_sse = void ;
7026
7026
#else // ^^^ defined(_M_ARM64EC) / !defined(_M_ARM64EC) vvv
7027
7027
struct _Traits_avx {
7028
- using _Vec = __m256i;
7028
+ using _Guard = _Zeroupper_on_exit;
7029
+ using _Vec = __m256i;
7029
7030
7030
7031
static __m256i _Load (const void * _Src) noexcept {
7031
7032
return _mm256_loadu_si256 (reinterpret_cast <const __m256i*>(_Src));
@@ -7045,7 +7046,8 @@ namespace {
7045
7046
};
7046
7047
7047
7048
struct _Traits_sse {
7048
- using _Vec = __m128i;
7049
+ using _Guard = char ;
7050
+ using _Vec = __m128i;
7049
7051
7050
7052
static __m128i _Load (const void * _Src) noexcept {
7051
7053
return _mm_loadu_si128 (reinterpret_cast <const __m128i*>(_Src));
@@ -7174,6 +7176,7 @@ namespace {
7174
7176
template <class _Traits , class _Elem >
7175
7177
bool _Impl (void * const _Dest, const _Elem* const _Src, const size_t _Size_bytes, const size_t _Size_bits,
7176
7178
const size_t _Size_chars, const _Elem _Elem0, const _Elem _Elem1) noexcept {
7179
+ [[maybe_unused]] typename _Traits::_Guard _Guard; // TRANSITION, DevCom-10331414
7177
7180
const auto _Dx0 = _Traits::_Set (_Elem0);
7178
7181
const auto _Dx1 = _Traits::_Set (_Elem1);
7179
7182
@@ -7190,14 +7193,12 @@ namespace {
7190
7193
7191
7194
// Convert characters to bits
7192
7195
if (!_Loop<_Traits>(_Src, _Src + _Size_convert, _Dx0, _Dx1, _Out)) {
7193
- _Traits::_Exit_vectorized (); // TRANSITION, DevCom-10331414
7194
7196
return false ;
7195
7197
}
7196
7198
7197
7199
// Verify remaining characters, if any
7198
7200
if (_Size_convert != _Size_chars
7199
7201
&& !_Loop<_Traits>(_Src + _Size_convert, _Src + _Size_chars, _Dx0, _Dx1, [](_Traits::_Vec) {})) {
7200
- _Traits::_Exit_vectorized (); // TRANSITION, DevCom-10331414
7201
7202
return false ;
7202
7203
}
7203
7204
@@ -7206,8 +7207,6 @@ namespace {
7206
7207
memset (_Dst_words, 0 , _Byte_length (_Dst_words, _Dst_words_end));
7207
7208
}
7208
7209
7209
- _Traits::_Exit_vectorized (); // TRANSITION, DevCom-10331414
7210
-
7211
7210
return true ;
7212
7211
}
7213
7212
#endif // ^^^ !defined(_M_ARM64EC) ^^^
0 commit comments