From 664bb63c2d1426c1c6fdf2f3169310e30fa6b4b7 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Mon, 12 May 2025 17:51:27 +0300 Subject: [PATCH 01/21] benchmark --- benchmarks/CMakeLists.txt | 1 + benchmarks/src/rotate.cpp | 46 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 benchmarks/src/rotate.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 04e47486df5..b6d8f540592 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -118,6 +118,7 @@ add_benchmark(priority_queue_push_range src/priority_queue_push_range.cpp) add_benchmark(random_integer_generation src/random_integer_generation.cpp) add_benchmark(remove src/remove.cpp) add_benchmark(replace src/replace.cpp) +add_benchmark(rotate src/rotate.cpp) add_benchmark(search src/search.cpp) add_benchmark(search_n src/search_n.cpp) add_benchmark(std_copy src/std_copy.cpp) diff --git a/benchmarks/src/rotate.cpp b/benchmarks/src/rotate.cpp new file mode 100644 index 00000000000..333a54fdaf1 --- /dev/null +++ b/benchmarks/src/rotate.cpp @@ -0,0 +1,46 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include + +#include "skewed_allocator.hpp" +#include "utility.hpp" + +using namespace std; + +template +void bm_rotate(benchmark::State& state) { + const auto size = static_cast(state.range(0)); + const auto n = static_cast(state.range(1)); + + auto v = random_vector(size); + benchmark::DoNotOptimize(v); + + for (auto _ : state) { + rotate(v.begin(), v.begin() + n, v.end()); + benchmark::DoNotOptimize(v); + } +} + +void common_args(auto bm) { + bm->Args({3333, 2242})->Args({3333, 1111})->Args({3333, 501})->Args({3333, 12})->Args({3333, 5})->Args({3333, 1}); + bm->Args({333, 101})->Args({123, 32})->Args({23, 7})->Args({12, 5})->Args({3, 2}); +} + +struct color { + uint16_t h; + uint16_t s; + uint16_t l; +}; + +BENCHMARK(bm_rotate)->Apply(common_args); +BENCHMARK(bm_rotate)->Apply(common_args); +BENCHMARK(bm_rotate)->Apply(common_args); +BENCHMARK(bm_rotate)->Apply(common_args); + +BENCHMARK(bm_rotate)->Apply(common_args); + +BENCHMARK_MAIN(); From 3d6a9b427aff2dd4d76e5becfceedb525cf87611 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Tue, 13 May 2025 19:15:52 +0300 Subject: [PATCH 02/21] test coverage --- .../VSO_0000000_vector_algorithms/test.cpp | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index d2f241334fb..f282c6be99b 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -743,6 +743,50 @@ void test_reverse_copy(mt19937_64& gen) { } } +template +void last_known_good_rotate( + RanIt first, RanIt mid, RanIt last, vector::value_type>& tmp) { + const auto size_left = mid - first; + const auto size_right = last - mid; + if (size_left <= size_right) { + tmp.assign(first, mid); + move_backward(mid, last, last - size_left); + move(tmp.begin(), tmp.end(), last - size_left); + } else { + tmp.assign(mid, last); + move(first, mid, first + size_right); + move(tmp.begin(), tmp.end(), first); + } +} + +template +void test_case_rotate(vector& actual, vector& expected, const ptrdiff_t pos, vector& tmp) { + last_known_good_rotate(expected.begin(), expected.begin() + pos, expected.end(), tmp); + rotate(actual.begin(), actual.begin() + pos, actual.end()); + assert(expected == actual); +} + +template +void test_rotate(mt19937_64& gen) { + vector actual; + vector expected; + vector tmp; + actual.reserve(dataCount); + expected.reserve(dataCount); + tmp.reserve(dataCount); + test_case_rotate(actual, expected, 0, tmp); + for (size_t attempts = 0; attempts < dataCount; ++attempts) { + actual.push_back(static_cast(gen())); // intentionally narrows + expected = actual; + + uniform_int_distribution dis_pos(0, static_cast(attempts)); + + for (size_t pos_count = 0; pos_count != 5; ++pos_count) { + test_case_rotate(actual, expected, dis_pos(gen), tmp); + } + } +} + template FwdIt2 last_known_good_swap_ranges(FwdIt1 first1, const FwdIt1 last1, FwdIt2 dest) { for (; first1 != last1; ++first1, ++dest) { @@ -1179,6 +1223,19 @@ void test_vector_algorithms(mt19937_64& gen) { test_reverse_copy(gen); test_reverse_copy(gen); + test_rotate(gen); + test_rotate(gen); + test_rotate(gen); + test_rotate(gen); + test_rotate(gen); + test_rotate(gen); + test_rotate(gen); + test_rotate(gen); + test_rotate(gen); + test_rotate(gen); + test_rotate(gen); + test_rotate(gen); + test_remove(gen); test_remove(gen); test_remove(gen); From 95ad7d1d355e9e00611ce5133980c6a4f50d9f4a Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Tue, 13 May 2025 19:55:16 +0300 Subject: [PATCH 03/21] vectorization --- stl/inc/xutility | 13 +++++ stl/src/vector_algorithms.cpp | 106 ++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) diff --git a/stl/inc/xutility b/stl/inc/xutility index f3660728b5f..dc37c7283c8 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -79,6 +79,8 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_8(void* _Firs __declspec(noalias) void __cdecl __std_swap_ranges_trivially_swappable_noalias( void* _First1, void* _Last1, void* _First2) noexcept; +__declspec(noalias) void __std_rotate(void* _First, void* _Mid, void* _Last) noexcept; + __declspec(noalias) size_t __stdcall __std_count_trivial_1( const void* _First, const void* _Last, uint8_t _Val) noexcept; __declspec(noalias) size_t __stdcall __std_count_trivial_2( @@ -6581,6 +6583,17 @@ _CONSTEXPR20 _FwdIt rotate(_FwdIt _First, _FwdIt _Mid, _FwdIt _Last) { } if constexpr (_Is_cpp17_random_iter_v<_FwdIt>) { +#if _USE_STD_VECTOR_ALGORITHMS + using _Elem = remove_reference_t<_Iter_ref_t>; + + if constexpr (conjunction_v>, + _Is_trivially_swappable<_Elem>, negation>>) { + if (!_STD _Is_constant_evaluated()) { + ::__std_rotate(_STD _To_address(_UFirst), _STD _To_address(_UMid), _STD _To_address(_ULast)); + return _First + (_Last - _Mid); + } + } +#endif // _USE_STD_VECTOR_ALGORITHMS _STD reverse(_UFirst, _UMid); _STD reverse(_UMid, _ULast); _STD reverse(_UFirst, _ULast); diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 0fb916c1318..fa72656f107 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -501,6 +501,112 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8( } // extern "C" +namespace { + namespace _Rotating { + // There's performance anomaly observed for memmove larger than 8192 elements + // It is CPU specific, appraently at least Intel Alder Lake is affected + // Re-implementing memmove here as a workaround + // We know whether we move to upper or lower address, so we can have + // two separate functions and no runtime branch + constexpr size_t _Portion_size = 8192; + constexpr size_t _Portion_mask = _Portion_size - 1; + static_assert((_Portion_size & _Portion_mask) == 0); + + void _Move_to_lower_address(void* _Dest, void* _Src, const size_t _Size) noexcept { + const size_t _Whole_portions_size = _Size & ~_Portion_mask; + + void* _Dest_end = _Dest; + _Advance_bytes(_Dest_end, _Whole_portions_size); + void* _Src_end = _Src; + _Advance_bytes(_Src_end, _Whole_portions_size); + + while (_Dest != _Dest_end) { + memmove(_Dest, _Src, _Portion_size); + _Advance_bytes(_Dest, _Portion_size); + _Advance_bytes(_Src, _Portion_size); + } + + if (const size_t _Tail = _Size - _Whole_portions_size; _Tail != 0) { + memmove(_Dest, _Src, _Tail); + } + } + + void _Move_to_upper_address(void* const _Dest, void* const _Src, const size_t _Size) noexcept { + const size_t _Whole_portions_size = _Size & ~_Portion_mask; + + void* _Dest_end = _Dest; + _Advance_bytes(_Dest_end, _Whole_portions_size); + void* _Src_end = _Src; + _Advance_bytes(_Src_end, _Whole_portions_size); + + if (const size_t _Tail = _Size - _Whole_portions_size; _Tail != 0) { + memmove(_Dest_end, _Src_end, _Tail); + } + + while (_Dest_end != _Dest) { + _Rewind_bytes(_Dest_end, _Portion_size); + _Rewind_bytes(_Src_end, _Portion_size); + memmove(_Dest_end, _Src_end, _Portion_size); + } + } + + constexpr size_t _Buf_size = 512; + + bool _Use_buffer(const size_t _Smaller, const size_t _Larger) noexcept { + return _Smaller <= _Buf_size && (_Smaller <= 128 || _Larger >= _Smaller * 2); + } + } // namespace _Rotating +} // unnamed namespace + +extern "C" { + +__declspec(noalias) void __std_rotate(void* _First, void* const _Mid, void* _Last) noexcept { + unsigned char _Buf[_Rotating::_Buf_size]; + + for (;;) { + const size_t _Left = _Byte_length(_First, _Mid); + const size_t _Right = _Byte_length(_Mid, _Last); + + if (_Left <= _Right) { + if (_Left == 0) { + break; + } + + if (_Rotating::_Use_buffer(_Left, _Right)) { + memcpy(_Buf, _First, _Left); + _Rotating::_Move_to_lower_address(_First, _Mid, _Right); + _Advance_bytes(_First, _Right); + memcpy(_First, _Buf, _Left); + break; + } + + void* _Mid2 = _Last; + _Rewind_bytes(_Mid2, _Left); + __std_swap_ranges_trivially_swappable_noalias(_Mid2, _Last, _First); + _Last = _Mid2; + } else { + if (_Right == 0) { + break; + } + + if (_Rotating::_Use_buffer(_Right, _Left)) { + _Rewind_bytes(_Last, _Right); + memcpy(_Buf, _Last, _Right); + void* _Mid2 = _First; + _Advance_bytes(_Mid2, _Right); + _Rotating::_Move_to_upper_address(_Mid2, _First, _Left); + memcpy(_First, _Buf, _Right); + break; + } + + __std_swap_ranges_trivially_swappable_noalias(_Mid, _Last, _First); + _Advance_bytes(_First, _Right); + } + } +} + +} // extern "C" + namespace { namespace _Sorting { template From 7cdd92449edee231d7586834401ef7b07d550133 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 15 May 2025 09:31:14 -0700 Subject: [PATCH 04/21] Drop unnecessary `std::`. --- benchmarks/src/rotate.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/src/rotate.cpp b/benchmarks/src/rotate.cpp index 333a54fdaf1..390e8bbe7fe 100644 --- a/benchmarks/src/rotate.cpp +++ b/benchmarks/src/rotate.cpp @@ -36,10 +36,10 @@ struct color { uint16_t l; }; -BENCHMARK(bm_rotate)->Apply(common_args); -BENCHMARK(bm_rotate)->Apply(common_args); -BENCHMARK(bm_rotate)->Apply(common_args); -BENCHMARK(bm_rotate)->Apply(common_args); +BENCHMARK(bm_rotate)->Apply(common_args); +BENCHMARK(bm_rotate)->Apply(common_args); +BENCHMARK(bm_rotate)->Apply(common_args); +BENCHMARK(bm_rotate)->Apply(common_args); BENCHMARK(bm_rotate)->Apply(common_args); From 5f5bd0e13d6046755ddbbe713cc1175cd2ac61ff Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 15 May 2025 09:37:08 -0700 Subject: [PATCH 05/21] Add missing calling convention to `__std_rotate`. --- stl/inc/xutility | 2 +- stl/src/vector_algorithms.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index dc37c7283c8..48103aa2f58 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -79,7 +79,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_8(void* _Firs __declspec(noalias) void __cdecl __std_swap_ranges_trivially_swappable_noalias( void* _First1, void* _Last1, void* _First2) noexcept; -__declspec(noalias) void __std_rotate(void* _First, void* _Mid, void* _Last) noexcept; +__declspec(noalias) void __stdcall __std_rotate(void* _First, void* _Mid, void* _Last) noexcept; __declspec(noalias) size_t __stdcall __std_count_trivial_1( const void* _First, const void* _Last, uint8_t _Val) noexcept; diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index fa72656f107..c989141c4a4 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -560,7 +560,7 @@ namespace { extern "C" { -__declspec(noalias) void __std_rotate(void* _First, void* const _Mid, void* _Last) noexcept { +__declspec(noalias) void __stdcall __std_rotate(void* _First, void* const _Mid, void* _Last) noexcept { unsigned char _Buf[_Rotating::_Buf_size]; for (;;) { From 01d735962011c190eb8e60b5c1a43eb4841a39fa Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 15 May 2025 09:56:58 -0700 Subject: [PATCH 06/21] Keep `actual` and `expected` in sync with less work. --- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index f282c6be99b..1d7f67a90c3 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -776,8 +776,9 @@ void test_rotate(mt19937_64& gen) { tmp.reserve(dataCount); test_case_rotate(actual, expected, 0, tmp); for (size_t attempts = 0; attempts < dataCount; ++attempts) { - actual.push_back(static_cast(gen())); // intentionally narrows - expected = actual; + const T val = static_cast(gen()); // intentionally narrows + actual.push_back(val); + expected.push_back(val); uniform_int_distribution dis_pos(0, static_cast(attempts)); From f92550d511d5a01fe20a24f677b148522d52acc9 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 15 May 2025 10:33:33 -0700 Subject: [PATCH 07/21] Cite GH 5506. --- stl/src/vector_algorithms.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index c989141c4a4..4c6b91ad992 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -503,11 +503,8 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8( namespace { namespace _Rotating { - // There's performance anomaly observed for memmove larger than 8192 elements - // It is CPU specific, appraently at least Intel Alder Lake is affected - // Re-implementing memmove here as a workaround - // We know whether we move to upper or lower address, so we can have - // two separate functions and no runtime branch + // TRANSITION, GH-5506 "VCRuntime: memmove() is surprisingly slow for more than 8 KB on certain CPUs": + // As a workaround, the following code calls memmove() for 8 KB portions. constexpr size_t _Portion_size = 8192; constexpr size_t _Portion_mask = _Portion_size - 1; static_assert((_Portion_size & _Portion_mask) == 0); From 0b7c67894742753cc31a9b27121c5e9ecde0c9ac Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 15 May 2025 10:34:40 -0700 Subject: [PATCH 08/21] upper_address => higher_address --- stl/src/vector_algorithms.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 4c6b91ad992..d8e1bdce2c3 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -528,7 +528,7 @@ namespace { } } - void _Move_to_upper_address(void* const _Dest, void* const _Src, const size_t _Size) noexcept { + void _Move_to_higher_address(void* const _Dest, void* const _Src, const size_t _Size) noexcept { const size_t _Whole_portions_size = _Size & ~_Portion_mask; void* _Dest_end = _Dest; @@ -591,7 +591,7 @@ __declspec(noalias) void __stdcall __std_rotate(void* _First, void* const _Mid, memcpy(_Buf, _Last, _Right); void* _Mid2 = _First; _Advance_bytes(_Mid2, _Right); - _Rotating::_Move_to_upper_address(_Mid2, _First, _Left); + _Rotating::_Move_to_higher_address(_Mid2, _First, _Left); memcpy(_First, _Buf, _Right); break; } From 0590287ef818123e17943cf71088c7e2ca3f44ed Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 15 May 2025 11:13:53 -0700 Subject: [PATCH 09/21] I see dead variables. They don't even know they're dead. --- stl/src/vector_algorithms.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index d8e1bdce2c3..5719adfed38 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -514,8 +514,6 @@ namespace { void* _Dest_end = _Dest; _Advance_bytes(_Dest_end, _Whole_portions_size); - void* _Src_end = _Src; - _Advance_bytes(_Src_end, _Whole_portions_size); while (_Dest != _Dest_end) { memmove(_Dest, _Src, _Portion_size); From 6b6e37e7caac85fda9f8c86b81f7766c626c2e92 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 15 May 2025 11:18:03 -0700 Subject: [PATCH 10/21] Sources should point to const. --- stl/src/vector_algorithms.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 5719adfed38..d603194b5e1 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -509,7 +509,7 @@ namespace { constexpr size_t _Portion_mask = _Portion_size - 1; static_assert((_Portion_size & _Portion_mask) == 0); - void _Move_to_lower_address(void* _Dest, void* _Src, const size_t _Size) noexcept { + void _Move_to_lower_address(void* _Dest, const void* _Src, const size_t _Size) noexcept { const size_t _Whole_portions_size = _Size & ~_Portion_mask; void* _Dest_end = _Dest; @@ -526,12 +526,12 @@ namespace { } } - void _Move_to_higher_address(void* const _Dest, void* const _Src, const size_t _Size) noexcept { + void _Move_to_higher_address(void* const _Dest, const void* const _Src, const size_t _Size) noexcept { const size_t _Whole_portions_size = _Size & ~_Portion_mask; void* _Dest_end = _Dest; _Advance_bytes(_Dest_end, _Whole_portions_size); - void* _Src_end = _Src; + const void* _Src_end = _Src; _Advance_bytes(_Src_end, _Whole_portions_size); if (const size_t _Tail = _Size - _Whole_portions_size; _Tail != 0) { From bf77cd26567d6ea0e0974710905579ccab2d9adb Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Fri, 16 May 2025 08:45:47 +0300 Subject: [PATCH 11/21] ranges coverage --- .../VSO_0000000_vector_algorithms/test.cpp | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 1d7f67a90c3..826adc0a1c3 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -760,30 +760,44 @@ void last_known_good_rotate( } template -void test_case_rotate(vector& actual, vector& expected, const ptrdiff_t pos, vector& tmp) { +void test_case_rotate( + vector& actual, vector& actual_r, vector& expected, const ptrdiff_t pos, vector& tmp) { + const ptrdiff_t shift = static_cast(expected.size()) - pos; last_known_good_rotate(expected.begin(), expected.begin() + pos, expected.end(), tmp); - rotate(actual.begin(), actual.begin() + pos, actual.end()); + const auto it = rotate(actual.begin(), actual.begin() + pos, actual.end()); assert(expected == actual); + assert(it == actual.begin() + shift); +#if !_HAS_CXX20 + (void) actual_r; +#else // ^^^ !_HAS_CXX20 / _HAS_CXX20 vvv + const auto rng = ranges::rotate(actual_r.begin(), actual_r.begin() + pos, actual_r.end()); + assert(expected == actual_r); + assert(begin(rng) == actual_r.begin() + shift); + assert(end(rng) == actual_r.end()); +#endif // ^^^ _HAS_CXX20 ^^^ } template void test_rotate(mt19937_64& gen) { vector actual; + vector actual_r; vector expected; vector tmp; actual.reserve(dataCount); + actual_r.reserve(dataCount); expected.reserve(dataCount); tmp.reserve(dataCount); - test_case_rotate(actual, expected, 0, tmp); + test_case_rotate(actual, actual_r, expected, 0, tmp); for (size_t attempts = 0; attempts < dataCount; ++attempts) { const T val = static_cast(gen()); // intentionally narrows actual.push_back(val); + actual_r.push_back(val); expected.push_back(val); - uniform_int_distribution dis_pos(0, static_cast(attempts)); + uniform_int_distribution dis_pos(0, static_cast(attempts) + 1); for (size_t pos_count = 0; pos_count != 5; ++pos_count) { - test_case_rotate(actual, expected, dis_pos(gen), tmp); + test_case_rotate(actual, actual_r, expected, dis_pos(gen), tmp); } } } From 19b5aa21534964162523f2edbde62c9ea6c18e70 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Fri, 16 May 2025 09:01:14 +0300 Subject: [PATCH 12/21] integer class diffference coverage for `rotate_copy` too, it uses `memcpy`, hence considered vectorized --- .../test.cpp | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tests/std/tests/GH_005421_vector_algorithms_integer_class_type_iterator/test.cpp b/tests/std/tests/GH_005421_vector_algorithms_integer_class_type_iterator/test.cpp index fcd796d73a1..ceb065012c0 100644 --- a/tests/std/tests/GH_005421_vector_algorithms_integer_class_type_iterator/test.cpp +++ b/tests/std/tests/GH_005421_vector_algorithms_integer_class_type_iterator/test.cpp @@ -109,7 +109,7 @@ int main() { picky_contiguous_iterator float_arr_begin(begin(float_arr)); picky_contiguous_iterator float_arr_end(end(float_arr)); - transform(arr_begin, arr_end, float_arr_begin, [](int v) { return static_cast(v); }); + transform(arr_begin, arr_end, float_arr_begin, [](const int v) { return static_cast(v); }); assert(ranges::min(ranges::subrange(float_arr_begin, float_arr_end)) == 200.0); assert(ranges::max(ranges::subrange(float_arr_begin, float_arr_end)) == 390.0); @@ -207,6 +207,30 @@ int main() { ranges::reverse(temp_begin, temp_end); assert(ranges::equal(temp_begin, temp_end, begin(reverse_expected), end(reverse_expected))); } + { + const int rotate_expected[] = { + 250, 270, 280, 290, 300, 310, 320, 250, 340, 250, 250, 370, 380, 390, 200, 210, 220, 250, 240, 250}; + + const _Signed128 rotate_pos = 6; + + auto rot_copy_it = rotate_copy(arr_begin, arr_begin + rotate_pos, arr_end, temp_begin); + assert(equal(temp_begin, temp_end, begin(rotate_expected), end(rotate_expected))); + assert(rot_copy_it == temp_end); + + copy(arr_begin, arr_end, temp_begin); + auto rot_it = rotate(temp_begin, temp_begin + rotate_pos, temp_end); + assert(equal(temp_begin, temp_end, begin(rotate_expected), end(rotate_expected))); + assert(rot_it == temp_end - rotate_pos); + + auto r_rot_copy_it = ranges::rotate_copy(arr_begin, arr_begin + rotate_pos, arr_end, temp_begin).out; + assert(ranges::equal(temp_begin, temp_end, begin(rotate_expected), end(rotate_expected))); + assert(r_rot_copy_it == temp_end); + + ranges::copy(arr_begin, arr_end, temp_begin); + auto r_rot_it = begin(ranges::rotate(temp_begin, temp_begin + rotate_pos, temp_end)); + assert(ranges::equal(temp_begin, temp_end, begin(rotate_expected), end(rotate_expected))); + assert(r_rot_it == temp_end - rotate_pos); + } { // Out of replace family, only replace for 32-bit and 64-bit elements is manually vectorized, // replace_copy is auto vectorized (along with replace_copy_if) From d60725d0a1ecd38cd565237fc5128da87852c435 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Fri, 16 May 2025 09:13:15 +0300 Subject: [PATCH 13/21] ranges benchmark --- benchmarks/src/rotate.cpp | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/benchmarks/src/rotate.cpp b/benchmarks/src/rotate.cpp index 390e8bbe7fe..69226283c8b 100644 --- a/benchmarks/src/rotate.cpp +++ b/benchmarks/src/rotate.cpp @@ -11,7 +11,9 @@ using namespace std; -template +enum class AlgType { Std, Rng }; + +template void bm_rotate(benchmark::State& state) { const auto size = static_cast(state.range(0)); const auto n = static_cast(state.range(1)); @@ -20,7 +22,11 @@ void bm_rotate(benchmark::State& state) { benchmark::DoNotOptimize(v); for (auto _ : state) { - rotate(v.begin(), v.begin() + n, v.end()); + if constexpr (Alg == AlgType::Std) { + rotate(v.begin(), v.begin() + n, v.end()); + } else { + ranges::rotate(v, v.begin() + n); + } benchmark::DoNotOptimize(v); } } @@ -36,11 +42,16 @@ struct color { uint16_t l; }; -BENCHMARK(bm_rotate)->Apply(common_args); -BENCHMARK(bm_rotate)->Apply(common_args); -BENCHMARK(bm_rotate)->Apply(common_args); -BENCHMARK(bm_rotate)->Apply(common_args); - -BENCHMARK(bm_rotate)->Apply(common_args); +BENCHMARK(bm_rotate)->Apply(common_args); +BENCHMARK(bm_rotate)->Apply(common_args); +BENCHMARK(bm_rotate)->Apply(common_args); +BENCHMARK(bm_rotate)->Apply(common_args); +BENCHMARK(bm_rotate)->Apply(common_args); +BENCHMARK(bm_rotate)->Apply(common_args); +BENCHMARK(bm_rotate)->Apply(common_args); +BENCHMARK(bm_rotate)->Apply(common_args); + +BENCHMARK(bm_rotate)->Apply(common_args); +BENCHMARK(bm_rotate)->Apply(common_args); BENCHMARK_MAIN(); From fcc41ccf9f18a78ad248c472d754aa1c4a9db7c1 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Fri, 16 May 2025 09:16:00 +0300 Subject: [PATCH 14/21] more benchmark cases --- benchmarks/src/rotate.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmarks/src/rotate.cpp b/benchmarks/src/rotate.cpp index 69226283c8b..9ba7c4b898f 100644 --- a/benchmarks/src/rotate.cpp +++ b/benchmarks/src/rotate.cpp @@ -32,7 +32,8 @@ void bm_rotate(benchmark::State& state) { } void common_args(auto bm) { - bm->Args({3333, 2242})->Args({3333, 1111})->Args({3333, 501})->Args({3333, 12})->Args({3333, 5})->Args({3333, 1}); + bm->Args({3333, 2242})->Args({3332, 1666})->Args({3333, 1111})->Args({3333, 501}); + bm->Args({3333, 3300})->Args({3333, 12})->Args({3333, 5})->Args({3333, 1}); bm->Args({333, 101})->Args({123, 32})->Args({23, 7})->Args({12, 5})->Args({3, 2}); } From ef3be12ef2e8532e964d36d143bed55c7a5265e2 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Fri, 16 May 2025 09:16:17 +0300 Subject: [PATCH 15/21] ranges codepath --- stl/inc/algorithm | 13 +++++++++++++ stl/inc/xutility | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 6d7e76ed4af..6516bc8a9bf 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -5768,6 +5768,19 @@ namespace ranges { } if constexpr (bidirectional_iterator<_It>) { +#if _USE_STD_VECTOR_ALGORITHMS + using _Elem = iter_value_t<_It>; + + if constexpr (_Iterator_is_contiguous<_It> && sized_sentinel_for<_Se, _It> + && conjunction_v<_Is_trivially_swappable<_Elem>, negation>>) { + if (!_STD is_constant_evaluated()) { + const _It _Last_it = _First + (_Last - _First); + ::__std_rotate(_STD to_address(_First), _STD to_address(_Mid), _STD to_address(_Last_it)); + return {_First + (_Last - _Mid), _Last}; + } + } +#endif // _USE_STD_VECTOR_ALGORITHMS + _RANGES _Reverse_common(_First, _Mid); auto _Final = _RANGES _Get_final_iterator_unwrapped<_It>(_Mid, _STD move(_Last)); _RANGES _Reverse_common(_Mid, _Final); diff --git a/stl/inc/xutility b/stl/inc/xutility index 48103aa2f58..18cabfa3dcf 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -6584,7 +6584,7 @@ _CONSTEXPR20 _FwdIt rotate(_FwdIt _First, _FwdIt _Mid, _FwdIt _Last) { if constexpr (_Is_cpp17_random_iter_v<_FwdIt>) { #if _USE_STD_VECTOR_ALGORITHMS - using _Elem = remove_reference_t<_Iter_ref_t>; + using _Elem = _Iter_value_t; if constexpr (conjunction_v>, _Is_trivially_swappable<_Elem>, negation>>) { From 55156e919b907f5dea4173d8e2063b15563565e5 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Fri, 16 May 2025 09:49:39 +0300 Subject: [PATCH 16/21] we want swappable --- .../test.compile.pass.cpp | 3 --- .../test.compile.pass.cpp | 19 ------------------- 2 files changed, 22 deletions(-) diff --git a/tests/std/tests/GH_001596_adl_proof_algorithms/test.compile.pass.cpp b/tests/std/tests/GH_001596_adl_proof_algorithms/test.compile.pass.cpp index 4d4ff4c7094..ae5c62512e1 100644 --- a/tests/std/tests/GH_001596_adl_proof_algorithms/test.compile.pass.cpp +++ b/tests/std/tests/GH_001596_adl_proof_algorithms/test.compile.pass.cpp @@ -945,9 +945,6 @@ void test_ranges_non_projected_algorithms() { (void) reverse_copy(varr, varr, varr2); (void) reverse_copy(varr, varr2); - (void) rotate(varr, varr, varr); - (void) rotate(varr, varr); - (void) rotate_copy(varr, varr, varr, varr2); (void) rotate_copy(varr, varr, varr2); diff --git a/tests/std/tests/P2538R1_adl_proof_std_projected/test.compile.pass.cpp b/tests/std/tests/P2538R1_adl_proof_std_projected/test.compile.pass.cpp index 7e5dd31a397..481c93fa2c7 100644 --- a/tests/std/tests/P2538R1_adl_proof_std_projected/test.compile.pass.cpp +++ b/tests/std/tests/P2538R1_adl_proof_std_projected/test.compile.pass.cpp @@ -347,15 +347,6 @@ void test_ranges_algorithms() { // sort(iarr, validating_less{}); // need to check ADL-found swap sort(iarr, iarr, {}, validating_identity{}); - stable_sort(varr, varr); - stable_sort(varr); - stable_sort(varr, varr, less{}); - // stable_sort(varr, less{}); // need to check ADL-found operator== - stable_sort(varr, &less_function); - stable_sort(iarr, iarr, validating_less{}); - // stable_sort(iarr, validating_less{}); // need to check ADL-found swap - stable_sort(iarr, iarr, {}, validating_identity{}); - partial_sort(varr, varr, varr); partial_sort(varr, varr, static_cast* const*>(varr)); // non-common partial_sort(varr, varr); @@ -429,11 +420,6 @@ void test_ranges_algorithms() { (void) partition(iarr, iarr, validating_zero_equality{}); (void) partition(iarr, validating_zero_equality{}); - (void) stable_partition(varr, varr, simple_zero_equality{}); - (void) stable_partition(varr, simple_zero_equality{}); - (void) stable_partition(iarr, iarr, validating_zero_equality{}); - (void) stable_partition(iarr, validating_zero_equality{}); - int iarr3[2]{}; validator varr3[2]{}; @@ -453,11 +439,6 @@ void test_ranges_algorithms() { (void) merge(iarr, iarr, iarr2, iarr2, iarr3, validating_less{}); (void) merge(iarr, iarr2, iarr3, validating_less{}); - (void) inplace_merge(varr, varr, varr); - (void) inplace_merge(varr, varr); - (void) inplace_merge(iarr, iarr, iarr, validating_less{}); - (void) inplace_merge(iarr, iarr, {}, validating_identity{}); - (void) includes(varr, varr, varr, varr); (void) includes(varr, varr); (void) includes(iarr, iarr, iarr, iarr, validating_less{}); From a7b6a6e574e0177b92e1fb023baaf99d46bf53c2 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 16 May 2025 02:24:01 -0700 Subject: [PATCH 17/21] Properly detect element volatility. --- stl/inc/algorithm | 2 +- stl/inc/xutility | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 6516bc8a9bf..8bbbd658dab 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -5769,7 +5769,7 @@ namespace ranges { if constexpr (bidirectional_iterator<_It>) { #if _USE_STD_VECTOR_ALGORITHMS - using _Elem = iter_value_t<_It>; + using _Elem = remove_reference_t>; if constexpr (_Iterator_is_contiguous<_It> && sized_sentinel_for<_Se, _It> && conjunction_v<_Is_trivially_swappable<_Elem>, negation>>) { diff --git a/stl/inc/xutility b/stl/inc/xutility index 18cabfa3dcf..48103aa2f58 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -6584,7 +6584,7 @@ _CONSTEXPR20 _FwdIt rotate(_FwdIt _First, _FwdIt _Mid, _FwdIt _Last) { if constexpr (_Is_cpp17_random_iter_v<_FwdIt>) { #if _USE_STD_VECTOR_ALGORITHMS - using _Elem = _Iter_value_t; + using _Elem = remove_reference_t<_Iter_ref_t>; if constexpr (conjunction_v>, _Is_trivially_swappable<_Elem>, negation>>) { From 498b75962b5bcfe4ad0b375deaf2a3b849e4471f Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 16 May 2025 02:27:55 -0700 Subject: [PATCH 18/21] Use `_Is_trivially_ranges_swappable`... --- stl/inc/algorithm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 8bbbd658dab..66aaa756765 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -5772,7 +5772,7 @@ namespace ranges { using _Elem = remove_reference_t>; if constexpr (_Iterator_is_contiguous<_It> && sized_sentinel_for<_Se, _It> - && conjunction_v<_Is_trivially_swappable<_Elem>, negation>>) { + && conjunction_v<_Is_trivially_ranges_swappable<_Elem>, negation>>) { if (!_STD is_constant_evaluated()) { const _It _Last_it = _First + (_Last - _First); ::__std_rotate(_STD to_address(_First), _STD to_address(_Mid), _STD to_address(_Last_it)); From 6a5c9f1bddbec73ab7c7c8a6afd40c45ae6af10d Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 16 May 2025 03:00:46 -0700 Subject: [PATCH 19/21] ... so we can revert changes to ADL tests. --- .../test.compile.pass.cpp | 3 +++ .../test.compile.pass.cpp | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/tests/std/tests/GH_001596_adl_proof_algorithms/test.compile.pass.cpp b/tests/std/tests/GH_001596_adl_proof_algorithms/test.compile.pass.cpp index ae5c62512e1..4d4ff4c7094 100644 --- a/tests/std/tests/GH_001596_adl_proof_algorithms/test.compile.pass.cpp +++ b/tests/std/tests/GH_001596_adl_proof_algorithms/test.compile.pass.cpp @@ -945,6 +945,9 @@ void test_ranges_non_projected_algorithms() { (void) reverse_copy(varr, varr, varr2); (void) reverse_copy(varr, varr2); + (void) rotate(varr, varr, varr); + (void) rotate(varr, varr); + (void) rotate_copy(varr, varr, varr, varr2); (void) rotate_copy(varr, varr, varr2); diff --git a/tests/std/tests/P2538R1_adl_proof_std_projected/test.compile.pass.cpp b/tests/std/tests/P2538R1_adl_proof_std_projected/test.compile.pass.cpp index 481c93fa2c7..7e5dd31a397 100644 --- a/tests/std/tests/P2538R1_adl_proof_std_projected/test.compile.pass.cpp +++ b/tests/std/tests/P2538R1_adl_proof_std_projected/test.compile.pass.cpp @@ -347,6 +347,15 @@ void test_ranges_algorithms() { // sort(iarr, validating_less{}); // need to check ADL-found swap sort(iarr, iarr, {}, validating_identity{}); + stable_sort(varr, varr); + stable_sort(varr); + stable_sort(varr, varr, less{}); + // stable_sort(varr, less{}); // need to check ADL-found operator== + stable_sort(varr, &less_function); + stable_sort(iarr, iarr, validating_less{}); + // stable_sort(iarr, validating_less{}); // need to check ADL-found swap + stable_sort(iarr, iarr, {}, validating_identity{}); + partial_sort(varr, varr, varr); partial_sort(varr, varr, static_cast* const*>(varr)); // non-common partial_sort(varr, varr); @@ -420,6 +429,11 @@ void test_ranges_algorithms() { (void) partition(iarr, iarr, validating_zero_equality{}); (void) partition(iarr, validating_zero_equality{}); + (void) stable_partition(varr, varr, simple_zero_equality{}); + (void) stable_partition(varr, simple_zero_equality{}); + (void) stable_partition(iarr, iarr, validating_zero_equality{}); + (void) stable_partition(iarr, validating_zero_equality{}); + int iarr3[2]{}; validator varr3[2]{}; @@ -439,6 +453,11 @@ void test_ranges_algorithms() { (void) merge(iarr, iarr, iarr2, iarr2, iarr3, validating_less{}); (void) merge(iarr, iarr2, iarr3, validating_less{}); + (void) inplace_merge(varr, varr, varr); + (void) inplace_merge(varr, varr); + (void) inplace_merge(iarr, iarr, iarr, validating_less{}); + (void) inplace_merge(iarr, iarr, {}, validating_identity{}); + (void) includes(varr, varr, varr, varr); (void) includes(varr, varr); (void) includes(iarr, iarr, iarr, iarr, validating_less{}); From 501a0e413f6b94ea3b1f37751213493130a7d812 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 16 May 2025 02:33:38 -0700 Subject: [PATCH 20/21] C++20 should directly use `contiguous_iterator`. --- stl/inc/algorithm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 66aaa756765..32ac49a8a7c 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -5771,7 +5771,7 @@ namespace ranges { #if _USE_STD_VECTOR_ALGORITHMS using _Elem = remove_reference_t>; - if constexpr (_Iterator_is_contiguous<_It> && sized_sentinel_for<_Se, _It> + if constexpr (contiguous_iterator<_It> && sized_sentinel_for<_Se, _It> && conjunction_v<_Is_trivially_ranges_swappable<_Elem>, negation>>) { if (!_STD is_constant_evaluated()) { const _It _Last_it = _First + (_Last - _First); From 76d3b38bc9e44060ac29b5a4e9a878f3f3fe92cd Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 16 May 2025 02:45:57 -0700 Subject: [PATCH 21/21] Test `_HAS_CXX20` positively. --- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 826adc0a1c3..024086ffc91 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -767,14 +767,14 @@ void test_case_rotate( const auto it = rotate(actual.begin(), actual.begin() + pos, actual.end()); assert(expected == actual); assert(it == actual.begin() + shift); -#if !_HAS_CXX20 - (void) actual_r; -#else // ^^^ !_HAS_CXX20 / _HAS_CXX20 vvv +#if _HAS_CXX20 const auto rng = ranges::rotate(actual_r.begin(), actual_r.begin() + pos, actual_r.end()); assert(expected == actual_r); assert(begin(rng) == actual_r.begin() + shift); assert(end(rng) == actual_r.end()); -#endif // ^^^ _HAS_CXX20 ^^^ +#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv + (void) actual_r; +#endif // ^^^ !_HAS_CXX20 ^^^ } template