Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
664bb63
benchmark
AlexGuteniev May 12, 2025
3d6a9b4
test coverage
AlexGuteniev May 13, 2025
95ad7d1
vectorization
AlexGuteniev May 13, 2025
7cdd924
Drop unnecessary `std::`.
StephanTLavavej May 15, 2025
5f5bd0e
Add missing calling convention to `__std_rotate`.
StephanTLavavej May 15, 2025
01d7359
Keep `actual` and `expected` in sync with less work.
StephanTLavavej May 15, 2025
f92550d
Cite GH 5506.
StephanTLavavej May 15, 2025
0b7c678
upper_address => higher_address
StephanTLavavej May 15, 2025
0590287
I see dead variables. They don't even know they're dead.
StephanTLavavej May 15, 2025
6b6e37e
Sources should point to const.
StephanTLavavej May 15, 2025
bf77cd2
ranges coverage
AlexGuteniev May 16, 2025
19b5aa2
integer class difference coverage
AlexGuteniev May 16, 2025
d60725d
ranges benchmark
AlexGuteniev May 16, 2025
fcc41cc
more benchmark cases
AlexGuteniev May 16, 2025
ef3be12
ranges codepath
AlexGuteniev May 16, 2025
55156e9
we want swappable
AlexGuteniev May 16, 2025
a7b6a6e
Properly detect element volatility.
StephanTLavavej May 16, 2025
498b759
Use `_Is_trivially_ranges_swappable`...
StephanTLavavej May 16, 2025
6a5c9f1
... so we can revert changes to ADL tests.
StephanTLavavej May 16, 2025
501a0e4
C++20 should directly use `contiguous_iterator`.
StephanTLavavej May 16, 2025
76d3b38
Test `_HAS_CXX20` positively.
StephanTLavavej May 16, 2025
d32b1b1
Merge branch 'main' into swirl
StephanTLavavej May 17, 2025
085f273
Merge branch 'main' into swirl
StephanTLavavej May 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ add_benchmark(random_integer_generation src/random_integer_generation.cpp)
add_benchmark(remove src/remove.cpp)
add_benchmark(replace src/replace.cpp)
add_benchmark(reverse src/reverse.cpp)
add_benchmark(rotate src/rotate.cpp)
add_benchmark(search src/search.cpp)
add_benchmark(search_n src/search_n.cpp)
add_benchmark(std_copy src/std_copy.cpp)
Expand Down
58 changes: 58 additions & 0 deletions benchmarks/src/rotate.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <algorithm>
#include <benchmark/benchmark.h>
#include <cstdint>
#include <vector>

#include "skewed_allocator.hpp"
#include "utility.hpp"

using namespace std;

// Selects which rotate entry point a benchmark instantiation measures.
enum class AlgType {
    Std, // classic iterator-triple rotate()
    Rng, // ranges::rotate()
};

// Benchmarks rotating a vector of T.
// state.range(0) is the number of elements; state.range(1) is the offset of the new first element.
// Alg picks between rotate() on iterators and ranges::rotate() on the whole range.
template <class T, AlgType Alg>
void bm_rotate(benchmark::State& state) {
    const auto elem_count = static_cast<size_t>(state.range(0));
    const auto mid_offset = static_cast<size_t>(state.range(1));

    auto data = random_vector<T, not_highly_aligned_allocator>(elem_count);
    benchmark::DoNotOptimize(data);

    for (auto _ : state) {
        if constexpr (Alg != AlgType::Std) {
            ranges::rotate(data, data.begin() + mid_offset);
        } else {
            rotate(data.begin(), data.begin() + mid_offset, data.end());
        }

        // Keep the rotated contents observable so the call cannot be optimized away.
        benchmark::DoNotOptimize(data);
    }
}

// Registers the shared {element count, rotation offset} argument pairs on a benchmark.
// The pairs are applied in table order, one Args() call per row.
void common_args(auto bm) {
    constexpr int64_t size_and_offset[][2] = {
        // larger ranges, varying split points
        {3333, 2242},
        {3332, 1666},
        {3333, 1111},
        {3333, 501},
        // larger ranges, split point close to either end
        {3333, 3300},
        {3333, 12},
        {3333, 5},
        {3333, 1},
        // smaller ranges
        {333, 101},
        {123, 32},
        {23, 7},
        {12, 5},
        {3, 2},
    };

    for (const auto& row : size_and_offset) {
        bm->Args({row[0], row[1]});
    }
}

// Aggregate of three uint16_t fields, used below as a benchmark element type alongside the
// plain 1/2/4/8-byte integers.
// NOTE(review): field names suggest hue/saturation/lightness - confirm if it matters.
struct color {
uint16_t h;
uint16_t s;
uint16_t l;
};

// Each element type is registered twice: once for rotate() (AlgType::Std) and once for
// ranges::rotate() (AlgType::Rng). All registrations share the argument pairs from common_args.
BENCHMARK(bm_rotate<uint8_t, AlgType::Std>)->Apply(common_args);
BENCHMARK(bm_rotate<uint8_t, AlgType::Rng>)->Apply(common_args);
BENCHMARK(bm_rotate<uint16_t, AlgType::Std>)->Apply(common_args);
BENCHMARK(bm_rotate<uint16_t, AlgType::Rng>)->Apply(common_args);
BENCHMARK(bm_rotate<uint32_t, AlgType::Std>)->Apply(common_args);
BENCHMARK(bm_rotate<uint32_t, AlgType::Rng>)->Apply(common_args);
BENCHMARK(bm_rotate<uint64_t, AlgType::Std>)->Apply(common_args);
BENCHMARK(bm_rotate<uint64_t, AlgType::Rng>)->Apply(common_args);

// Struct element type made of three uint16_t fields.
BENCHMARK(bm_rotate<color, AlgType::Std>)->Apply(common_args);
BENCHMARK(bm_rotate<color, AlgType::Rng>)->Apply(common_args);

BENCHMARK_MAIN();
13 changes: 13 additions & 0 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -5768,6 +5768,19 @@ namespace ranges {
}

if constexpr (bidirectional_iterator<_It>) {
#if _USE_STD_VECTOR_ALGORITHMS
using _Elem = remove_reference_t<iter_reference_t<_It>>;

if constexpr (contiguous_iterator<_It> && sized_sentinel_for<_Se, _It>
&& conjunction_v<_Is_trivially_ranges_swappable<_Elem>, negation<is_volatile<_Elem>>>) {
if (!_STD is_constant_evaluated()) {
const _It _Last_it = _First + (_Last - _First);
::__std_rotate(_STD to_address(_First), _STD to_address(_Mid), _STD to_address(_Last_it));
return {_First + (_Last - _Mid), _Last};
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS

_RANGES _Reverse_common(_First, _Mid);
auto _Final = _RANGES _Get_final_iterator_unwrapped<_It>(_Mid, _STD move(_Last));
_RANGES _Reverse_common(_Mid, _Final);
Expand Down
13 changes: 13 additions & 0 deletions stl/inc/xutility
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_8(void* _Firs
__declspec(noalias) void __cdecl __std_swap_ranges_trivially_swappable_noalias(
void* _First1, void* _Last1, void* _First2) noexcept;

__declspec(noalias) void __stdcall __std_rotate(void* _First, void* _Mid, void* _Last) noexcept;

__declspec(noalias) size_t __stdcall __std_count_trivial_1(
const void* _First, const void* _Last, uint8_t _Val) noexcept;
__declspec(noalias) size_t __stdcall __std_count_trivial_2(
Expand Down Expand Up @@ -6597,6 +6599,17 @@ _CONSTEXPR20 _FwdIt rotate(_FwdIt _First, _FwdIt _Mid, _FwdIt _Last) {
}

if constexpr (_Is_cpp17_random_iter_v<_FwdIt>) {
#if _USE_STD_VECTOR_ALGORITHMS
using _Elem = remove_reference_t<_Iter_ref_t<decltype(_UFirst)>>;

if constexpr (conjunction_v<bool_constant<_Iterator_is_contiguous<decltype(_UFirst)>>,
_Is_trivially_swappable<_Elem>, negation<is_volatile<_Elem>>>) {
if (!_STD _Is_constant_evaluated()) {
::__std_rotate(_STD _To_address(_UFirst), _STD _To_address(_UMid), _STD _To_address(_ULast));
return _First + (_Last - _Mid);
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS
_STD reverse(_UFirst, _UMid);
_STD reverse(_UMid, _ULast);
_STD reverse(_UFirst, _ULast);
Expand Down
101 changes: 101 additions & 0 deletions stl/src/vector_algorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,107 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8(

} // extern "C"

namespace {
namespace _Rotating {
// TRANSITION, GH-5506 "VCRuntime: memmove() is surprisingly slow for more than 8 KB on certain CPUs":
// As a workaround, the following code calls memmove() for 8 KB portions.
constexpr size_t _Portion_size = 8192;
constexpr size_t _Portion_mask = _Portion_size - 1;
static_assert((_Portion_size & _Portion_mask) == 0);

void _Move_to_lower_address(void* _Dest, const void* _Src, const size_t _Size) noexcept {
const size_t _Whole_portions_size = _Size & ~_Portion_mask;

void* _Dest_end = _Dest;
_Advance_bytes(_Dest_end, _Whole_portions_size);

while (_Dest != _Dest_end) {
memmove(_Dest, _Src, _Portion_size);
_Advance_bytes(_Dest, _Portion_size);
_Advance_bytes(_Src, _Portion_size);
}

if (const size_t _Tail = _Size - _Whole_portions_size; _Tail != 0) {
memmove(_Dest, _Src, _Tail);
}
}

void _Move_to_higher_address(void* const _Dest, const void* const _Src, const size_t _Size) noexcept {
const size_t _Whole_portions_size = _Size & ~_Portion_mask;

void* _Dest_end = _Dest;
_Advance_bytes(_Dest_end, _Whole_portions_size);
const void* _Src_end = _Src;
_Advance_bytes(_Src_end, _Whole_portions_size);

if (const size_t _Tail = _Size - _Whole_portions_size; _Tail != 0) {
memmove(_Dest_end, _Src_end, _Tail);
}

while (_Dest_end != _Dest) {
_Rewind_bytes(_Dest_end, _Portion_size);
_Rewind_bytes(_Src_end, _Portion_size);
memmove(_Dest_end, _Src_end, _Portion_size);
}
}

constexpr size_t _Buf_size = 512;

bool _Use_buffer(const size_t _Smaller, const size_t _Larger) noexcept {
return _Smaller <= _Buf_size && (_Smaller <= 128 || _Larger >= _Smaller * 2);
}
} // namespace _Rotating
} // unnamed namespace

extern "C" {

// Rotates the bytes of [_First, _Last) in place so that the bytes starting at _Mid come first,
// followed by what was originally in [_First, _Mid).
// The rotate() and ranges::rotate() fast paths call this only for contiguous ranges of
// trivially swappable, non-volatile elements.
__declspec(noalias) void __stdcall __std_rotate(void* _First, void* const _Mid, void* _Last) noexcept {
// Stack scratch space of _Rotating::_Buf_size bytes, used to park the smaller side when _Use_buffer allows it.
unsigned char _Buf[_Rotating::_Buf_size];

// Each iteration either finishes through the buffer path, or swaps the smaller side into its
// final position and shrinks [_First, _Last) to the part that still needs rotating.
// _Mid itself never changes.
for (;;) {
// Sizes of the two sides, as reported by _Byte_length (presumably in bytes).
const size_t _Left = _Byte_length(_First, _Mid);
const size_t _Right = _Byte_length(_Mid, _Last);

if (_Left <= _Right) {
// Left side is the smaller one (or both sides are equal).
if (_Left == 0) {
break;
}

if (_Rotating::_Use_buffer(_Left, _Right)) {
// Save the left side, slide the right side down to _First, then append the saved bytes after it.
memcpy(_Buf, _First, _Left);
_Rotating::_Move_to_lower_address(_First, _Mid, _Right);
_Advance_bytes(_First, _Right);
memcpy(_First, _Buf, _Left);
break;
}

// Exchange the left side with the last _Left bytes of the range. Those last bytes then hold
// their final contents, so they are dropped from the range by pulling _Last back.
void* _Mid2 = _Last;
_Rewind_bytes(_Mid2, _Left);
__std_swap_ranges_trivially_swappable_noalias(_Mid2, _Last, _First);
_Last = _Mid2;
} else {
// Right side is strictly smaller.
if (_Right == 0) {
break;
}

if (_Rotating::_Use_buffer(_Right, _Left)) {
// After rewinding, _Last points at the start of the right side.
// Save the right side, slide the left side up by _Right bytes, then put the saved bytes at the front.
_Rewind_bytes(_Last, _Right);
memcpy(_Buf, _Last, _Right);
void* _Mid2 = _First;
_Advance_bytes(_Mid2, _Right);
_Rotating::_Move_to_higher_address(_Mid2, _First, _Left);
memcpy(_First, _Buf, _Right);
break;
}

// Exchange the right side with the first _Right bytes of the range. Those first bytes then hold
// their final contents, so they are dropped from the range by pushing _First forward.
__std_swap_ranges_trivially_swappable_noalias(_Mid, _Last, _First);
_Advance_bytes(_First, _Right);
}
}
}

} // extern "C"

namespace {
namespace _Sorting {
enum _Min_max_mode {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ int main() {
picky_contiguous_iterator float_arr_begin(begin(float_arr));
picky_contiguous_iterator float_arr_end(end(float_arr));

transform(arr_begin, arr_end, float_arr_begin, [](int v) { return static_cast<float>(v); });
transform(arr_begin, arr_end, float_arr_begin, [](const int v) { return static_cast<float>(v); });

assert(ranges::min(ranges::subrange(float_arr_begin, float_arr_end)) == 200.0);
assert(ranges::max(ranges::subrange(float_arr_begin, float_arr_end)) == 390.0);
Expand Down Expand Up @@ -196,6 +196,30 @@ int main() {
ranges::reverse(temp_begin, temp_end);
assert(ranges::equal(temp_begin, temp_end, begin(reverse_expected), end(reverse_expected)));
}
{
const int rotate_expected[] = {
250, 270, 280, 290, 300, 310, 320, 250, 340, 250, 250, 370, 380, 390, 200, 210, 220, 250, 240, 250};

const _Signed128 rotate_pos = 6;

auto rot_copy_it = rotate_copy(arr_begin, arr_begin + rotate_pos, arr_end, temp_begin);
assert(equal(temp_begin, temp_end, begin(rotate_expected), end(rotate_expected)));
assert(rot_copy_it == temp_end);

copy(arr_begin, arr_end, temp_begin);
auto rot_it = rotate(temp_begin, temp_begin + rotate_pos, temp_end);
assert(equal(temp_begin, temp_end, begin(rotate_expected), end(rotate_expected)));
assert(rot_it == temp_end - rotate_pos);

auto r_rot_copy_it = ranges::rotate_copy(arr_begin, arr_begin + rotate_pos, arr_end, temp_begin).out;
assert(ranges::equal(temp_begin, temp_end, begin(rotate_expected), end(rotate_expected)));
assert(r_rot_copy_it == temp_end);

ranges::copy(arr_begin, arr_end, temp_begin);
auto r_rot_it = begin(ranges::rotate(temp_begin, temp_begin + rotate_pos, temp_end));
assert(ranges::equal(temp_begin, temp_end, begin(rotate_expected), end(rotate_expected)));
assert(r_rot_it == temp_end - rotate_pos);
}
{
// Out of replace family, only replace for 32-bit and 64-bit elements is manually vectorized,
// replace_copy is auto vectorized (along with replace_copy_if)
Expand Down
72 changes: 72 additions & 0 deletions tests/std/tests/VSO_0000000_vector_algorithms/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,65 @@ void test_reverse_copy(mt19937_64& gen) {
}
}

template <class RanIt>
void last_known_good_rotate(
RanIt first, RanIt mid, RanIt last, vector<typename iterator_traits<RanIt>::value_type>& tmp) {
const auto size_left = mid - first;
const auto size_right = last - mid;
if (size_left <= size_right) {
tmp.assign(first, mid);
move_backward(mid, last, last - size_left);
move(tmp.begin(), tmp.end(), last - size_left);
} else {
tmp.assign(mid, last);
move(first, mid, first + size_right);
move(tmp.begin(), tmp.end(), first);
}
}

// Runs one rotation check at position `pos`.
// `expected` is rotated by the reference implementation, `actual` by rotate() and (in C++20 mode)
// `actual_r` by ranges::rotate(); contents and returned iterators are then compared.
// All three vectors stay rotated afterwards, so they remain in sync for the next call.
template <class T>
void test_case_rotate(
    vector<T>& actual, vector<T>& actual_r, vector<T>& expected, const ptrdiff_t pos, vector<T>& tmp) {
    last_known_good_rotate(expected.begin(), expected.begin() + pos, expected.end(), tmp);

    // Offset at which the originally-first element must end up.
    const ptrdiff_t new_first_offset = static_cast<ptrdiff_t>(expected.size()) - pos;

    const auto result = rotate(actual.begin(), actual.begin() + pos, actual.end());
    assert(actual == expected);
    assert(result == actual.begin() + new_first_offset);
#if _HAS_CXX20
    const auto result_r = ranges::rotate(actual_r.begin(), actual_r.begin() + pos, actual_r.end());
    assert(actual_r == expected);
    assert(result_r.begin() == actual_r.begin() + new_first_offset);
    assert(result_r.end() == actual_r.end());
#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv
    (void) actual_r;
#endif // ^^^ !_HAS_CXX20 ^^^
}

// Exercises rotation for element type T on ranges of every size from 0 up to dataCount.
// After each appended random element, five random rotation points in [0, size] are checked.
template <class T>
void test_rotate(mt19937_64& gen) {
    constexpr size_t positions_per_size = 5;

    vector<T> actual;
    vector<T> actual_r;
    vector<T> expected;
    vector<T> tmp;
    for (vector<T>* const vec : {&actual, &actual_r, &expected, &tmp}) {
        vec->reserve(dataCount);
    }

    // Empty range first.
    test_case_rotate(actual, actual_r, expected, 0, tmp);

    for (size_t prev_size = 0; prev_size < dataCount; ++prev_size) {
        const T val = static_cast<T>(gen()); // intentionally narrows
        for (vector<T>* const vec : {&actual, &actual_r, &expected}) {
            vec->push_back(val);
        }

        // Upper bound equals the new size, so rotating at the end position is covered too.
        uniform_int_distribution<ptrdiff_t> dis_pos(0, static_cast<ptrdiff_t>(prev_size) + 1);

        for (size_t i = 0; i != positions_per_size; ++i) {
            test_case_rotate(actual, actual_r, expected, dis_pos(gen), tmp);
        }
    }
}

template <class FwdIt1, class FwdIt2>
FwdIt2 last_known_good_swap_ranges(FwdIt1 first1, const FwdIt1 last1, FwdIt2 dest) {
for (; first1 != last1; ++first1, ++dest) {
Expand Down Expand Up @@ -1182,6 +1241,19 @@ void test_vector_algorithms(mt19937_64& gen) {
test_reverse_copy<double>(gen);
test_reverse_copy<long double>(gen);

test_rotate<char>(gen);
test_rotate<signed char>(gen);
test_rotate<unsigned char>(gen);
test_rotate<short>(gen);
test_rotate<unsigned short>(gen);
test_rotate<int>(gen);
test_rotate<unsigned int>(gen);
test_rotate<long long>(gen);
test_rotate<unsigned long long>(gen);
test_rotate<float>(gen);
test_rotate<double>(gen);
test_rotate<long double>(gen);

test_remove<char>(gen);
test_remove<signed char>(gen);
test_remove<unsigned char>(gen);
Expand Down