@@ -382,6 +382,107 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8(
382
382
383
383
} // extern "C"
384
384
385
+ namespace {
386
+ namespace _Rotating {
387
+ // TRANSITION, GH-5506 "VCRuntime: memmove() is surprisingly slow for more than 8 KB on certain CPUs":
388
+ // As a workaround, the following code calls memmove() for 8 KB portions.
389
+ constexpr size_t _Portion_size = 8192 ;
390
+ constexpr size_t _Portion_mask = _Portion_size - 1 ;
391
+ static_assert ((_Portion_size & _Portion_mask) == 0 );
392
+
393
+ void _Move_to_lower_address (void * _Dest, const void * _Src, const size_t _Size) noexcept {
394
+ const size_t _Whole_portions_size = _Size & ~_Portion_mask;
395
+
396
+ void * _Dest_end = _Dest;
397
+ _Advance_bytes (_Dest_end, _Whole_portions_size);
398
+
399
+ while (_Dest != _Dest_end) {
400
+ memmove (_Dest, _Src, _Portion_size);
401
+ _Advance_bytes (_Dest, _Portion_size);
402
+ _Advance_bytes (_Src, _Portion_size);
403
+ }
404
+
405
+ if (const size_t _Tail = _Size - _Whole_portions_size; _Tail != 0 ) {
406
+ memmove (_Dest, _Src, _Tail);
407
+ }
408
+ }
409
+
410
+ void _Move_to_higher_address (void * const _Dest, const void * const _Src, const size_t _Size) noexcept {
411
+ const size_t _Whole_portions_size = _Size & ~_Portion_mask;
412
+
413
+ void * _Dest_end = _Dest;
414
+ _Advance_bytes (_Dest_end, _Whole_portions_size);
415
+ const void * _Src_end = _Src;
416
+ _Advance_bytes (_Src_end, _Whole_portions_size);
417
+
418
+ if (const size_t _Tail = _Size - _Whole_portions_size; _Tail != 0 ) {
419
+ memmove (_Dest_end, _Src_end, _Tail);
420
+ }
421
+
422
+ while (_Dest_end != _Dest) {
423
+ _Rewind_bytes (_Dest_end, _Portion_size);
424
+ _Rewind_bytes (_Src_end, _Portion_size);
425
+ memmove (_Dest_end, _Src_end, _Portion_size);
426
+ }
427
+ }
428
+
429
+ constexpr size_t _Buf_size = 512 ;
430
+
431
+ bool _Use_buffer (const size_t _Smaller, const size_t _Larger) noexcept {
432
+ return _Smaller <= _Buf_size && (_Smaller <= 128 || _Larger >= _Smaller * 2 );
433
+ }
434
+ } // namespace _Rotating
435
+ } // unnamed namespace
436
+
437
+ extern " C" {
438
+
439
+ __declspec (noalias) void __stdcall __std_rotate(void * _First, void * const _Mid, void * _Last) noexcept {
440
+ unsigned char _Buf[_Rotating::_Buf_size];
441
+
442
+ for (;;) {
443
+ const size_t _Left = _Byte_length (_First, _Mid);
444
+ const size_t _Right = _Byte_length (_Mid, _Last);
445
+
446
+ if (_Left <= _Right) {
447
+ if (_Left == 0 ) {
448
+ break ;
449
+ }
450
+
451
+ if (_Rotating::_Use_buffer (_Left, _Right)) {
452
+ memcpy (_Buf, _First, _Left);
453
+ _Rotating::_Move_to_lower_address (_First, _Mid, _Right);
454
+ _Advance_bytes (_First, _Right);
455
+ memcpy (_First, _Buf, _Left);
456
+ break ;
457
+ }
458
+
459
+ void * _Mid2 = _Last;
460
+ _Rewind_bytes (_Mid2, _Left);
461
+ __std_swap_ranges_trivially_swappable_noalias (_Mid2, _Last, _First);
462
+ _Last = _Mid2;
463
+ } else {
464
+ if (_Right == 0 ) {
465
+ break ;
466
+ }
467
+
468
+ if (_Rotating::_Use_buffer (_Right, _Left)) {
469
+ _Rewind_bytes (_Last, _Right);
470
+ memcpy (_Buf, _Last, _Right);
471
+ void * _Mid2 = _First;
472
+ _Advance_bytes (_Mid2, _Right);
473
+ _Rotating::_Move_to_higher_address (_Mid2, _First, _Left);
474
+ memcpy (_First, _Buf, _Right);
475
+ break ;
476
+ }
477
+
478
+ __std_swap_ranges_trivially_swappable_noalias (_Mid, _Last, _First);
479
+ _Advance_bytes (_First, _Right);
480
+ }
481
+ }
482
+ }
483
+
484
+ } // extern "C"
485
+
385
486
namespace {
386
487
namespace _Sorting {
387
488
enum _Min_max_mode {
0 commit comments