Skip to content

Commit 8aa8f97

Browse files
<regex>: Speed up skip optimization for default regex_traits in collate mode (#5672)
Co-authored-by: Stephan T. Lavavej <[email protected]>
1 parent 1449cee commit 8aa8f97

File tree

3 files changed

+37
-2
lines changed

3 files changed

+37
-2
lines changed

benchmarks/src/regex_search.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@
88
#include "lorem.hpp"
99

1010
using namespace std;
11+
using namespace regex_constants;
1112

12-
void bm_lorem_search(benchmark::State& state, const char* pattern) {
13+
void bm_lorem_search(benchmark::State& state, const char* pattern, syntax_option_type syntax = ECMAScript) {
1314
string repeated_lorem{lorem_ipsum};
1415
for (long long i = 0; i < state.range(); ++i) {
1516
repeated_lorem += repeated_lorem;
1617
}
17-
regex re{pattern};
18+
regex re{pattern, syntax};
1819

1920
for (auto _ : state) {
2021
benchmark::DoNotOptimize(repeated_lorem);
@@ -33,6 +34,7 @@ void bm_lorem_search(benchmark::State& state, const char* pattern) {
3334

3435
BENCHMARK_CAPTURE(bm_lorem_search, "^bibe", "^bibe")->Arg(2)->Arg(3)->Arg(4);
3536
BENCHMARK_CAPTURE(bm_lorem_search, "bibe", "bibe")->Arg(2)->Arg(3)->Arg(4);
37+
BENCHMARK_CAPTURE(bm_lorem_search, "bibe".collate, "bibe", regex_constants::collate)->Arg(2)->Arg(3)->Arg(4);
3638
BENCHMARK_CAPTURE(bm_lorem_search, "(bibe)", "(bibe)")->Arg(2)->Arg(3)->Arg(4);
3739
BENCHMARK_CAPTURE(bm_lorem_search, "(bibe)+", "(bibe)+")->Arg(2)->Arg(3)->Arg(4);
3840
BENCHMARK_CAPTURE(bm_lorem_search, "(?:bibe)+", "(?:bibe)+")->Arg(2)->Arg(3)->Arg(4);

stl/inc/regex

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3690,6 +3690,8 @@ _BidIt1 _Compare_translate_both(_BidIt1 _Begin1, _BidIt1 _End1, _BidIt2 _Begin2,
36903690
// compare character ranges, translating characters in both ranges according to syntax options
36913691
if (_Sflags & regex_constants::icase) {
36923692
return _STD _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, _Cmp_icase<_RxTraits>{_Traits});
3693+
} else if constexpr (_Is_any_of_v<_RxTraits, regex_traits<char>, regex_traits<wchar_t>>) {
3694+
return _STD _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, equal_to<typename _RxTraits::char_type>{});
36933695
} else if (_Sflags & regex_constants::collate) {
36943696
return _STD _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, _Cmp_collate<_RxTraits>{_Traits});
36953697
} else {
@@ -3703,6 +3705,8 @@ _BidIt1 _Compare_translate_left(_BidIt1 _Begin1, _BidIt1 _End1, _BidIt2 _Begin2,
37033705
// compare character ranges, translating characters in the left range according to syntax options
37043706
if (_Sflags & regex_constants::icase) {
37053707
return _STD _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, _Cmp_icase_translateleft<_RxTraits>{_Traits});
3708+
} else if constexpr (_Is_any_of_v<_RxTraits, regex_traits<char>, regex_traits<wchar_t>>) {
3709+
return _STD _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, equal_to<typename _RxTraits::char_type>{});
37063710
} else if (_Sflags & regex_constants::collate) {
37073711
return _STD _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, _Cmp_collate_translateleft<_RxTraits>{_Traits});
37083712
} else {
@@ -3717,6 +3721,8 @@ _BidIt1 _Search_translate_left(_BidIt1 _Begin1, _BidIt1 _End1, _BidIt2 _Begin2,
37173721
// after translating characters in the left sequence according to syntax options
37183722
if (_Sflags & regex_constants::icase) {
37193723
return _STD search(_Begin1, _End1, _Begin2, _End2, _Cmp_icase_translateleft<_RxTraits>{_Traits});
3724+
} else if constexpr (_Is_any_of_v<_RxTraits, regex_traits<char>, regex_traits<wchar_t>>) {
3725+
return _STD search(_Begin1, _End1, _Begin2, _End2, equal_to<typename _RxTraits::char_type>{});
37203726
} else if (_Sflags & regex_constants::collate) {
37213727
return _STD search(_Begin1, _End1, _Begin2, _End2, _Cmp_collate_translateleft<_RxTraits>{_Traits});
37223728
} else {

tests/std/tests/VSO_0000000_regex_use/test.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2103,6 +2103,32 @@ void test_gh_5576() {
21032103
match_default, "AbGweEXfFlXlLLlffflEXlF");
21042104
}
21052105

2106+
void test_gh_5672() {
2107+
// GH-5672: Speed up skip optimization for default `regex_traits` in `collate` mode
2108+
// The PR added a faster branch in the skip optimization when matching in collate mode
2109+
// for default `regex_traits<char>` and `regex_traits<wchar_t>`.
2110+
// The following tests check that searching still works correctly when the faster branch is engaged.
// Both scopes below run the same five searches for the single-character pattern "g" compiled with
// `regex_constants::collate` (and without `icase`): once for narrow and once for wide characters.
2111+
{
2112+
// Narrow-character variant.
test_regex collating_re(&g_regexTester, "g", regex_constants::collate);
2113+
2114+
// Lower-case alphabet in ascending order: 'g' is present, so the search must find "g".
collating_re.should_search_match("abcdefghijklmnopqrstuvwxyz", "g");
2115+
// Upper-case alphabet: only 'G' is present, and the search is expected to fail.
collating_re.should_search_fail("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
2116+
// Same two checks with the alphabet reversed.
collating_re.should_search_match("zyxwvutsrqponmlkjihgfedcba", "g");
2117+
collating_re.should_search_fail("ZYXWVUTSRQPONMLKJIHGFEDCBA");
2118+
// Reversed lower-case alphabet with 'g' (and 'f') removed: nothing to find.
collating_re.should_search_fail("zyxwvutsrqponmlkjihedcba");
2119+
}
2120+
2121+
{
2122+
// Wide-character variant of the same five checks.
test_wregex collating_re(&g_regexTester, L"g", regex_constants::collate);
2123+
2124+
collating_re.should_search_match(L"abcdefghijklmnopqrstuvwxyz", L"g");
2125+
collating_re.should_search_fail(L"ABCDEFGHIJKLMNOPQRSTUVWXYZ");
2126+
collating_re.should_search_match(L"zyxwvutsrqponmlkjihgfedcba", L"g");
2127+
collating_re.should_search_fail(L"ZYXWVUTSRQPONMLKJIHGFEDCBA");
2128+
// Input without 'g' (and 'f'): the search must fail.
collating_re.should_search_fail(L"zyxwvutsrqponmlkjihedcba");
2129+
}
2130+
}
2131+
21062132
int main() {
21072133
test_dev10_449367_case_insensitivity_should_work();
21082134
test_dev11_462743_regex_collate_should_not_disable_regex_icase();
@@ -2153,6 +2179,7 @@ int main() {
21532179
test_gh_5490();
21542180
test_gh_5509();
21552181
test_gh_5576();
2182+
test_gh_5672();
21562183

21572184
return g_regexTester.result();
21582185
}

0 commit comments

Comments
 (0)