Skip to content

Commit bbd15ba

Browse files
<regex>: Limit backreference parsing to single digit for basic regular expressions (#5167)
Co-authored-by: Stephan T. Lavavej <[email protected]>
1 parent af0bd00 commit bbd15ba

File tree

2 files changed: +27 -12 lines changed

stl/inc/regex

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1187,8 +1187,6 @@ _NODISCARD bool operator!=(const match_results<_BidIt, _Alloc>& _Left, const mat
11871187
}
11881188
#endif // !_HAS_CXX20
11891189

1190-
_INLINE_VAR constexpr unsigned int _BRE_MAX_GRP = 9U;
1191-
11921190
_INLINE_VAR constexpr unsigned int _Bmp_max = 256U; // must fit in an unsigned int
11931191
_INLINE_VAR constexpr unsigned int _Bmp_shift = 3U;
11941192
_INLINE_VAR constexpr unsigned int _Bmp_chrs = 1U << _Bmp_shift; // # of bits to be stored in each char
@@ -1705,7 +1703,7 @@ private:
17051703

17061704
// parsing
17071705
int _Do_digits(int _Base, int _Count, regex_constants::error_type _Error_type);
1708-
bool _DecimalDigits(regex_constants::error_type _Error_type);
1706+
bool _DecimalDigits2(regex_constants::error_type _Error_type, int _Count = INT_MAX);
17091707
void _HexDigits(int);
17101708
bool _OctalDigits();
17111709
void _Do_ex_class(_Meta_type);
@@ -3943,9 +3941,9 @@ int _Parser<_FwdIt, _Elem, _RxTraits>::_Do_digits(
39433941
}
39443942

39453943
template <class _FwdIt, class _Elem, class _RxTraits>
3946-
bool _Parser<_FwdIt, _Elem, _RxTraits>::_DecimalDigits(
3947-
regex_constants::error_type _Error_type) { // check for decimal value
3948-
return _Do_digits(10, INT_MAX, _Error_type) != INT_MAX; // removed version: the count passed to _Do_digits was always INT_MAX
3944+
bool _Parser<_FwdIt, _Elem, _RxTraits>::_DecimalDigits2(
3945+
const regex_constants::error_type _Error_type, const int _Count /* = INT_MAX */) { // check for decimal value; _Count is forwarded to _Do_digits (presumably the maximum number of digits to read - confirm against _Do_digits)
3946+
return _Do_digits(10, _Count, _Error_type) != _Count; // base 10; true when _Do_digits returns something other than the _Count passed in (presumably meaning at least one digit was consumed - confirm)
39493947
}
39503948

39513949
template <class _FwdIt, class _Elem, class _RxTraits>
@@ -4034,7 +4032,7 @@ _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassEscape2() { // check for class
40344032
return _Prs_chr;
40354033
} else if ((_L_flags & _L_esc_wsd) && _CharacterClassEscape(false)) {
40364034
return _Prs_set;
4037-
} else if (_DecimalDigits(regex_constants::error_escape)) { // check for invalid value
4035+
} else if (_DecimalDigits2(regex_constants::error_escape)) { // check for invalid value
40384036
if (_Val != 0) {
40394037
_Error(regex_constants::error_escape);
40404038
}
@@ -4332,15 +4330,17 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterEscape() { // check for valid
43324330

43334331
template <class _FwdIt, class _Elem, class _RxTraits>
43344332
void _Parser<_FwdIt, _Elem, _RxTraits>::_AtomEscape() { // check for valid atom escape
4335-
if ((_L_flags & _L_bckr) && _DecimalDigits(regex_constants::error_backref)) { // check for valid back reference
4333+
constexpr int _Bre_max_backref_digits = 1;
4334+
if ((_L_flags & _L_bckr)
4335+
&& _DecimalDigits2(regex_constants::error_backref,
4336+
(_L_flags & _L_lim_bckr) ? _Bre_max_backref_digits : INT_MAX)) { // check for valid back reference
43364337
if (_Val == 0) { // handle \0
43374338
if (!(_L_flags & _L_bzr_chr)) {
43384339
_Error(regex_constants::error_escape);
43394340
} else {
43404341
_Nfa._Add_char(static_cast<_Elem>(_Val));
43414342
}
4342-
} else if (((_L_flags & _L_lim_bckr) && _BRE_MAX_GRP < static_cast<size_t>(_Val))
4343-
|| _Grp_idx < static_cast<size_t>(_Val) || _Finished_grps.size() <= static_cast<size_t>(_Val)
4343+
} else if (_Grp_idx < static_cast<size_t>(_Val) || _Finished_grps.size() <= static_cast<size_t>(_Val)
43444344
|| !_Finished_grps[static_cast<size_t>(_Val)]) {
43454345
_Error(regex_constants::error_backref);
43464346
} else {
@@ -4364,7 +4364,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier
43644364
_Max = 1;
43654365
} else if (_Mchar == _Meta_lbr) { // check for valid bracketed value
43664366
_Next();
4367-
if (!_DecimalDigits(regex_constants::error_badbrace)) {
4367+
if (!_DecimalDigits2(regex_constants::error_badbrace)) {
43684368
_Error(regex_constants::error_badbrace);
43694369
}
43704370

@@ -4374,7 +4374,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier
43744374
} else { // check for decimal constant following comma
43754375
_Next();
43764376
if (_Mchar != _Meta_rbr) {
4377-
if (!_DecimalDigits(regex_constants::error_badbrace)) {
4377+
if (!_DecimalDigits2(regex_constants::error_badbrace)) {
43784378
_Error(regex_constants::error_badbrace);
43794379
}
43804380

tests/std/tests/VSO_0000000_regex_use/test.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,20 @@ void test_gh_5160() {
669669
neg_regex.should_search_fail(L"xxxYxx\x2009xxxZxxx"); // U+2009 THIN SPACE
670670
}
671671

672+
void test_gh_5167() {
673+
// GH-5167: Limit backreference parsing to single digit for basic regular expressions (every case below is run under both the basic and grep flags)
674+
g_regexTester.should_match("abab0", R"(\(ab*\)\10)", basic); // expected reading: backreference 1 followed by a literal '0', not backreference 10
675+
g_regexTester.should_match("abab0", R"(\(ab*\)\10)", grep); // same pattern and input under grep
676+
g_regexTester.should_match("abbcdccdc5abb8", R"(\(ab*\)\([cd]*\)\25\18)", basic); // backreference 2 + literal '5', then backreference 1 + literal '8'
677+
g_regexTester.should_match("abbcdccdc5abb8", R"(\(ab*\)\([cd]*\)\25\18)", grep); // same pattern and input under grep
678+
g_regexTester.should_not_match("abbcdccdc5abb8", R"(\(ab*\)\([cd]*\)\15\28)", basic); // references swapped relative to the case above; the same input must no longer match
679+
g_regexTester.should_not_match("abbcdccdc5abb8", R"(\(ab*\)\([cd]*\)\15\28)", grep); // same pattern and input under grep
680+
g_regexTester.should_throw(R"(abc\1d)", error_backref, basic); // backreference 1 in a pattern that has no capture group
681+
g_regexTester.should_throw(R"(abc\1d)", error_backref, grep); // same pattern under grep
682+
g_regexTester.should_throw(R"(abc\10)", error_backref, basic); // still expected to fail: the pattern has no capture group for backreference 1 either
683+
g_regexTester.should_throw(R"(abc\10)", error_backref, grep); // same pattern under grep
684+
}
685+
672686
int main() {
673687
test_dev10_449367_case_insensitivity_should_work();
674688
test_dev11_462743_regex_collate_should_not_disable_regex_icase();
@@ -699,6 +713,7 @@ int main() {
699713
test_gh_4995();
700714
test_gh_5058();
701715
test_gh_5160();
716+
test_gh_5167();
702717

703718
return g_regexTester.result();
704719
}

0 commit comments

Comments
 (0)