Skip to content

Commit fb2ae67

Browse files
authored
Add a SearchValues ProbabilisticMap implementation that uses an ASCII fast path (#89155)
* Add a SearchValues ProbabilisticMap implementation that uses an ASCII fast path
* Add comments and asserts around IOptimizations selection
* Remove unused using
1 parent d1adf81 commit fb2ae67

File tree

5 files changed

+238
-14
lines changed

5 files changed

+238
-14
lines changed

src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,7 @@
415415
<Compile Include="$(MSBuildThisFileDirectory)System\Index.cs" />
416416
<Compile Include="$(MSBuildThisFileDirectory)System\Reflection\Emit\ILGenerator.cs" />
417417
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\BitVector256.cs" />
418+
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\ProbabilisticWithAsciiCharSearchValues.cs" />
418419
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\SingleCharSearchValues.cs" />
419420
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\SingleByteSearchValues.cs" />
420421
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any2ByteSearchValues.cs" />

src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,7 @@ internal static unsafe void ComputeBitmap<T>(ReadOnlySpan<T> values, out Vector2
6767

6868
if (value > 127)
6969
{
70-
// The values were modified concurrent with the call to SearchValues.Create
71-
ThrowHelper.ThrowInvalidOperationException_InvalidOperation_EnumFailedVersion();
70+
continue;
7271
}
7372

7473
lookupLocal.Set(value);

src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticCharSearchValues.cs

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
using System.Runtime.CompilerServices;
55
using System.Runtime.InteropServices;
6-
using System.Runtime.Intrinsics;
76

87
namespace System.Buffers
98
{
@@ -14,16 +13,6 @@ internal sealed class ProbabilisticCharSearchValues : SearchValues<char>
1413

1514
public ProbabilisticCharSearchValues(scoped ReadOnlySpan<char> values)
1615
{
17-
if (Vector128.IsHardwareAccelerated && values.Length < 8)
18-
{
19-
// ProbabilisticMap does a Span.Contains check to confirm potential matches.
20-
// If we have fewer than 8 values, pad them with existing ones to make the verification faster.
21-
Span<char> newValues = stackalloc char[8];
22-
newValues.Fill(values[0]);
23-
values.CopyTo(newValues);
24-
values = newValues;
25-
}
26-
2716
_values = new string(values);
2817
_map = new ProbabilisticMap(_values);
2918
}
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Diagnostics;
5+
using System.Runtime.CompilerServices;
6+
using System.Runtime.InteropServices;
7+
using System.Runtime.Intrinsics;
8+
using System.Runtime.Intrinsics.Wasm;
9+
using System.Runtime.Intrinsics.X86;
10+
11+
namespace System.Buffers
12+
{
13+
/// <summary>
/// <see cref="SearchValues{T}"/> implementation for <see cref="char"/> sets that contain at least one
/// ASCII character (asserted in the constructor). Searches first run a vectorized ASCII pass through
/// <c>IndexOfAnyAsciiSearcher</c> and only fall back to <c>ProbabilisticMap</c> (or its simple loop for the
/// "Except" variants) once that pass stops at a non-ASCII character of the input.
/// </summary>
/// <typeparam name="TOptimizations">
/// Specifies whether <c>_asciiBitmap</c> contains a 0. It is used directly by the "Except" searches, while
/// <c>IndexOfAny</c> and <c>LastIndexOfAny</c> pick the mode for <c>_inverseAsciiBitmap</c> themselves.
/// </typeparam>
internal sealed class ProbabilisticWithAsciiCharSearchValues<TOptimizations> : SearchValues<char>
14+
where TOptimizations : struct, IndexOfAnyAsciiSearcher.IOptimizations
15+
{
16+
// ASCII lookup bitmap produced by IndexOfAnyAsciiSearcher.ComputeBitmap for the constructor's values.
// Not readonly because it is passed by 'ref' to the vectorized search helpers.
private Vector256<byte> _asciiBitmap;
17+
// Bitwise complement of '_asciiBitmap'. Searched with 'Negate' by IndexOfAny and LastIndexOfAny.
// Not readonly because it is passed by 'ref' to the vectorized search helpers.
private Vector256<byte> _inverseAsciiBitmap;
18+
// Probabilistic map built from '_values'. Passed by 'ref' (reinterpreted as uint) to the ProbabilisticMap helpers.
private ProbabilisticMap _map;
19+
// String copy of the values given to the constructor. Passed alongside '_map' to every ProbabilisticMap call.
private readonly string _values;
20+
21+
/// <summary>
/// Builds the ASCII bitmap, its complement, and the probabilistic map for <paramref name="values"/>.
/// </summary>
/// <param name="values">
/// The characters to search for. Debug builds assert that vectorization is supported and that at least
/// one value is in the range [0, 127].
/// </param>
public ProbabilisticWithAsciiCharSearchValues(scoped ReadOnlySpan<char> values)
22+
{
23+
Debug.Assert(IndexOfAnyAsciiSearcher.IsVectorizationSupported);
24+
Debug.Assert(values.ContainsAnyInRange((char)0, (char)127));
25+
26+
// Only the first bitmap output is needed here; the second 'out' result is discarded.
IndexOfAnyAsciiSearcher.ComputeBitmap(values, out _asciiBitmap, out _);
27+
_inverseAsciiBitmap = ~_asciiBitmap;
28+
29+
_values = new string(values);
30+
_map = new ProbabilisticMap(_values);
31+
}
32+
33+
/// <summary>Returns a new array containing the characters stored in <c>_values</c>.</summary>
// NOTE(review): '_values' is an exact copy of what the constructor received. If the caller padded the
// values with duplicates (as the SearchValues.Create hunk does for short sets), those duplicates are
// returned here as well - confirm that this is acceptable for consumers of GetValues.
internal override char[] GetValues() => _values.ToCharArray();
34+
35+
/// <summary>Checks a single character by delegating to <c>ProbabilisticMap.Contains</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
36+
internal override bool ContainsCore(char value) =>
37+
ProbabilisticMap.Contains(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), _values, value);
38+
39+
/// <summary>
/// Forward search: runs the vectorized ASCII pass first and, if that pass stops at a non-ASCII character,
/// continues from that position with <c>ProbabilisticMap.IndexOfAny</c>.
/// </summary>
/// <param name="span">The input to search.</param>
/// <returns>
/// The value reported by the ASCII pass when it stops at an ASCII character or does not yield an in-range
/// index; otherwise the ProbabilisticMap result, shifted by the skipped prefix when it is non-negative.
/// </returns>
internal override int IndexOfAny(ReadOnlySpan<char> span)
40+
{
41+
// Number of leading characters consumed by the ASCII pass; stays 0 when that pass is not taken.
int offset = 0;
42+
43+
// The ASCII pass is only attempted when the input holds at least one full Vector128<short>.
if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
44+
{
45+
// We are using IndexOfAnyAsciiSearcher to search for the first ASCII character in the set, or any non-ASCII character.
46+
// We do this by inverting the bitmap and using the opposite search function (Negate instead of DontNegate).
47+
48+
// If the bitmap we're using contains a 0, we have to use 'Ssse3AndWasmHandleZeroInNeedle' when running on X86 and WASM.
49+
// Everything else should use 'Default'. 'TOptimizations' specifies whether '_asciiBitmap' contains a 0.
50+
// Since we're using the inverse bitmap in this case, we have to use 'Ssse3AndWasmHandleZeroInNeedle' iff we're
51+
// running on X86/WASM and 'TOptimizations' is 'Default' (as that means that the inverse bitmap definitely has a 0).
52+
Debug.Assert((_asciiBitmap[0] & 1) != (_inverseAsciiBitmap[0] & 1));
53+
54+
if ((Ssse3.IsSupported || PackedSimd.IsSupported) && typeof(TOptimizations) == typeof(IndexOfAnyAsciiSearcher.Default))
55+
{
56+
Debug.Assert((_inverseAsciiBitmap[0] & 1) == 1, "The inverse bitmap did not contain a 0.");
57+
58+
offset = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Ssse3AndWasmHandleZeroInNeedle>(
59+
ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
60+
span.Length,
61+
ref _inverseAsciiBitmap);
62+
}
63+
else
64+
{
65+
Debug.Assert(!(Ssse3.IsSupported || PackedSimd.IsSupported) || (_inverseAsciiBitmap[0] & 1) == 0,
66+
"The inverse bitmap contained a 0, but we're not using Ssse3AndWasmHandleZeroInNeedle.");
67+
68+
offset = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Default>(
69+
ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
70+
span.Length,
71+
ref _inverseAsciiBitmap);
72+
}
73+
74+
// If we've reached the end of the span or stopped at an ASCII character, we've found the result.
// The unsigned comparison is also true for a negative offset, so such a value is returned as-is
// (presumably -1 for "not found" - confirm against IndexOfAnyVectorized).
75+
if ((uint)offset >= (uint)span.Length || char.IsAscii(span[offset]))
76+
{
77+
return offset;
78+
}
79+
80+
// Fall back to using the ProbabilisticMap.
// The slice starts at the non-ASCII character the ASCII pass stopped on.
81+
span = span.Slice(offset);
82+
}
83+
84+
int index = ProbabilisticMap.IndexOfAny(
85+
ref Unsafe.As<ProbabilisticMap, uint>(ref _map),
86+
ref MemoryMarshal.GetReference(span),
87+
span.Length,
88+
_values);
89+
90+
if (index >= 0)
91+
{
92+
// We found a match. Account for the number of ASCII characters we've skipped previously.
93+
index += offset;
94+
}
95+
96+
return index;
97+
}
98+
99+
/// <summary>
/// Forward "except" search: runs the vectorized pass over <c>_asciiBitmap</c> with <c>Negate</c> and, if that
/// pass stops at a non-ASCII character, continues from there with <c>ProbabilisticMap.IndexOfAnySimpleLoop</c>.
/// </summary>
/// <param name="span">The input to search.</param>
/// <returns>
/// The value reported by the ASCII pass when it stops at an ASCII character or does not yield an in-range
/// index; otherwise the simple-loop result, shifted by the skipped prefix when it is non-negative.
/// </returns>
internal override int IndexOfAnyExcept(ReadOnlySpan<char> span)
100+
{
101+
// Number of leading characters consumed by the ASCII pass; stays 0 when that pass is not taken.
int offset = 0;
102+
103+
if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
104+
{
105+
// Do a regular IndexOfAnyExcept for the ASCII characters. The search will stop if we encounter a non-ASCII char.
// The non-inverted bitmap is used here, so 'TOptimizations' applies unchanged.
106+
offset = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, TOptimizations>(
107+
ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
108+
span.Length,
109+
ref _asciiBitmap);
110+
111+
// If we've reached the end of the span or stopped at an ASCII character, we've found the result.
// The unsigned comparison is also true for a negative offset, so such a value is returned as-is.
112+
if ((uint)offset >= (uint)span.Length || char.IsAscii(span[offset]))
113+
{
114+
return offset;
115+
}
116+
117+
// Fall back to a simple char-by-char search.
118+
span = span.Slice(offset);
119+
}
120+
121+
int index = ProbabilisticMap.IndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(
122+
ref MemoryMarshal.GetReference(span),
123+
span.Length,
124+
_values);
125+
126+
if (index >= 0)
127+
{
128+
// We found a match. Account for the number of ASCII characters we've skipped previously.
129+
index += offset;
130+
}
131+
132+
return index;
133+
}
134+
135+
/// <summary>
/// Backward search: runs the vectorized ASCII pass from the end first and, if that pass stops at a
/// non-ASCII character, continues over the remaining prefix with <c>ProbabilisticMap.LastIndexOfAny</c>.
/// </summary>
/// <param name="span">The input to search.</param>
/// <returns>
/// The value reported by the ASCII pass when it stops at an ASCII character or does not yield an in-range
/// index; otherwise the ProbabilisticMap result for the prefix. No index adjustment is applied because
/// the prefix slice starts at index 0.
/// </returns>
internal override int LastIndexOfAny(ReadOnlySpan<char> span)
136+
{
137+
if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
138+
{
139+
// We are using IndexOfAnyAsciiSearcher to search for the last ASCII character in the set, or any non-ASCII character.
140+
// We do this by inverting the bitmap and using the opposite search function (Negate instead of DontNegate).
141+
142+
// If the bitmap we're using contains a 0, we have to use 'Ssse3AndWasmHandleZeroInNeedle' when running on X86 and WASM.
143+
// Everything else should use 'Default'. 'TOptimizations' specifies whether '_asciiBitmap' contains a 0.
144+
// Since we're using the inverse bitmap in this case, we have to use 'Ssse3AndWasmHandleZeroInNeedle' iff we're
145+
// running on X86/WASM and 'TOptimizations' is 'Default' (as that means that the inverse bitmap definitely has a 0).
146+
Debug.Assert((_asciiBitmap[0] & 1) != (_inverseAsciiBitmap[0] & 1));
147+
148+
int offset;
149+
150+
if ((Ssse3.IsSupported || PackedSimd.IsSupported) && typeof(TOptimizations) == typeof(IndexOfAnyAsciiSearcher.Default))
151+
{
152+
Debug.Assert((_inverseAsciiBitmap[0] & 1) == 1, "The inverse bitmap did not contain a 0.");
153+
154+
offset = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Ssse3AndWasmHandleZeroInNeedle>(
155+
ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
156+
span.Length,
157+
ref _inverseAsciiBitmap);
158+
}
159+
else
160+
{
161+
Debug.Assert(!(Ssse3.IsSupported || PackedSimd.IsSupported) || (_inverseAsciiBitmap[0] & 1) == 0,
162+
"The inverse bitmap contained a 0, but we're not using Ssse3AndWasmHandleZeroInNeedle.");
163+
164+
offset = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Default>(
165+
ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
166+
span.Length,
167+
ref _inverseAsciiBitmap);
168+
}
169+
170+
// If we've reached the end of the span or stopped at an ASCII character, we've found the result.
// The unsigned comparison is also true for a negative offset, so such a value is returned as-is.
171+
if ((uint)offset >= (uint)span.Length || char.IsAscii(span[offset]))
172+
{
173+
return offset;
174+
}
175+
176+
// Fall back to using the ProbabilisticMap.
// Keep the prefix up to and including the non-ASCII character the ASCII pass stopped on.
177+
span = span.Slice(0, offset + 1);
178+
}
179+
180+
return ProbabilisticMap.LastIndexOfAny(
181+
ref Unsafe.As<ProbabilisticMap, uint>(ref _map),
182+
ref MemoryMarshal.GetReference(span),
183+
span.Length,
184+
_values);
185+
}
186+
187+
/// <summary>
/// Backward "except" search: runs the vectorized pass over <c>_asciiBitmap</c> with <c>Negate</c> from the end
/// and, if that pass stops at a non-ASCII character, continues over the remaining prefix with
/// <c>ProbabilisticMap.LastIndexOfAnySimpleLoop</c>.
/// </summary>
/// <param name="span">The input to search.</param>
/// <returns>
/// The value reported by the ASCII pass when it stops at an ASCII character or does not yield an in-range
/// index; otherwise the simple-loop result for the prefix, which starts at index 0 and so needs no adjustment.
/// </returns>
internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span)
188+
{
189+
if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
190+
{
191+
// Do a regular LastIndexOfAnyExcept for the ASCII characters. The search will stop if we encounter a non-ASCII char.
// The non-inverted bitmap is used here, so 'TOptimizations' applies unchanged.
192+
int offset = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, TOptimizations>(
193+
ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
194+
span.Length,
195+
ref _asciiBitmap);
196+
197+
// If we've reached the end of the span or stopped at an ASCII character, we've found the result.
// The unsigned comparison is also true for a negative offset, so such a value is returned as-is.
198+
if ((uint)offset >= (uint)span.Length || char.IsAscii(span[offset]))
199+
{
200+
return offset;
201+
}
202+
203+
// Fall back to a simple char-by-char search.
// Keep the prefix up to and including the non-ASCII character the ASCII pass stopped on.
204+
span = span.Slice(0, offset + 1);
205+
}
206+
207+
return ProbabilisticMap.LastIndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(
208+
ref MemoryMarshal.GetReference(span),
209+
span.Length,
210+
_values);
211+
}
212+
}
213+
}

src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,29 @@ ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(values)),
140140
return new Latin1CharSearchValues(values);
141141
}
142142

143-
return new ProbabilisticCharSearchValues(values);
143+
scoped ReadOnlySpan<char> probabilisticValues = values;
144+
145+
if (Vector128.IsHardwareAccelerated && values.Length < 8)
146+
{
147+
// ProbabilisticMap does a Span.Contains check to confirm potential matches.
148+
// If we have fewer than 8 values, pad them with existing ones to make the verification faster.
149+
Span<char> newValues = stackalloc char[8];
150+
newValues.Fill(values[0]);
151+
values.CopyTo(newValues);
152+
probabilisticValues = newValues;
153+
}
154+
155+
if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && minInclusive < 128)
156+
{
157+
// If we have both ASCII and non-ASCII characters, use an implementation that
158+
// does an optimistic ASCII fast-path and then falls back to the ProbabilisticMap.
159+
160+
return (Ssse3.IsSupported || PackedSimd.IsSupported) && probabilisticValues.Contains('\0')
161+
? new ProbabilisticWithAsciiCharSearchValues<IndexOfAnyAsciiSearcher.Ssse3AndWasmHandleZeroInNeedle>(probabilisticValues)
162+
: new ProbabilisticWithAsciiCharSearchValues<IndexOfAnyAsciiSearcher.Default>(probabilisticValues);
163+
}
164+
165+
return new ProbabilisticCharSearchValues(probabilisticValues);
144166
}
145167

146168
private static bool TryGetSingleRange<T>(ReadOnlySpan<T> values, out T minInclusive, out T maxInclusive)

0 commit comments

Comments
 (0)