Skip to content

[RegexDiff X64] [stephentoub] Coalesce adjacent equivalent anchors #1284

@MihuBot

Description

@MihuBot

Job completed in 18 minutes 10 seconds (remote runner delay: 54 seconds).
dotnet/runtime#118083
Using arguments: regexdiff

79 out of 18857 patterns have generated source code changes.

Examples of GeneratedRegex source diffs
"(?<desc>h|ampm|am\\b|a\\.m\\.|a m\\b|a\\. m\ ..." (114 uses)
[GeneratedRegex("(?<desc>h|ampm|am\\b|a\\.m\\.|a m\\b|a\\. m\\.|a\\.m\\b|a\\. m\\b|pm\\b|p\\.m\\.|p m\\b|p\\. m\\.|p\\.m\\b|p\\. m\\b|p\\b\\b)", RegexOptions.IgnoreCase | RegexOptions.Singleline)]
  ///         ○ Match a sequence of expressions.<br/>
  ///             ○ Match a character in the set [Pp].<br/>
  ///             ○ Match if at a word boundary.<br/>
-   ///             ○ Match if at a word boundary.<br/>
  /// </code>
  /// </remarks>
  [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "42.42.42.42")]
                                  return false; // The input didn't match.
                              }
                              
-                               // Match if at a word boundary.
-                               if (!Utilities.IsBoundary(inputSpan, pos + 1))
-                               {
-                                   UncaptureUntil(0);
-                                   return false; // The input didn't match.
-                               }
-                               
                              pos++;
                              slice = inputSpan.Slice(pos);
                          }
"^^(?<AmsNetId>((?<First>\\d{1,3})\\.(?<Secon ..." (50 uses)
[GeneratedRegex("^^(?<AmsNetId>((?<First>\\d{1,3})\\.(?<Second>\\d{1,3})\\.(?<Third>\\d{1,3})\\.(?<Fourth>\\d{1,3})\\.(?<Fifth>\\d{1,3})\\.(?<Sixth>\\d{1,3})) | Local | Empty | LocalHost)(:(?<AdsPort>\\d+))?$$", RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.CultureInvariant)]
  /// Explanation:<br/>
  /// <code>
  /// ○ Match if at the beginning of the string.<br/>
-   /// ○ Match if at the beginning of the string.<br/>
  /// ○ "AmsNetId" capture group.<br/>
  ///     ○ Match with 4 alternative expressions.<br/>
  ///         ○ 1st capture group.<br/>
  ///         ○ "AdsPort" capture group.<br/>
  ///             ○ Match a Unicode digit atomically at least once.<br/>
  /// ○ Match if at the end of the string or if before an ending newline.<br/>
-   /// ○ Match if at the end of the string or if before an ending newline.<br/>
  /// </code>
  /// </remarks>
  [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "42.42.42.42")]
                      return false; // The input didn't match.
                  }
                  
-                   // Match if at the beginning of the string.
-                   if (pos != 0)
-                   {
-                       UncaptureUntil(0);
-                       return false; // The input didn't match.
-                   }
-                   
                  // "AmsNetId" capture group.
                  //{
                      capture_starting_pos = pos;
                      goto LoopIterationNoMatch;
                  }
                  
-                   // Match if at the end of the string or if before an ending newline.
-                   if (pos < inputSpan.Length - 1 || ((uint)pos < (uint)inputSpan.Length && inputSpan[pos] != '\n'))
-                   {
-                       goto LoopIterationNoMatch;
-                   }
-                   
                  // The input matched.
                  base.runtextpos = pos;
                  base.Capture(0, matchStart, pos);
"\\s+([`~!@#$%^&*\\(\\)=+\\|\\[\\]{};':,.<>?< ..." (26 uses)
[GeneratedRegex("\\s+([`~!@#$%^&*\\(\\)=+\\|\\[\\]{};':,.<>?<《》,。?;‘’“”:、¥!…()])+\\s+")]
             }
         }
         
-        /// <summary>Supports searching for characters in or not in "\t\n\v\f\r \u0085              \u2028\u2029  ".</summary>
-        internal static readonly SearchValues<char> s_whitespace = SearchValues.Create("\t\n\v\f\r \u0085              \u2028\u2029  ");
+        /// <summary>Supports searching for characters in or not in "\t\n\v\f\r \u0085             \u2028\u2029   ".</summary>
+        internal static readonly SearchValues<char> s_whitespace = SearchValues.Create("\t\n\v\f\r \u0085             \u2028\u2029   ");
     }
 }
"\\s+([`~!@#$%^&*\\(\\)\\-_=+\\\\|\\[\\]{};': ..." (26 uses)
[GeneratedRegex("\\s+([`~!@#$%^&*\\(\\)\\-_=+\\\\|\\[\\]{};':,.<>/?<《》,。?;‘’“”:、—¥!…—()])+\\s+")]
             }
         }
         
-        /// <summary>Supports searching for characters in or not in "\t\n\v\f\r \u0085              \u2028\u2029  ".</summary>
-        internal static readonly SearchValues<char> s_whitespace = SearchValues.Create("\t\n\v\f\r \u0085              \u2028\u2029  ");
+        /// <summary>Supports searching for characters in or not in "\t\n\v\f\r \u0085             \u2028\u2029   ".</summary>
+        internal static readonly SearchValues<char> s_whitespace = SearchValues.Create("\t\n\v\f\r \u0085             \u2028\u2029   ");
     }
 }
"^\\s+(?<guid>\\{[0-9a-zA-Z]{8}-[0-9a-zA-Z]{4 ..." (26 uses)
[GeneratedRegex("^\\s+(?<guid>\\{[0-9a-zA-Z]{8}-[0-9a-zA-Z]{4}-[0-9a-zA-Z]{4}-[0-9a-zA-Z]{4}-[0-9a-zA-Z]{12}\\})\\s+=\\s+(?<dep>\\{[0-9a-zA-Z]{8}-[0-9a-zA-Z]{4}-[0-9a-zA-Z]{4}-[0-9a-zA-Z]{4}-[0-9a-zA-Z]{12}\\})", RegexOptions.Multiline)]
         /// <summary>Whether <see cref="s_defaultTimeout"/> is non-infinite.</summary>
         internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout;
         
-        /// <summary>Supports searching for characters in or not in "\t\n\v\f\r \u0085             \u2028\u2029   ".</summary>
-        internal static readonly SearchValues<char> s_whitespace = SearchValues.Create("\t\n\v\f\r \u0085             \u2028\u2029   ");
+        /// <summary>Supports searching for characters in or not in "\t\n\v\f\r \u0085             \u2028\u2029   ".</summary>
+        internal static readonly SearchValues<char> s_whitespace = SearchValues.Create("\t\n\v\f\r \u0085             \u2028\u2029   ");
     }
 }
"(?<id>\\S+)" (21 uses)
[GeneratedRegex("(?<id>\\S+)")]
         /// <summary>Whether <see cref="s_defaultTimeout"/> is non-infinite.</summary>
         internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout;
         
-        /// <summary>Supports searching for characters in or not in "\t\n\v\f\r \u0085             \u2028\u2029   ".</summary>
-        internal static readonly SearchValues<char> s_whitespace = SearchValues.Create("\t\n\v\f\r \u0085             \u2028\u2029   ");
+        /// <summary>Supports searching for characters in or not in "\t\n\v\f\r \u0085             \u2028\u2029   ".</summary>
+        internal static readonly SearchValues<char> s_whitespace = SearchValues.Create("\t\n\v\f\r \u0085             \u2028\u2029   ");
     }
 }
"(?<id>\\S+)(?<version>.*)" (21 uses)
[GeneratedRegex("(?<id>\\S+)(?<version>.*)")]
         /// <summary>Whether <see cref="s_defaultTimeout"/> is non-infinite.</summary>
         internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout;
         
-        /// <summary>Supports searching for characters in or not in "\t\n\v\f\r \u0085             \u2028\u2029   ".</summary>
-        internal static readonly SearchValues<char> s_whitespace = SearchValues.Create("\t\n\v\f\r \u0085             \u2028\u2029   ");
+        /// <summary>Supports searching for characters in or not in "\t\n\v\f\r \u0085             \u2028\u2029   ".</summary>
+        internal static readonly SearchValues<char> s_whitespace = SearchValues.Create("\t\n\v\f\r \u0085             \u2028\u2029   ");
     }
 }
"\\s+" (18 uses)
[GeneratedRegex("\\s+", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
         /// <summary>Whether <see cref="s_defaultTimeout"/> is non-infinite.</summary>
         internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout;
         
-        /// <summary>Supports searching for characters in or not in "\t\n\v\f\r \u0085             \u2028\u2029   ".</summary>
-        internal static readonly SearchValues<char> s_whitespace = SearchValues.Create("\t\n\v\f\r \u0085             \u2028\u2029   ");
+        /// <summary>Supports searching for characters in or not in "\t\n\v\f\r \u0085             \u2028\u2029   ".</summary>
+        internal static readonly SearchValues<char> s_whitespace = SearchValues.Create("\t\n\v\f\r \u0085             \u2028\u2029   ");
     }
 }
"\\s\\s+" (17 uses)
[GeneratedRegex("\\s\\s+")]
         /// <summary>Whether <see cref="s_defaultTimeout"/> is non-infinite.</summary>
         internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout;
         
-        /// <summary>Supports searching for characters in or not in "\t\n\v\f\r \u0085             \u2028\u2029   ".</summary>
-        internal static readonly SearchValues<char> s_whitespace = SearchValues.Create("\t\n\v\f\r \u0085             \u2028\u2029   ");
+        /// <summary>Supports searching for characters in or not in "\t\n\v\f\r \u0085             \u2028\u2029   ".</summary>
+        internal static readonly SearchValues<char> s_whitespace = SearchValues.Create("\t\n\v\f\r \u0085             \u2028\u2029   ");
     }
 }
"(?<desc>h|ampm|am\\b|a\\.m\\.|a m\\b|a\\. m\ ..." (16 uses)
[GeneratedRegex("(?<desc>h|ampm|am\\b|a\\.m\\.|a m\\b|a\\. m\\.|a\\.m\\b|a\\. m\\b|pm\\b|p\\.m\\.|p m\\b|p\\. m\\.|p\\.m\\b|p\\. m\\b|p\\b\\b)", RegexOptions.Singleline)]
  ///                 ○ Match a sequence of expressions.<br/>
  ///                     ○ Match the string " m".<br/>
  ///                     ○ Match if at a word boundary.<br/>
-   ///                 ○ Match a sequence of expressions.<br/>
-   ///                     ○ Match if at a word boundary.<br/>
-   ///                     ○ Match if at a word boundary.<br/>
+   ///                 ○ Match if at a word boundary.<br/>
  /// </code>
  /// </remarks>
  [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "42.42.42.42")]
                                              return false; // The input didn't match.
                                          }
                                          
-                                           // Match if at a word boundary.
-                                           if (!Utilities.IsBoundary(inputSpan, pos + 1))
-                                           {
-                                               UncaptureUntil(0);
-                                               return false; // The input didn't match.
-                                           }
-                                           
                                          pos++;
                                          slice = inputSpan.Slice(pos);
                                      }

For more diff examples, see https://gist.github.com/MihuBot/2273def877179c54bafb17a692e2f31d

Total bytes of base: 54274946
Total bytes of diff: 54274415
Total bytes of delta: -531 (-0.00 % of base)
Total relative delta: -0.19
    diff is an improvement.
    relative diff is an improvement.

For a list of JIT diff improvements, see Improvements.md

Sample source code for further analysis
// Local cache of the "Results.json" entry from the results archive; downloaded once and reused on later runs.
const string JsonPath = "RegexResults-1284.json";
if (!File.Exists(JsonPath))
{
    // Extract into a temporary file first and rename it only after extraction succeeds, so an
    // interrupted download never leaves a truncated file at JsonPath that later runs would
    // mistake for a valid cache.
    string tempPath = JsonPath + ".tmp";
    File.Delete(tempPath); // No-op when the file is absent; clears leftovers from a failed run.

    using var httpClient = new HttpClient();
    await using var archiveStream = await httpClient.GetStreamAsync("https://mihubot.xyz/r/E2jppW7A");
    using var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read);
    archive.Entries.First(e => e.Name == "Results.json").ExtractToFile(tempPath);
    File.Move(tempPath, JsonPath);
}

using FileStream jsonFileStream = File.OpenRead(JsonPath);

// IncludeFields is required because RegexEntry exposes value tuples, whose elements are fields.
var jsonOptions = new JsonSerializerOptions { IncludeFields = true };

// Deserialize returns null for a JSON "null" document; fail with a clear message instead of a
// NullReferenceException further down.
RegexEntry[] entries = JsonSerializer.Deserialize<RegexEntry[]>(jsonFileStream, jsonOptions)
    ?? throw new InvalidDataException($"'{JsonPath}' does not contain an array of regex entries.");
Console.WriteLine($"Working with {entries.Length} patterns");



/// <summary>Identifies one regex from the analyzed corpus.</summary>
/// <param name="Pattern">The regex pattern text.</param>
/// <param name="Options">The <see cref="RegexOptions"/> the pattern is used with.</param>
/// <param name="Count">Presumably the number of uses of this pattern/options pair (the "N uses" figure in the report) - TODO confirm.</param>
record KnownPattern(string Pattern, RegexOptions Options, int Count);

/// <summary>
/// One element of the deserialized results array: a known pattern together with the
/// source text and diffs recorded for it.
/// </summary>
sealed class RegexEntry
{
    /// <summary>The pattern, options, and count this entry describes.</summary>
    public required KnownPattern Regex { get; set; }

    /// <summary>Presumably the source generated for the pattern on the baseline (main) branch - TODO confirm.</summary>
    public required string MainSource { get; set; }

    /// <summary>Presumably the source generated for the pattern with the pull request applied - TODO confirm.</summary>
    public required string PrSource { get; set; }

    /// <summary>Optional; presumably the complete diff between the two sources - TODO confirm.</summary>
    public string? FullDiff { get; set; }

    /// <summary>Optional; presumably an abbreviated form of the diff - TODO confirm.</summary>
    public string? ShortDiff { get; set; }

    /// <summary>
    /// Optional (name, values) pairs; presumably the character search-value sets emitted for the pattern - TODO confirm.
    /// Tuple elements are fields, so deserializing this member needs <c>IncludeFields = true</c>.
    /// </summary>
    public (string Name, string Values)[]? SearchValuesOfChar { get; set; }

    /// <summary>
    /// Optional (values, comparison) pairs; presumably the string search-value sets emitted for the pattern - TODO confirm.
    /// Tuple elements are fields, so deserializing this member needs <c>IncludeFields = true</c>.
    /// </summary>
    public (string[] Values, StringComparison ComparisonType)[]? SearchValuesOfString { get; set; }
}

Artifacts:

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions