Skip to content

[RegexDiff X64] [MihaZupan] Fix compiled/source-generated lazy loop stack un ... #2008

@MihuBot

Description

@MihuBot

Job completed in 14 minutes 31 seconds (remote runner delay: 1 minute 16 seconds).
dotnet/runtime#129628
Using arguments: regexdiff
Main commit: dotnet/runtime@af74d68
PR commit: MihaZupan/runtime@0cac250

123 out of 18857 patterns have generated source code changes.

Examples of GeneratedRegex source diffs
"{(?<env>env:)??\\w+(\\s+(\\?\\?)??\\s+\\w+)??}" (2282 uses)
[GeneratedRegex("{(?<env>env:)??\\w+(\\s+(\\?\\?)??\\s+\\w+)??}")]
  // don't continue lazily iterating. Instead, backtrack.
  if (lazyloop_iteration != 0)
  {
-       stackpos -= 2;
+       stackpos -= lazyloop_iteration * 2;
      UncaptureUntil(0);
      return false; // The input didn't match.
  }
"\\b(?<timeOfDay>((((dans\\s+(l[ea])?\\s+)?(( ..." (194 uses)
[GeneratedRegex("\\b(?<timeOfDay>((((dans\\s+(l[ea])?\\s+)?((?<early>d[eé]but(\\s+|-)|t[oô]t(\\s+|-)(l[ea]\\s*)?)|(?<late>fin\\s*|fin de(\\s+(la)?)|tard\\s*))?(matin[ée]e|matin|((d|l)?'?)apr[eè]s[-|\\s*]midi|nuit|soir[eé]e|soir)))|(((\\s+(l[ea])?\\s+)?)(jour|journ[eé]e)))s?)\\b", RegexOptions.IgnoreCase | RegexOptions.Singleline)]
  // don't continue lazily iterating. Instead, backtrack.
  if (lazyloop_iteration != 0)
  {
-       stackpos -= 2;
+       stackpos -= lazyloop_iteration * 2;
      goto CaptureBacktrack12;
  }
  goto LazyLoopBody;
"^ *\\[(?<selecttype>(AZURE.SELECTOR|AZURE.SE ..." (188 uses)
[GeneratedRegex("^ *\\[(?<selecttype>(AZURE.SELECTOR|AZURE.SELECTOR-LIST))( *\\((?<selectorconditions>.*?)\\))?\\] *(?:\\n|$)", RegexOptions.IgnoreCase)]
  // don't continue lazily iterating. Instead, backtrack.
  if (lazyloop_iteration != 0)
  {
-       stackpos -= 2;
+       stackpos -= lazyloop_iteration * 2;
      UncaptureUntil(0);
      return false; // The input didn't match.
  }
"(?<order>ce|cette|au\\s+cours+(du|de))\\b" (183 uses)
[GeneratedRegex("(?<order>ce|cette|au\\s+cours+(du|de))\\b", RegexOptions.IgnoreCase | RegexOptions.Singleline)]
  // don't continue lazily iterating. Instead, backtrack.
  if (lazyloop_iteration != 0)
  {
-       stackpos -= 2;
+       stackpos -= lazyloop_iteration * 2;
      goto AlternationBranch;
  }
  goto LazyLoopBody;
"^\\s*\\b((in|nel(la)?|al(la)?|di)\\s+)?(matt ..." (122 uses)
[GeneratedRegex("^\\s*\\b((in|nel(la)?|al(la)?|di)\\s+)?(mattin[oa]|mattinata|pomeriggio|sera|serata|notte|nottata)\\b", RegexOptions.ExplicitCapture | RegexOptions.Singleline)]
  // don't continue lazily iterating. Instead, backtrack.
  if (lazyloop_iteration != 0)
  {
-       stackpos--;
+       stackpos -= lazyloop_iteration;
      goto AlternationBranch3;
  }
  goto LazyLoopBody;
"(ce|cette)\\b" (114 uses)
[GeneratedRegex("(ce|cette)\\b", RegexOptions.IgnoreCase | RegexOptions.Singleline)]
  // don't continue lazily iterating. Instead, backtrack.
  if (lazyloop_iteration != 0)
  {
-       stackpos -= 2;
+       stackpos -= lazyloop_iteration * 2;
      UncaptureUntil(0);
      return false; // The input didn't match.
  }
"\\b(?<timeOfDay>(((((il|la|in|a(l(la)?)?|nel ..." (114 uses)
[GeneratedRegex("\\b(?<timeOfDay>(((((il|la|in|a(l(la)?)?|nel(la)?|di)\\s+)?(((mattin[ao]|mattinata|pomeriggio|sera|serata|notte|nottata)(?<early>\\s+presto)|(?<early>prim[ao]\\s+)(mattin[ao]|mattinata|pomeriggio|sera|serata|notte|nottata))|((mattin[ao]|mattinata|pomeriggio|sera|serata|notte|nottata)(?<late>\\s+(tardi|inoltrat[oa]|fonda))|(?<late>tard[oa]\\s+)?(mattin[ao]|mattinata|pomeriggio|sera|serata|notte|nottata)))))|(in\\s+giornata|durante\\s+il\\s+giorno|(durante\\s+le|nelle)\\s+ore\\s+d[i']\\s*ufficio)))\\b", RegexOptions.ExplicitCapture | RegexOptions.Singleline)]
  // don't continue lazily iterating. Instead, backtrack.
  if (lazyloop_iteration != 0)
  {
-       stackpos -= 2;
+       stackpos -= lazyloop_iteration * 2;
      goto AlternationBranch15;
  }
  goto LazyLoopBody;
  // don't continue lazily iterating. Instead, backtrack.
  if (lazyloop_iteration1 != 0)
  {
-       stackpos -= 2;
+       stackpos -= lazyloop_iteration1 * 2;
      goto AlternationBranch24;
  }
  goto LazyLoopBody1;
"(?<=\\b)((\\d*(1er|2e|2eme|3e|3eme|4e|4eme|5 ..." (106 uses)
[GeneratedRegex("(?<=\\b)((\\d*(1er|2e|2eme|3e|3eme|4e|4eme|5e|5eme|6e|6eme|7e|7eme|8e|8eme|9e|9eme|0e|0eme))|(11e|11eme|12e|12eme))(?=\\b)", RegexOptions.IgnoreCase | RegexOptions.Singleline)]
      // don't continue lazily iterating. Instead, backtrack.
      if (lazyloop_iteration != 0)
      {
-           stackpos -= 2;
+           stackpos -= lazyloop_iteration * 2;
          goto AlternationBranch2;
      }
      goto LazyLoopBody;
      // don't continue lazily iterating. Instead, backtrack.
      if (lazyloop_iteration1 != 0)
      {
-           stackpos -= 2;
+           stackpos -= lazyloop_iteration1 * 2;
          goto AlternationBranch3;
      }
      goto LazyLoopBody1;
      // don't continue lazily iterating. Instead, backtrack.
      if (lazyloop_iteration2 != 0)
      {
-           stackpos -= 2;
+           stackpos -= lazyloop_iteration2 * 2;
          goto AlternationBranch4;
      }
      goto LazyLoopBody2;
      // don't continue lazily iterating. Instead, backtrack.
      if (lazyloop_iteration3 != 0)
      {
-           stackpos -= 2;
+           stackpos -= lazyloop_iteration3 * 2;
          goto AlternationBranch5;
      }
      goto LazyLoopBody3;
      // don't continue lazily iterating. Instead, backtrack.
      if (lazyloop_iteration4 != 0)
      {
-           stackpos -= 2;
+           stackpos -= lazyloop_iteration4 * 2;
          goto AlternationBranch6;
      }
      goto LazyLoopBody4;
      // don't continue lazily iterating. Instead, backtrack.
      if (lazyloop_iteration5 != 0)
      {
-           stackpos -= 2;
+           stackpos -= lazyloop_iteration5 * 2;
          goto AlternationBranch7;
      }
      goto LazyLoopBody5;
      // don't continue lazily iterating. Instead, backtrack.
      if (lazyloop_iteration6 != 0)
      {
-           stackpos -= 2;
+           stackpos -= lazyloop_iteration6 * 2;
          goto AlternationBranch8;
      }
      goto LazyLoopBody6;
      // don't continue lazily iterating. Instead, backtrack.
      if (lazyloop_iteration7 != 0)
      {
-           stackpos -= 2;
+           stackpos -= lazyloop_iteration7 * 2;
          goto AlternationBranch9;
      }
      goto LazyLoopBody7;
      // don't continue lazily iterating. Instead, backtrack.
      if (lazyloop_iteration8 != 0)
      {
-           stackpos -= 2;
+           stackpos -= lazyloop_iteration8 * 2;
          goto CharLoopBacktrack;
      }
      goto LazyLoopBody8;
  // don't continue lazily iterating. Instead, backtrack.
  if (lazyloop_iteration9 != 0)
  {
-       stackpos -= 2;
+       stackpos -= lazyloop_iteration9 * 2;
      goto AlternationBranch10;
  }
  goto LazyLoopBody9;
  // don't continue lazily iterating. Instead, backtrack.
  if (lazyloop_iteration10 != 0)
  {
-       stackpos -= 2;
+       stackpos -= lazyloop_iteration10 * 2;
      UncaptureUntil(0);
      return false; // The input didn't match.
  }
"\\b(?<hournum>zero|un|deux|trois|quatre|cinq ..." (98 uses)
[GeneratedRegex("\\b(?<hournum>zero|un|deux|trois|quatre|cinq|six|sept|huit|neuf|dix|onze|douze|treize|quatorze|quinze|dix-six|dix-sept|dix-huit|dix-neuf|vingt|vingt-et-un|vingt-deux|vingt-trois)\\b", RegexOptions.IgnoreCase | RegexOptions.Singleline)]
  // don't continue lazily iterating. Instead, backtrack.
  if (lazyloop_iteration != 0)
  {
-       stackpos -= 2;
+       stackpos -= lazyloop_iteration * 2;
      UncaptureUntil(0);
      return false; // The input didn't match.
  }
"\\b(verdade|verdadeir[oa]|sim|isso|claro|ok) ..." (93 uses)
[GeneratedRegex("\\b(verdade|verdadeir[oa]|sim|isso|claro|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)", RegexOptions.Singleline)]
  // don't continue lazily iterating. Instead, backtrack.
  if (lazyloop_iteration != 0)
  {
-       stackpos -= 2;
+       stackpos -= lazyloop_iteration * 2;
      goto AlternationBranch1;
  }
  goto LazyLoopBody;

For more diff examples, see https://gist.github.com/MihuBot/f789d9b6b4670eb3cd9c04cc1d48c460

Sample source code for further analysis
// Downloads the job's results archive once, caches Results.json next to the
// script under JsonPath, and then loads every regex entry from that cached file.
const string JsonPath = "RegexResults-2008.json";
if (!File.Exists(JsonPath))
{
    // Own the HttpClient explicitly so its handler and connection are released
    // as soon as the one-off download is finished.
    using var httpClient = new HttpClient();
    await using var archiveStream = await httpClient.GetStreamAsync("https://mihubot.xyz/r/FQ-H-llA");
    using var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read);
    archive.Entries.First(e => e.Name == "Results.json").ExtractToFile(JsonPath);
}

using FileStream jsonFileStream = File.OpenRead(JsonPath);
// NOTE(review): IncludeFields is presumably required because RegexEntry has
// tuple-typed members, and ValueTuple exposes its elements as fields — confirm.
// A JSON "null" document deserializes to null; fail with a clear message
// instead of a NullReferenceException on the next line.
RegexEntry[] entries = JsonSerializer.Deserialize<RegexEntry[]>(jsonFileStream, new JsonSerializerOptions { IncludeFields = true })
    ?? throw new InvalidDataException($"'{JsonPath}' did not contain a JSON array of regex entries.");
Console.WriteLine($"Working with {entries.Length} patterns");



/// <summary>
/// Identifies one regex from the results file: its pattern text, the
/// <see cref="RegexOptions"/> it is constructed with, and an associated count.
/// </summary>
/// <param name="Pattern">The regular expression pattern string.</param>
/// <param name="Options">The options the pattern is used with.</param>
/// <param name="Count">
/// NOTE(review): presumably the number of known uses of this pattern/options
/// pair (the report lists "N uses" per pattern) — confirm against the producer.
/// </param>
record KnownPattern(string Pattern, RegexOptions Options, int Count);

/// <summary>
/// One element of the deserialized results array: a regex together with the
/// source text and diff data recorded for it.
/// </summary>
sealed class RegexEntry
{
    /// <summary>The pattern, options and count this entry describes.</summary>
    public required KnownPattern Regex { get; set; }

    /// <summary>
    /// NOTE(review): presumably the source generated for this regex on the main
    /// commit — confirm against the producer of Results.json.
    /// </summary>
    public required string MainSource { get; set; }

    /// <summary>
    /// NOTE(review): presumably the source generated for this regex on the PR
    /// commit — confirm against the producer of Results.json.
    /// </summary>
    public required string PrSource { get; set; }

    /// <summary>Optional full diff text; may be absent (null).</summary>
    public string? FullDiff { get; set; }

    /// <summary>Optional shortened diff text; may be absent (null).</summary>
    public string? ShortDiff { get; set; }

    /// <summary>
    /// Optional list of (Name, Values) string pairs.
    /// NOTE(review): presumably the char-based SearchValues instances emitted by
    /// the generator — confirm.
    /// </summary>
    public (string Name, string Values)[]? SearchValuesOfChar { get; set; }

    /// <summary>
    /// Optional list of string sets, each paired with a <see cref="StringComparison"/>.
    /// NOTE(review): presumably the string-based SearchValues instances emitted by
    /// the generator — confirm.
    /// </summary>
    public (string[] Values, StringComparison ComparisonType)[]? SearchValuesOfString { get; set; }
}

Artifacts:

Metadata

Assignees

No one assigned

    Labels

    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions