Unverified Commit 0a0e06f2 authored by Nikita Popov's avatar Nikita Popov Committed by GitHub
Browse files

[TableGen] Fix prefix detection with anchor (NFC) (#71379)

instregex uses an optimization, where the constant prefix of the regex
is extracted to perform a binary search first. However, this
optimization currently mainly fails to apply, because most instregex
uses have an explicit ^ anchor, which gets counted as a meta char and
disables the optimization.

Make sure the anchor is skipped when determining the prefix. Also fix an
implementation bug this exposes, where the pick a too long prefix if the
first meta character is a quantifier.

This cuts the time needed to generate files like X86GenInstrInfo.inc by
half.
parent d64d5ea1
Loading
Loading
Loading
Loading
+19 −1
Original line number Diff line number Diff line
@@ -91,10 +91,25 @@ struct InstRegexOp : public SetTheory::Operator {
        PrintFatalError(Loc, "instregex requires pattern string: " +
                                 Expr->getAsString());
      StringRef Original = SI->getValue();
      // Drop an explicit ^ anchor to not interfere with prefix search.
      bool HadAnchor = Original.consume_front("^");

      // Extract a prefix that we can binary search on.
      static const char RegexMetachars[] = "()^$|*+?.[]\\{}";
      auto FirstMeta = Original.find_first_of(RegexMetachars);
      if (FirstMeta != StringRef::npos && FirstMeta > 0) {
        // If we have a regex like ABC* we can only use AB as the prefix, as
        // the * acts on C.
        switch (Original[FirstMeta]) {
        case '+':
        case '*':
        case '?':
          --FirstMeta;
          break;
        default:
          break;
        }
      }

      // Look for top-level | or ?. We cannot optimize them to binary search.
      if (removeParens(Original).find_first_of("|?") != std::string::npos)
@@ -106,7 +121,10 @@ struct InstRegexOp : public SetTheory::Operator {
      if (!PatStr.empty()) {
        // For the rest use a python-style prefix match.
        std::string pat = std::string(PatStr);
        if (pat[0] != '^') {
        // Add ^ anchor. If we had one originally, don't need the group.
        if (HadAnchor) {
          pat.insert(0, "^");
        } else {
          pat.insert(0, "^(");
          pat.insert(pat.end(), ')');
        }