Commit 5b39d8d3 authored by spupyrev's avatar spupyrev
Browse files

Revert "[CodeLayout] Faster basic block reordering, ext-tsp (#68275)"

This reverts commit 0a7bf3aa.
parent 0a7bf3aa
Loading
Loading
Loading
Loading
+100 −143
Original line number Diff line number Diff line
@@ -99,15 +99,22 @@ static cl::opt<unsigned> BackwardDistance(
    cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"));

// The maximum size of a chain created by the algorithm. The size is bounded
// so that the algorithm can efficiently process extremely large instances.
// so that the algorithm can efficiently process extremely large instances.
static cl::opt<unsigned>
    MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(512),
                 cl::desc("The maximum size of a chain to create"));
    MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(4096),
                 cl::desc("The maximum size of a chain to create."));

// The maximum ratio between densities of two chains for merging. When chain
// pairs are considered for merging, a pair whose density ratio (larger density
// divided by smaller density) exceeds this value is skipped.
static cl::opt<double> MaxMergeDensityRatio(
    "ext-tsp-max-merge-density-ratio", cl::ReallyHidden, cl::init(128),
    cl::desc("The maximum ratio between densities of two chains for merging"));
// The maximum size of a chain for splitting. Larger values of the threshold
// may yield better quality at the cost of worse run-time. A chain with more
// nodes than this threshold is not split at every possible offset when
// candidate merges are evaluated.
static cl::opt<unsigned> ChainSplitThreshold(
    "ext-tsp-chain-split-threshold", cl::ReallyHidden, cl::init(128),
    cl::desc("The maximum size of a chain to apply splitting"));

// The option enables splitting (large) chains along in-coming and out-going
// jumps. This typically results in a better quality. When the flag is off, the
// jump-based split offsets are not tried while computing the best merge gain.
// The flag is on by default.
static cl::opt<bool> EnableChainSplitAlongJumps(
    "ext-tsp-enable-chain-split-along-jumps", cl::ReallyHidden, cl::init(true),
    cl::desc("Enable splitting chains along in-coming and out-going jumps"));

// Algorithm-specific options for CDS.
static cl::opt<unsigned> CacheEntries("cds-cache-entries", cl::ReallyHidden,
@@ -210,13 +217,11 @@ struct NodeT {
  NodeT &operator=(const NodeT &) = delete;
  NodeT &operator=(NodeT &&) = default;

  explicit NodeT(size_t Index, uint64_t Size, uint64_t Count)
      : Index(Index), Size(Size), ExecutionCount(Count) {}
  explicit NodeT(size_t Index, uint64_t Size, uint64_t EC)
      : Index(Index), Size(Size), ExecutionCount(EC) {}

  bool isEntry() const { return Index == 0; }

  // Check if Other is a successor of the node.
  bool isSuccessor(const NodeT *Other) const;
  // The total execution count of outgoing jumps.
  uint64_t outCount() const;

@@ -437,14 +442,6 @@ private:
  bool CacheValidBackward{false};
};

bool NodeT::isSuccessor(const NodeT *Other) const {
  for (JumpT *Jump : OutJumps) {
    if (Jump->Target == Other)
      return true;
  }
  return false;
}

uint64_t NodeT::outCount() const {
  uint64_t Count = 0;
  for (JumpT *Jump : OutJumps)
@@ -478,14 +475,14 @@ void ChainT::mergeEdges(ChainT *Other) {
  }
}

/// A wrapper around three concatenated vectors (chains) of objects; it is used
/// to avoid extra instantiation of the vectors.
template <typename ObjType> struct MergedVector {
  using ObjIter = typename std::vector<ObjType *>::const_iterator;
using NodeIter = std::vector<NodeT *>::const_iterator;

  MergedVector(ObjIter Begin1, ObjIter End1, ObjIter Begin2 = ObjIter(),
               ObjIter End2 = ObjIter(), ObjIter Begin3 = ObjIter(),
               ObjIter End3 = ObjIter())
/// A wrapper around three chains of nodes; it is used to avoid extra
/// instantiation of the vectors.
struct MergedChain {
  MergedChain(NodeIter Begin1, NodeIter End1, NodeIter Begin2 = NodeIter(),
              NodeIter End2 = NodeIter(), NodeIter Begin3 = NodeIter(),
              NodeIter End3 = NodeIter())
      : Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3),
        End3(End3) {}

@@ -510,26 +507,13 @@ template <typename ObjType> struct MergedVector {

  const NodeT *getFirstNode() const { return *Begin1; }

  bool empty() const { return Begin1 == End1; }

  void append(ObjIter Begin, ObjIter End) {
    if (Begin2 == End2) {
      Begin2 = Begin;
      End2 = End;
      return;
    }
    assert(Begin3 == End3 && "cannot extend MergedVector");
    Begin3 = Begin;
    End3 = End;
  }

private:
  ObjIter Begin1;
  ObjIter End1;
  ObjIter Begin2;
  ObjIter End2;
  ObjIter Begin3;
  ObjIter End3;
  NodeIter Begin1;
  NodeIter End1;
  NodeIter Begin2;
  NodeIter End2;
  NodeIter Begin3;
  NodeIter End3;
};

/// Merge two chains of nodes respecting a given 'type' and 'offset'.
@@ -537,29 +521,29 @@ private:
/// If MergeType == 0, then the result is a concatenation of two chains.
/// Otherwise, the first chain is cut into two sub-chains at the offset,
/// and merged using all possible ways of concatenating three chains.
MergedVector<NodeT> mergeNodes(const std::vector<NodeT *> &X,
                               const std::vector<NodeT *> &Y,
                               size_t MergeOffset, MergeTypeT MergeType) {
MergedChain mergeNodes(const std::vector<NodeT *> &X,
                       const std::vector<NodeT *> &Y, size_t MergeOffset,
                       MergeTypeT MergeType) {
  // Split the first chain, X, into X1 and X2.
  MergedVector<NodeT>::ObjIter BeginX1 = X.begin();
  MergedVector<NodeT>::ObjIter EndX1 = X.begin() + MergeOffset;
  MergedVector<NodeT>::ObjIter BeginX2 = X.begin() + MergeOffset;
  MergedVector<NodeT>::ObjIter EndX2 = X.end();
  MergedVector<NodeT>::ObjIter BeginY = Y.begin();
  MergedVector<NodeT>::ObjIter EndY = Y.end();
  NodeIter BeginX1 = X.begin();
  NodeIter EndX1 = X.begin() + MergeOffset;
  NodeIter BeginX2 = X.begin() + MergeOffset;
  NodeIter EndX2 = X.end();
  NodeIter BeginY = Y.begin();
  NodeIter EndY = Y.end();

  // Construct a new chain from the three existing ones.
  switch (MergeType) {
  case MergeTypeT::X_Y:
    return MergedVector<NodeT>(BeginX1, EndX2, BeginY, EndY);
    return MergedChain(BeginX1, EndX2, BeginY, EndY);
  case MergeTypeT::Y_X:
    return MergedVector<NodeT>(BeginY, EndY, BeginX1, EndX2);
    return MergedChain(BeginY, EndY, BeginX1, EndX2);
  case MergeTypeT::X1_Y_X2:
    return MergedVector<NodeT>(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
    return MergedChain(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
  case MergeTypeT::Y_X2_X1:
    return MergedVector<NodeT>(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
    return MergedChain(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
  case MergeTypeT::X2_X1_Y:
    return MergedVector<NodeT>(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
    return MergedChain(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
  }
  llvm_unreachable("unexpected chain merge type");
}
@@ -634,10 +618,6 @@ private:
    AllChains.reserve(NumNodes);
    HotChains.reserve(NumNodes);
    for (NodeT &Node : AllNodes) {
      // Adjust execution counts.
      Node.ExecutionCount = std::max(Node.ExecutionCount, Node.inCount());
      Node.ExecutionCount = std::max(Node.ExecutionCount, Node.outCount());
      // Create a chain.
      AllChains.emplace_back(Node.Index, &Node);
      Node.CurChain = &AllChains.back();
      if (Node.ExecutionCount > 0)
@@ -650,13 +630,13 @@ private:
      for (JumpT *Jump : PredNode.OutJumps) {
        NodeT *SuccNode = Jump->Target;
        ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain);
        // This edge is already present in the graph.
        // this edge is already present in the graph.
        if (CurEdge != nullptr) {
          assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr);
          CurEdge->appendJump(Jump);
          continue;
        }
        // This is a new edge.
        // this is a new edge.
        AllEdges.emplace_back(Jump);
        PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back());
        SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back());
@@ -669,7 +649,7 @@ private:
  /// to B are from A. Such nodes should be adjacent in the optimal ordering;
  /// the method finds and merges such pairs of nodes.
  void mergeForcedPairs() {
    // Find forced pairs of blocks.
    // Find fallthroughs based on edge weights.
    for (NodeT &Node : AllNodes) {
      if (SuccNodes[Node.Index].size() == 1 &&
          PredNodes[SuccNodes[Node.Index][0]].size() == 1 &&
@@ -719,44 +699,28 @@ private:
    /// Deterministically compare pairs of chains.
    auto compareChainPairs = [](const ChainT *A1, const ChainT *B1,
                                const ChainT *A2, const ChainT *B2) {
      return std::make_tuple(A1->Id, B1->Id) < std::make_tuple(A2->Id, B2->Id);
      if (A1 != A2)
        return A1->Id < A2->Id;
      return B1->Id < B2->Id;
    };

    double PrevScore = 1e9;
    while (HotChains.size() > 1) {
      ChainT *BestChainPred = nullptr;
      ChainT *BestChainSucc = nullptr;
      MergeGainT BestGain;
      // Iterate over all pairs of chains.
      for (ChainT *ChainPred : HotChains) {
        // Since the score of merging doesn't increase, we can stop early when
        // the newly found merge is as good as the previous one.
        if (BestGain.score() == PrevScore)
          break;
        // Get candidates for merging with the current chain.
        for (const auto &[ChainSucc, Edge] : ChainPred->Edges) {
          // Ignore loop edges.
          if (Edge->isSelfEdge())
          if (ChainPred == ChainSucc)
            continue;

          // Stop early if the combined chain violates the maximum allowed size.
          if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize)
            continue;
          // Don't merge the chains if they have vastly different densities.
          // We stop early if the ratio between the densities exceeds
          // MaxMergeDensityRatio. Smaller values of the option result in
          // fewer merges (hence, more chains), which in turn typically yields
          // smaller size of the hot code section.
          double minDensity =
              std::min(ChainPred->density(), ChainSucc->density());
          double maxDensity =
              std::max(ChainPred->density(), ChainSucc->density());
          assert(minDensity > 0.0 && maxDensity > 0.0 &&
                 "incorrectly computed chain densities");
          const double Ratio = maxDensity / minDensity;
          if (Ratio > MaxMergeDensityRatio)
            continue;

          // Compute the gain of merging the two chains
          // Compute the gain of merging the two chains.
          MergeGainT CurGain = getBestMergeGain(ChainPred, ChainSucc, Edge);
          if (CurGain.score() <= EPS)
            continue;
@@ -768,9 +732,6 @@ private:
            BestGain = CurGain;
            BestChainPred = ChainPred;
            BestChainSucc = ChainSucc;
            // Stop early when the merge is as good as the previous one.
            if (BestGain.score() == PrevScore)
              break;
          }
        }
      }
@@ -780,7 +741,6 @@ private:
        break;

      // Merge the best pair of chains.
      PrevScore = BestGain.score();
      mergeChains(BestChainPred, BestChainSucc, BestGain.mergeOffset(),
                  BestGain.mergeType());
    }
@@ -809,25 +769,24 @@ private:
  }

  /// Compute the Ext-TSP score for a given node order and a list of jumps.
  double extTSPScore(const MergedVector<NodeT> &Nodes,
                     const MergedVector<JumpT> &Jumps) const {
    if (Jumps.empty() || Nodes.empty())
  double extTSPScore(const MergedChain &MergedBlocks,
                     const std::vector<JumpT *> &Jumps) const {
    if (Jumps.empty())
      return 0.0;

    uint64_t CurAddr = 0;
    Nodes.forEach([&](const NodeT *Node) {
    MergedBlocks.forEach([&](const NodeT *Node) {
      Node->EstimatedAddr = CurAddr;
      CurAddr += Node->Size;
    });

    double Score = 0;
    Jumps.forEach([&](const JumpT *Jump) {
    for (JumpT *Jump : Jumps) {
      const NodeT *SrcBlock = Jump->Source;
      const NodeT *DstBlock = Jump->Target;
      Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size,
                             DstBlock->EstimatedAddr, Jump->ExecutionCount,
                             Jump->IsConditional);
    });
    }
    return Score;
  }

@@ -839,15 +798,17 @@ private:
  /// element being the corresponding merging type.
  MergeGainT getBestMergeGain(ChainT *ChainPred, ChainT *ChainSucc,
                              ChainEdge *Edge) const {
    if (Edge->hasCachedMergeGain(ChainPred, ChainSucc))
    if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) {
      return Edge->getCachedMergeGain(ChainPred, ChainSucc);
    }

    // Precompute jumps between ChainPred and ChainSucc.
    MergedVector<JumpT> Jumps(Edge->jumps().begin(), Edge->jumps().end());
    assert(!Jumps.empty() && "trying to merge chains w/o jumps");
    auto Jumps = Edge->jumps();
    ChainEdge *EdgePP = ChainPred->getEdge(ChainPred);
    if (EdgePP != nullptr)
      Jumps.append(EdgePP->jumps().begin(), EdgePP->jumps().end());
    if (EdgePP != nullptr) {
      Jumps.insert(Jumps.end(), EdgePP->jumps().begin(), EdgePP->jumps().end());
    }
    assert(!Jumps.empty() && "trying to merge chains w/o jumps");

    // This object holds the best chosen gain of merging two chains.
    MergeGainT Gain = MergeGainT();
@@ -875,6 +836,7 @@ private:
    Gain.updateIfLessThan(
        computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeT::X_Y));

    if (EnableChainSplitAlongJumps) {
      // Attach (a part of) ChainPred before the first node of ChainSucc.
      for (JumpT *Jump : ChainSucc->Nodes.front()->InJumps) {
        const NodeT *SrcBlock = Jump->Source;
@@ -892,20 +854,18 @@ private:
        size_t Offset = DstBlock->CurIndex;
        tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1});
      }
    }

    // Try to break ChainPred in various ways and concatenate with ChainSucc.
    // In practice, applying X2_Y_X1 merging almost never provides benefits;
    // thus, we exclude it from consideration to reduce the search space.
    if (ChainPred->Nodes.size() <= ChainSplitThreshold) {
      for (size_t Offset = 1; Offset < ChainPred->Nodes.size(); Offset++) {
      // Do not split the chain along a jump.
      const NodeT *BB = ChainPred->Nodes[Offset - 1];
      const NodeT *BB2 = ChainPred->Nodes[Offset];
      if (BB->isSuccessor(BB2))
        continue;

        // Try to split the chain in different ways. In practice, applying
        // X2_Y_X1 merging almost never provides benefits; thus, we exclude
        // it from consideration to reduce the search space.
        tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1,
                                 MergeTypeT::X2_X1_Y});
      }
    }
    Edge->setCachedMergeGain(ChainPred, ChainSucc, Gain);
    return Gain;
  }
@@ -915,20 +875,19 @@ private:
  ///
  /// The two chains are not modified in the method.
  MergeGainT computeMergeGain(const ChainT *ChainPred, const ChainT *ChainSucc,
                              const MergedVector<JumpT> &Jumps,
                              const std::vector<JumpT *> &Jumps,
                              size_t MergeOffset, MergeTypeT MergeType) const {
    MergedVector<NodeT> MergedNodes =
    auto MergedBlocks =
        mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType);

    // Do not allow a merge that does not preserve the original entry point.
    if ((ChainPred->isEntry() || ChainSucc->isEntry()) &&
        !MergedNodes.getFirstNode()->isEntry())
        !MergedBlocks.getFirstNode()->isEntry())
      return MergeGainT();

    // The gain for the new chain.
    double NewScore = extTSPScore(MergedNodes, Jumps);
    double CurScore = ChainPred->Score;
    return MergeGainT(NewScore - CurScore, MergeOffset, MergeType);
    auto NewGainScore = extTSPScore(MergedBlocks, Jumps) - ChainPred->Score;
    return MergeGainT(NewGainScore, MergeOffset, MergeType);
  }

  /// Merge chain From into chain Into, update the list of active chains,
@@ -938,7 +897,7 @@ private:
    assert(Into != From && "a chain cannot be merged with itself");

    // Merge the nodes.
    MergedVector<NodeT> MergedNodes =
    MergedChain MergedNodes =
        mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
    Into->merge(From, MergedNodes.getNodes());

@@ -949,10 +908,8 @@ private:
    // Update cached ext-tsp score for the new chain.
    ChainEdge *SelfEdge = Into->getEdge(Into);
    if (SelfEdge != nullptr) {
      MergedNodes = MergedVector<NodeT>(Into->Nodes.begin(), Into->Nodes.end());
      MergedVector<JumpT> MergedJumps(SelfEdge->jumps().begin(),
                                      SelfEdge->jumps().end());
      Into->Score = extTSPScore(MergedNodes, MergedJumps);
      MergedNodes = MergedChain(Into->Nodes.begin(), Into->Nodes.end());
      Into->Score = extTSPScore(MergedNodes, SelfEdge->jumps());
    }

    // Remove the chain from the list of active chains.
@@ -1298,7 +1255,7 @@ private:
  }

  /// Compute the change of the distance locality after merging the chains.
  double distBasedLocalityGain(const MergedVector<NodeT> &MergedBlocks,
  double distBasedLocalityGain(const MergedChain &MergedBlocks,
                               const std::vector<JumpT *> &Jumps) const {
    if (Jumps.empty())
      return 0.0;
@@ -1326,7 +1283,7 @@ private:
    assert(Into != From && "a chain cannot be merged with itself");

    // Merge the nodes.
    MergedVector<NodeT> MergedNodes =
    MergedChain MergedNodes =
        mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
    Into->merge(From, MergedNodes.getNodes());