Loading llvm/lib/Transforms/Utils/CodeLayout.cpp +100 −143 Original line number Diff line number Diff line Loading @@ -99,15 +99,22 @@ static cl::opt<unsigned> BackwardDistance( cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP")); // The maximum size of a chain created by the algorithm. The size is bounded // so that the algorithm can efficiently process extremely large instances. // so that the algorithm can efficiently process extremely large instance. static cl::opt<unsigned> MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(512), cl::desc("The maximum size of a chain to create")); MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(4096), cl::desc("The maximum size of a chain to create.")); // The maximum ratio between densities of two chains for merging. static cl::opt<double> MaxMergeDensityRatio( "ext-tsp-max-merge-density-ratio", cl::ReallyHidden, cl::init(128), cl::desc("The maximum ratio between densities of two chains for merging")); // The maximum size of a chain for splitting. Larger values of the threshold // may yield better quality at the cost of worsen run-time. static cl::opt<unsigned> ChainSplitThreshold( "ext-tsp-chain-split-threshold", cl::ReallyHidden, cl::init(128), cl::desc("The maximum size of a chain to apply splitting")); // The option enables splitting (large) chains along in-coming and out-going // jumps. This typically results in a better quality. static cl::opt<bool> EnableChainSplitAlongJumps( "ext-tsp-enable-chain-split-along-jumps", cl::ReallyHidden, cl::init(true), cl::desc("The maximum size of a chain to apply splitting")); // Algorithm-specific options for CDS. static cl::opt<unsigned> CacheEntries("cds-cache-entries", cl::ReallyHidden, Loading Loading @@ -210,13 +217,11 @@ struct NodeT { NodeT &operator=(const NodeT &) = delete; NodeT &operator=(NodeT &&) = default; explicit NodeT(size_t Index, uint64_t Size, uint64_t Count) : Index(Index), Size(Size), ExecutionCount(Count) {} explicit NodeT(size_t Index, uint64_t Size, uint64_t EC) : Index(Index), Size(Size), ExecutionCount(EC) {} bool isEntry() const { return Index == 0; } // Check if Other is a successor of the node. bool isSuccessor(const NodeT *Other) const; // The total execution count of outgoing jumps. uint64_t outCount() const; Loading Loading @@ -437,14 +442,6 @@ private: bool CacheValidBackward{false}; }; bool NodeT::isSuccessor(const NodeT *Other) const { for (JumpT *Jump : OutJumps) { if (Jump->Target == Other) return true; } return false; } uint64_t NodeT::outCount() const { uint64_t Count = 0; for (JumpT *Jump : OutJumps) Loading Loading @@ -478,14 +475,14 @@ void ChainT::mergeEdges(ChainT *Other) { } } /// A wrapper around three concatenated vectors (chains) of objects; it is used /// to avoid extra instantiation of the vectors. template <typename ObjType> struct MergedVector { using ObjIter = typename std::vector<ObjType *>::const_iterator; using NodeIter = std::vector<NodeT *>::const_iterator; MergedVector(ObjIter Begin1, ObjIter End1, ObjIter Begin2 = ObjIter(), ObjIter End2 = ObjIter(), ObjIter Begin3 = ObjIter(), ObjIter End3 = ObjIter()) /// A wrapper around three chains of nodes; it is used to avoid extra /// instantiation of the vectors. struct MergedChain { MergedChain(NodeIter Begin1, NodeIter End1, NodeIter Begin2 = NodeIter(), NodeIter End2 = NodeIter(), NodeIter Begin3 = NodeIter(), NodeIter End3 = NodeIter()) : Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3), End3(End3) {} Loading @@ -510,26 +507,13 @@ template <typename ObjType> struct MergedVector { const NodeT *getFirstNode() const { return *Begin1; } bool empty() const { return Begin1 == End1; } void append(ObjIter Begin, ObjIter End) { if (Begin2 == End2) { Begin2 = Begin; End2 = End; return; } assert(Begin3 == End3 && "cannot extend MergedVector"); Begin3 = Begin; End3 = End; } private: ObjIter Begin1; ObjIter End1; ObjIter Begin2; ObjIter End2; ObjIter Begin3; ObjIter End3; NodeIter Begin1; NodeIter End1; NodeIter Begin2; NodeIter End2; NodeIter Begin3; NodeIter End3; }; /// Merge two chains of nodes respecting a given 'type' and 'offset'. Loading @@ -537,29 +521,29 @@ private: /// If MergeType == 0, then the result is a concatenation of two chains. /// Otherwise, the first chain is cut into two sub-chains at the offset, /// and merged using all possible ways of concatenating three chains. MergedVector<NodeT> mergeNodes(const std::vector<NodeT *> &X, const std::vector<NodeT *> &Y, size_t MergeOffset, MergeTypeT MergeType) { MergedChain mergeNodes(const std::vector<NodeT *> &X, const std::vector<NodeT *> &Y, size_t MergeOffset, MergeTypeT MergeType) { // Split the first chain, X, into X1 and X2. MergedVector<NodeT>::ObjIter BeginX1 = X.begin(); MergedVector<NodeT>::ObjIter EndX1 = X.begin() + MergeOffset; MergedVector<NodeT>::ObjIter BeginX2 = X.begin() + MergeOffset; MergedVector<NodeT>::ObjIter EndX2 = X.end(); MergedVector<NodeT>::ObjIter BeginY = Y.begin(); MergedVector<NodeT>::ObjIter EndY = Y.end(); NodeIter BeginX1 = X.begin(); NodeIter EndX1 = X.begin() + MergeOffset; NodeIter BeginX2 = X.begin() + MergeOffset; NodeIter EndX2 = X.end(); NodeIter BeginY = Y.begin(); NodeIter EndY = Y.end(); // Construct a new chain from the three existing ones. switch (MergeType) { case MergeTypeT::X_Y: return MergedVector<NodeT>(BeginX1, EndX2, BeginY, EndY); return MergedChain(BeginX1, EndX2, BeginY, EndY); case MergeTypeT::Y_X: return MergedVector<NodeT>(BeginY, EndY, BeginX1, EndX2); return MergedChain(BeginY, EndY, BeginX1, EndX2); case MergeTypeT::X1_Y_X2: return MergedVector<NodeT>(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2); return MergedChain(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2); case MergeTypeT::Y_X2_X1: return MergedVector<NodeT>(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1); return MergedChain(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1); case MergeTypeT::X2_X1_Y: return MergedVector<NodeT>(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY); return MergedChain(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY); } llvm_unreachable("unexpected chain merge type"); } Loading Loading @@ -634,10 +618,6 @@ private: AllChains.reserve(NumNodes); HotChains.reserve(NumNodes); for (NodeT &Node : AllNodes) { // Adjust execution counts. Node.ExecutionCount = std::max(Node.ExecutionCount, Node.inCount()); Node.ExecutionCount = std::max(Node.ExecutionCount, Node.outCount()); // Create a chain. AllChains.emplace_back(Node.Index, &Node); Node.CurChain = &AllChains.back(); if (Node.ExecutionCount > 0) Loading @@ -650,13 +630,13 @@ private: for (JumpT *Jump : PredNode.OutJumps) { NodeT *SuccNode = Jump->Target; ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain); // This edge is already present in the graph. // this edge is already present in the graph. if (CurEdge != nullptr) { assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr); CurEdge->appendJump(Jump); continue; } // This is a new edge. // this is a new edge. AllEdges.emplace_back(Jump); PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back()); SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back()); Loading @@ -669,7 +649,7 @@ private: /// to B are from A. Such nodes should be adjacent in the optimal ordering; /// the method finds and merges such pairs of nodes. void mergeForcedPairs() { // Find forced pairs of blocks. // Find fallthroughs based on edge weights. for (NodeT &Node : AllNodes) { if (SuccNodes[Node.Index].size() == 1 && PredNodes[SuccNodes[Node.Index][0]].size() == 1 && Loading Loading @@ -719,44 +699,28 @@ private: /// Deterministically compare pairs of chains. auto compareChainPairs = [](const ChainT *A1, const ChainT *B1, const ChainT *A2, const ChainT *B2) { return std::make_tuple(A1->Id, B1->Id) < std::make_tuple(A2->Id, B2->Id); if (A1 != A2) return A1->Id < A2->Id; return B1->Id < B2->Id; }; double PrevScore = 1e9; while (HotChains.size() > 1) { ChainT *BestChainPred = nullptr; ChainT *BestChainSucc = nullptr; MergeGainT BestGain; // Iterate over all pairs of chains. for (ChainT *ChainPred : HotChains) { // Since the score of merging doesn't increase, we can stop early when // the newly found merge is as good as the previous one. if (BestGain.score() == PrevScore) break; // Get candidates for merging with the current chain. for (const auto &[ChainSucc, Edge] : ChainPred->Edges) { // Ignore loop edges. if (Edge->isSelfEdge()) if (ChainPred == ChainSucc) continue; // Stop early if the combined chain violates the maximum allowed size. if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize) continue; // Don't merge the chains if they have vastly different densities. // We stop early if the ratio between the densities exceeds // MaxMergeDensityRatio. Smaller values of the option result in // fewer merges (hence, more chains), which in turn typically yields // smaller size of the hot code section. double minDensity = std::min(ChainPred->density(), ChainSucc->density()); double maxDensity = std::max(ChainPred->density(), ChainSucc->density()); assert(minDensity > 0.0 && maxDensity > 0.0 && "incorrectly computed chain densities"); const double Ratio = maxDensity / minDensity; if (Ratio > MaxMergeDensityRatio) continue; // Compute the gain of merging the two chains // Compute the gain of merging the two chains. MergeGainT CurGain = getBestMergeGain(ChainPred, ChainSucc, Edge); if (CurGain.score() <= EPS) continue; Loading @@ -768,9 +732,6 @@ private: BestGain = CurGain; BestChainPred = ChainPred; BestChainSucc = ChainSucc; // Stop early when the merge is as good as the previous one. if (BestGain.score() == PrevScore) break; } } } Loading @@ -780,7 +741,6 @@ private: break; // Merge the best pair of chains. PrevScore = BestGain.score(); mergeChains(BestChainPred, BestChainSucc, BestGain.mergeOffset(), BestGain.mergeType()); } Loading Loading @@ -809,25 +769,24 @@ private: } /// Compute the Ext-TSP score for a given node order and a list of jumps. double extTSPScore(const MergedVector<NodeT> &Nodes, const MergedVector<JumpT> &Jumps) const { if (Jumps.empty() || Nodes.empty()) double extTSPScore(const MergedChain &MergedBlocks, const std::vector<JumpT *> &Jumps) const { if (Jumps.empty()) return 0.0; uint64_t CurAddr = 0; Nodes.forEach([&](const NodeT *Node) { MergedBlocks.forEach([&](const NodeT *Node) { Node->EstimatedAddr = CurAddr; CurAddr += Node->Size; }); double Score = 0; Jumps.forEach([&](const JumpT *Jump) { for (JumpT *Jump : Jumps) { const NodeT *SrcBlock = Jump->Source; const NodeT *DstBlock = Jump->Target; Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size, DstBlock->EstimatedAddr, Jump->ExecutionCount, Jump->IsConditional); }); } return Score; } Loading @@ -839,15 +798,17 @@ private: /// element being the corresponding merging type. MergeGainT getBestMergeGain(ChainT *ChainPred, ChainT *ChainSucc, ChainEdge *Edge) const { if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) { return Edge->getCachedMergeGain(ChainPred, ChainSucc); } // Precompute jumps between ChainPred and ChainSucc. MergedVector<JumpT> Jumps(Edge->jumps().begin(), Edge->jumps().end()); assert(!Jumps.empty() && "trying to merge chains w/o jumps"); auto Jumps = Edge->jumps(); ChainEdge *EdgePP = ChainPred->getEdge(ChainPred); if (EdgePP != nullptr) Jumps.append(EdgePP->jumps().begin(), EdgePP->jumps().end()); if (EdgePP != nullptr) { Jumps.insert(Jumps.end(), EdgePP->jumps().begin(), EdgePP->jumps().end()); } assert(!Jumps.empty() && "trying to merge chains w/o jumps"); // This object holds the best chosen gain of merging two chains. MergeGainT Gain = MergeGainT(); Loading Loading @@ -875,6 +836,7 @@ private: Gain.updateIfLessThan( computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeT::X_Y)); if (EnableChainSplitAlongJumps) { // Attach (a part of) ChainPred before the first node of ChainSucc. for (JumpT *Jump : ChainSucc->Nodes.front()->InJumps) { const NodeT *SrcBlock = Jump->Source; Loading @@ -892,20 +854,18 @@ private: size_t Offset = DstBlock->CurIndex; tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1}); } } // Try to break ChainPred in various ways and concatenate with ChainSucc. // In practice, applying X2_Y_X1 merging is almost never provides benefits; // thus, we exclude it from consideration to reduce the search space. if (ChainPred->Nodes.size() <= ChainSplitThreshold) { for (size_t Offset = 1; Offset < ChainPred->Nodes.size(); Offset++) { // Do not split the chain along a jump. const NodeT *BB = ChainPred->Nodes[Offset - 1]; const NodeT *BB2 = ChainPred->Nodes[Offset]; if (BB->isSuccessor(BB2)) continue; // Try to split the chain in different ways. In practice, applying // X2_Y_X1 merging is almost never provides benefits; thus, we exclude // it from consideration to reduce the search space. tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1, MergeTypeT::X2_X1_Y}); } } Edge->setCachedMergeGain(ChainPred, ChainSucc, Gain); return Gain; } Loading @@ -915,20 +875,19 @@ private: /// /// The two chains are not modified in the method. MergeGainT computeMergeGain(const ChainT *ChainPred, const ChainT *ChainSucc, const MergedVector<JumpT> &Jumps, const std::vector<JumpT *> &Jumps, size_t MergeOffset, MergeTypeT MergeType) const { MergedVector<NodeT> MergedNodes = auto MergedBlocks = mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType); // Do not allow a merge that does not preserve the original entry point. if ((ChainPred->isEntry() || ChainSucc->isEntry()) && !MergedNodes.getFirstNode()->isEntry()) !MergedBlocks.getFirstNode()->isEntry()) return MergeGainT(); // The gain for the new chain. double NewScore = extTSPScore(MergedNodes, Jumps); double CurScore = ChainPred->Score; return MergeGainT(NewScore - CurScore, MergeOffset, MergeType); auto NewGainScore = extTSPScore(MergedBlocks, Jumps) - ChainPred->Score; return MergeGainT(NewGainScore, MergeOffset, MergeType); } /// Merge chain From into chain Into, update the list of active chains, Loading @@ -938,7 +897,7 @@ private: assert(Into != From && "a chain cannot be merged with itself"); // Merge the nodes. MergedVector<NodeT> MergedNodes = MergedChain MergedNodes = mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType); Into->merge(From, MergedNodes.getNodes()); Loading @@ -949,10 +908,8 @@ private: // Update cached ext-tsp score for the new chain. ChainEdge *SelfEdge = Into->getEdge(Into); if (SelfEdge != nullptr) { MergedNodes = MergedVector<NodeT>(Into->Nodes.begin(), Into->Nodes.end()); MergedVector<JumpT> MergedJumps(SelfEdge->jumps().begin(), SelfEdge->jumps().end()); Into->Score = extTSPScore(MergedNodes, MergedJumps); MergedNodes = MergedChain(Into->Nodes.begin(), Into->Nodes.end()); Into->Score = extTSPScore(MergedNodes, SelfEdge->jumps()); } // Remove the chain from the list of active chains. Loading Loading @@ -1298,7 +1255,7 @@ private: } /// Compute the change of the distance locality after merging the chains. double distBasedLocalityGain(const MergedVector<NodeT> &MergedBlocks, double distBasedLocalityGain(const MergedChain &MergedBlocks, const std::vector<JumpT *> &Jumps) const { if (Jumps.empty()) return 0.0; Loading Loading @@ -1326,7 +1283,7 @@ private: assert(Into != From && "a chain cannot be merged with itself"); // Merge the nodes. MergedVector<NodeT> MergedNodes = MergedChain MergedNodes = mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType); Into->merge(From, MergedNodes.getNodes()); Loading Loading
llvm/lib/Transforms/Utils/CodeLayout.cpp +100 −143 Original line number Diff line number Diff line Loading @@ -99,15 +99,22 @@ static cl::opt<unsigned> BackwardDistance( cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP")); // The maximum size of a chain created by the algorithm. The size is bounded // so that the algorithm can efficiently process extremely large instances. // so that the algorithm can efficiently process extremely large instance. static cl::opt<unsigned> MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(512), cl::desc("The maximum size of a chain to create")); MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(4096), cl::desc("The maximum size of a chain to create.")); // The maximum ratio between densities of two chains for merging. static cl::opt<double> MaxMergeDensityRatio( "ext-tsp-max-merge-density-ratio", cl::ReallyHidden, cl::init(128), cl::desc("The maximum ratio between densities of two chains for merging")); // The maximum size of a chain for splitting. Larger values of the threshold // may yield better quality at the cost of worsen run-time. static cl::opt<unsigned> ChainSplitThreshold( "ext-tsp-chain-split-threshold", cl::ReallyHidden, cl::init(128), cl::desc("The maximum size of a chain to apply splitting")); // The option enables splitting (large) chains along in-coming and out-going // jumps. This typically results in a better quality. static cl::opt<bool> EnableChainSplitAlongJumps( "ext-tsp-enable-chain-split-along-jumps", cl::ReallyHidden, cl::init(true), cl::desc("The maximum size of a chain to apply splitting")); // Algorithm-specific options for CDS. static cl::opt<unsigned> CacheEntries("cds-cache-entries", cl::ReallyHidden, Loading Loading @@ -210,13 +217,11 @@ struct NodeT { NodeT &operator=(const NodeT &) = delete; NodeT &operator=(NodeT &&) = default; explicit NodeT(size_t Index, uint64_t Size, uint64_t Count) : Index(Index), Size(Size), ExecutionCount(Count) {} explicit NodeT(size_t Index, uint64_t Size, uint64_t EC) : Index(Index), Size(Size), ExecutionCount(EC) {} bool isEntry() const { return Index == 0; } // Check if Other is a successor of the node. bool isSuccessor(const NodeT *Other) const; // The total execution count of outgoing jumps. uint64_t outCount() const; Loading Loading @@ -437,14 +442,6 @@ private: bool CacheValidBackward{false}; }; bool NodeT::isSuccessor(const NodeT *Other) const { for (JumpT *Jump : OutJumps) { if (Jump->Target == Other) return true; } return false; } uint64_t NodeT::outCount() const { uint64_t Count = 0; for (JumpT *Jump : OutJumps) Loading Loading @@ -478,14 +475,14 @@ void ChainT::mergeEdges(ChainT *Other) { } } /// A wrapper around three concatenated vectors (chains) of objects; it is used /// to avoid extra instantiation of the vectors. template <typename ObjType> struct MergedVector { using ObjIter = typename std::vector<ObjType *>::const_iterator; using NodeIter = std::vector<NodeT *>::const_iterator; MergedVector(ObjIter Begin1, ObjIter End1, ObjIter Begin2 = ObjIter(), ObjIter End2 = ObjIter(), ObjIter Begin3 = ObjIter(), ObjIter End3 = ObjIter()) /// A wrapper around three chains of nodes; it is used to avoid extra /// instantiation of the vectors. struct MergedChain { MergedChain(NodeIter Begin1, NodeIter End1, NodeIter Begin2 = NodeIter(), NodeIter End2 = NodeIter(), NodeIter Begin3 = NodeIter(), NodeIter End3 = NodeIter()) : Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3), End3(End3) {} Loading @@ -510,26 +507,13 @@ template <typename ObjType> struct MergedVector { const NodeT *getFirstNode() const { return *Begin1; } bool empty() const { return Begin1 == End1; } void append(ObjIter Begin, ObjIter End) { if (Begin2 == End2) { Begin2 = Begin; End2 = End; return; } assert(Begin3 == End3 && "cannot extend MergedVector"); Begin3 = Begin; End3 = End; } private: ObjIter Begin1; ObjIter End1; ObjIter Begin2; ObjIter End2; ObjIter Begin3; ObjIter End3; NodeIter Begin1; NodeIter End1; NodeIter Begin2; NodeIter End2; NodeIter Begin3; NodeIter End3; }; /// Merge two chains of nodes respecting a given 'type' and 'offset'. Loading @@ -537,29 +521,29 @@ private: /// If MergeType == 0, then the result is a concatenation of two chains. /// Otherwise, the first chain is cut into two sub-chains at the offset, /// and merged using all possible ways of concatenating three chains. MergedVector<NodeT> mergeNodes(const std::vector<NodeT *> &X, const std::vector<NodeT *> &Y, size_t MergeOffset, MergeTypeT MergeType) { MergedChain mergeNodes(const std::vector<NodeT *> &X, const std::vector<NodeT *> &Y, size_t MergeOffset, MergeTypeT MergeType) { // Split the first chain, X, into X1 and X2. MergedVector<NodeT>::ObjIter BeginX1 = X.begin(); MergedVector<NodeT>::ObjIter EndX1 = X.begin() + MergeOffset; MergedVector<NodeT>::ObjIter BeginX2 = X.begin() + MergeOffset; MergedVector<NodeT>::ObjIter EndX2 = X.end(); MergedVector<NodeT>::ObjIter BeginY = Y.begin(); MergedVector<NodeT>::ObjIter EndY = Y.end(); NodeIter BeginX1 = X.begin(); NodeIter EndX1 = X.begin() + MergeOffset; NodeIter BeginX2 = X.begin() + MergeOffset; NodeIter EndX2 = X.end(); NodeIter BeginY = Y.begin(); NodeIter EndY = Y.end(); // Construct a new chain from the three existing ones. switch (MergeType) { case MergeTypeT::X_Y: return MergedVector<NodeT>(BeginX1, EndX2, BeginY, EndY); return MergedChain(BeginX1, EndX2, BeginY, EndY); case MergeTypeT::Y_X: return MergedVector<NodeT>(BeginY, EndY, BeginX1, EndX2); return MergedChain(BeginY, EndY, BeginX1, EndX2); case MergeTypeT::X1_Y_X2: return MergedVector<NodeT>(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2); return MergedChain(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2); case MergeTypeT::Y_X2_X1: return MergedVector<NodeT>(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1); return MergedChain(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1); case MergeTypeT::X2_X1_Y: return MergedVector<NodeT>(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY); return MergedChain(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY); } llvm_unreachable("unexpected chain merge type"); } Loading Loading @@ -634,10 +618,6 @@ private: AllChains.reserve(NumNodes); HotChains.reserve(NumNodes); for (NodeT &Node : AllNodes) { // Adjust execution counts. Node.ExecutionCount = std::max(Node.ExecutionCount, Node.inCount()); Node.ExecutionCount = std::max(Node.ExecutionCount, Node.outCount()); // Create a chain. AllChains.emplace_back(Node.Index, &Node); Node.CurChain = &AllChains.back(); if (Node.ExecutionCount > 0) Loading @@ -650,13 +630,13 @@ private: for (JumpT *Jump : PredNode.OutJumps) { NodeT *SuccNode = Jump->Target; ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain); // This edge is already present in the graph. // this edge is already present in the graph. if (CurEdge != nullptr) { assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr); CurEdge->appendJump(Jump); continue; } // This is a new edge. // this is a new edge. AllEdges.emplace_back(Jump); PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back()); SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back()); Loading @@ -669,7 +649,7 @@ private: /// to B are from A. Such nodes should be adjacent in the optimal ordering; /// the method finds and merges such pairs of nodes. void mergeForcedPairs() { // Find forced pairs of blocks. // Find fallthroughs based on edge weights. for (NodeT &Node : AllNodes) { if (SuccNodes[Node.Index].size() == 1 && PredNodes[SuccNodes[Node.Index][0]].size() == 1 && Loading Loading @@ -719,44 +699,28 @@ private: /// Deterministically compare pairs of chains. auto compareChainPairs = [](const ChainT *A1, const ChainT *B1, const ChainT *A2, const ChainT *B2) { return std::make_tuple(A1->Id, B1->Id) < std::make_tuple(A2->Id, B2->Id); if (A1 != A2) return A1->Id < A2->Id; return B1->Id < B2->Id; }; double PrevScore = 1e9; while (HotChains.size() > 1) { ChainT *BestChainPred = nullptr; ChainT *BestChainSucc = nullptr; MergeGainT BestGain; // Iterate over all pairs of chains. for (ChainT *ChainPred : HotChains) { // Since the score of merging doesn't increase, we can stop early when // the newly found merge is as good as the previous one. if (BestGain.score() == PrevScore) break; // Get candidates for merging with the current chain. for (const auto &[ChainSucc, Edge] : ChainPred->Edges) { // Ignore loop edges. if (Edge->isSelfEdge()) if (ChainPred == ChainSucc) continue; // Stop early if the combined chain violates the maximum allowed size. if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize) continue; // Don't merge the chains if they have vastly different densities. // We stop early if the ratio between the densities exceeds // MaxMergeDensityRatio. Smaller values of the option result in // fewer merges (hence, more chains), which in turn typically yields // smaller size of the hot code section. double minDensity = std::min(ChainPred->density(), ChainSucc->density()); double maxDensity = std::max(ChainPred->density(), ChainSucc->density()); assert(minDensity > 0.0 && maxDensity > 0.0 && "incorrectly computed chain densities"); const double Ratio = maxDensity / minDensity; if (Ratio > MaxMergeDensityRatio) continue; // Compute the gain of merging the two chains // Compute the gain of merging the two chains. MergeGainT CurGain = getBestMergeGain(ChainPred, ChainSucc, Edge); if (CurGain.score() <= EPS) continue; Loading @@ -768,9 +732,6 @@ private: BestGain = CurGain; BestChainPred = ChainPred; BestChainSucc = ChainSucc; // Stop early when the merge is as good as the previous one. if (BestGain.score() == PrevScore) break; } } } Loading @@ -780,7 +741,6 @@ private: break; // Merge the best pair of chains. PrevScore = BestGain.score(); mergeChains(BestChainPred, BestChainSucc, BestGain.mergeOffset(), BestGain.mergeType()); } Loading Loading @@ -809,25 +769,24 @@ private: } /// Compute the Ext-TSP score for a given node order and a list of jumps. double extTSPScore(const MergedVector<NodeT> &Nodes, const MergedVector<JumpT> &Jumps) const { if (Jumps.empty() || Nodes.empty()) double extTSPScore(const MergedChain &MergedBlocks, const std::vector<JumpT *> &Jumps) const { if (Jumps.empty()) return 0.0; uint64_t CurAddr = 0; Nodes.forEach([&](const NodeT *Node) { MergedBlocks.forEach([&](const NodeT *Node) { Node->EstimatedAddr = CurAddr; CurAddr += Node->Size; }); double Score = 0; Jumps.forEach([&](const JumpT *Jump) { for (JumpT *Jump : Jumps) { const NodeT *SrcBlock = Jump->Source; const NodeT *DstBlock = Jump->Target; Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size, DstBlock->EstimatedAddr, Jump->ExecutionCount, Jump->IsConditional); }); } return Score; } Loading @@ -839,15 +798,17 @@ private: /// element being the corresponding merging type. MergeGainT getBestMergeGain(ChainT *ChainPred, ChainT *ChainSucc, ChainEdge *Edge) const { if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) { return Edge->getCachedMergeGain(ChainPred, ChainSucc); } // Precompute jumps between ChainPred and ChainSucc. MergedVector<JumpT> Jumps(Edge->jumps().begin(), Edge->jumps().end()); assert(!Jumps.empty() && "trying to merge chains w/o jumps"); auto Jumps = Edge->jumps(); ChainEdge *EdgePP = ChainPred->getEdge(ChainPred); if (EdgePP != nullptr) Jumps.append(EdgePP->jumps().begin(), EdgePP->jumps().end()); if (EdgePP != nullptr) { Jumps.insert(Jumps.end(), EdgePP->jumps().begin(), EdgePP->jumps().end()); } assert(!Jumps.empty() && "trying to merge chains w/o jumps"); // This object holds the best chosen gain of merging two chains. MergeGainT Gain = MergeGainT(); Loading Loading @@ -875,6 +836,7 @@ private: Gain.updateIfLessThan( computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeT::X_Y)); if (EnableChainSplitAlongJumps) { // Attach (a part of) ChainPred before the first node of ChainSucc. for (JumpT *Jump : ChainSucc->Nodes.front()->InJumps) { const NodeT *SrcBlock = Jump->Source; Loading @@ -892,20 +854,18 @@ private: size_t Offset = DstBlock->CurIndex; tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1}); } } // Try to break ChainPred in various ways and concatenate with ChainSucc. // In practice, applying X2_Y_X1 merging is almost never provides benefits; // thus, we exclude it from consideration to reduce the search space. if (ChainPred->Nodes.size() <= ChainSplitThreshold) { for (size_t Offset = 1; Offset < ChainPred->Nodes.size(); Offset++) { // Do not split the chain along a jump. const NodeT *BB = ChainPred->Nodes[Offset - 1]; const NodeT *BB2 = ChainPred->Nodes[Offset]; if (BB->isSuccessor(BB2)) continue; // Try to split the chain in different ways. In practice, applying // X2_Y_X1 merging is almost never provides benefits; thus, we exclude // it from consideration to reduce the search space. tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1, MergeTypeT::X2_X1_Y}); } } Edge->setCachedMergeGain(ChainPred, ChainSucc, Gain); return Gain; } Loading @@ -915,20 +875,19 @@ private: /// /// The two chains are not modified in the method. MergeGainT computeMergeGain(const ChainT *ChainPred, const ChainT *ChainSucc, const MergedVector<JumpT> &Jumps, const std::vector<JumpT *> &Jumps, size_t MergeOffset, MergeTypeT MergeType) const { MergedVector<NodeT> MergedNodes = auto MergedBlocks = mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType); // Do not allow a merge that does not preserve the original entry point. if ((ChainPred->isEntry() || ChainSucc->isEntry()) && !MergedNodes.getFirstNode()->isEntry()) !MergedBlocks.getFirstNode()->isEntry()) return MergeGainT(); // The gain for the new chain. double NewScore = extTSPScore(MergedNodes, Jumps); double CurScore = ChainPred->Score; return MergeGainT(NewScore - CurScore, MergeOffset, MergeType); auto NewGainScore = extTSPScore(MergedBlocks, Jumps) - ChainPred->Score; return MergeGainT(NewGainScore, MergeOffset, MergeType); } /// Merge chain From into chain Into, update the list of active chains, Loading @@ -938,7 +897,7 @@ private: assert(Into != From && "a chain cannot be merged with itself"); // Merge the nodes. MergedVector<NodeT> MergedNodes = MergedChain MergedNodes = mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType); Into->merge(From, MergedNodes.getNodes()); Loading @@ -949,10 +908,8 @@ private: // Update cached ext-tsp score for the new chain. ChainEdge *SelfEdge = Into->getEdge(Into); if (SelfEdge != nullptr) { MergedNodes = MergedVector<NodeT>(Into->Nodes.begin(), Into->Nodes.end()); MergedVector<JumpT> MergedJumps(SelfEdge->jumps().begin(), SelfEdge->jumps().end()); Into->Score = extTSPScore(MergedNodes, MergedJumps); MergedNodes = MergedChain(Into->Nodes.begin(), Into->Nodes.end()); Into->Score = extTSPScore(MergedNodes, SelfEdge->jumps()); } // Remove the chain from the list of active chains. Loading Loading @@ -1298,7 +1255,7 @@ private: } /// Compute the change of the distance locality after merging the chains. double distBasedLocalityGain(const MergedVector<NodeT> &MergedBlocks, double distBasedLocalityGain(const MergedChain &MergedBlocks, const std::vector<JumpT *> &Jumps) const { if (Jumps.empty()) return 0.0; Loading Loading @@ -1326,7 +1283,7 @@ private: assert(Into != From && "a chain cannot be merged with itself"); // Merge the nodes. MergedVector<NodeT> MergedNodes = MergedChain MergedNodes = mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType); Into->merge(From, MergedNodes.getNodes()); Loading