Loading clang/lib/CodeGen/CGKokkos.cpp +100 −38 Original line number Diff line number Diff line Loading @@ -195,17 +195,6 @@ std::vector<const ParmVarDecl*> CodeGenFunction::EmitKokkosParallelForInductionVar(const LambdaExpr *Lambda) { const CXXMethodDecl *MD = Lambda->getCallOperator(); assert(MD && "EmitKokkosParallelFor() -- bad method decl from labmda call."); /*const ParmVarDecl *InductionVarDecl = MD->getParamDecl(0); assert(InductionVarDecl && "EmitKokkosParallelFor() -- bad loop variable decl!"); printf("PARAM COUNT: %d\n\n", MD->getNumParams()); EmitVarDecl(*InductionVarDecl); Address Addr = GetAddrOfLocalVar(InductionVarDecl); llvm::Value *Zero = llvm::ConstantInt::get(ConvertType(InductionVarDecl->getType()), 0); Builder.CreateStore(Zero, Addr); return InductionVarDecl;*/ std::vector<const ParmVarDecl*> params; Loading Loading @@ -234,17 +223,6 @@ void CodeGenFunction::EmitKokkosParallelForCond(const Expr *BoundsExpr, if (BoundsExpr->getStmtClass() == Expr::BinaryOperatorClass) { RValue RV = EmitAnyExpr(BoundsExpr); LoopEnd = RV.getScalarVal(); } else if (BoundsExpr->getStmtClass() == Expr::CXXTemporaryObjectExprClass) { const CXXTemporaryObjectExpr *CXXTO = dyn_cast<CXXTemporaryObjectExpr>(BoundsExpr); const InitListExpr *UpperBounds = dyn_cast<InitListExpr>(CXXTO->getArg(1)->IgnoreImplicit()); // Create a multiply statement to computer the proper upper bound const Expr *lval = UpperBounds->getInit(0)->IgnoreImplicit(); const Expr *rval = UpperBounds->getInit(1)->IgnoreImplicit(); llvm::Value *lvalue = EmitScalarExpr(lval); llvm::Value *rvalue = EmitScalarExpr(rval); LoopEnd = Builder.CreateMul(lvalue, rvalue); } else { LoopEnd = EmitScalarExpr(BoundsExpr); } Loading Loading @@ -293,6 +271,30 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, return false; } // Build the queue of dimensions (upper bounds) std::queue<const Expr *> DimQueue; if (BE->getStmtClass() == Expr::CXXTemporaryObjectExprClass) { const CXXTemporaryObjectExpr *CXXTO = dyn_cast<CXXTemporaryObjectExpr>(BE); const InitListExpr *UpperBounds = dyn_cast<InitListExpr>(CXXTO->getArg(1)->IgnoreImplicit()); for (int i = 0; i<UpperBounds->getNumInits(); i++) { const Expr *val = UpperBounds->getInit(i)->IgnoreImplicit(); DimQueue.push(val); } } else { DimQueue.push(BE); } // Get the induction varaibles std::vector<const ParmVarDecl*> params = EmitKokkosParallelForInductionVar(Lambda); // These are extra steps that we can probably optimize away BE = DimQueue.front(); DimQueue.pop(); const ParmVarDecl *InductionVarDecl = params.at(0); // Create all jump destinations and basic blocks in the order they // appear in the IR. JumpDest Condition = getJumpDestInCurrentScope("kokkos.forall.cond"); Loading Loading @@ -323,9 +325,6 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, // // TODO: Do we need to "relax" these assumptions to support broader code coverage? // This is 'equivalent' to the Init statement in a traditional for loop (e.g. int i = 0). /*const ParmVarDecl *InductionVarDecl; InductionVarDecl = EmitKokkosParallelForInductionVar(Lambda);*/ std::vector<const ParmVarDecl*> params = EmitKokkosParallelForInductionVar(Lambda); // Create the sync region. PushSyncRegion(); Loading @@ -348,7 +347,6 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, LexicalScope ConditionScope(*this, R); // Create the conditional. const ParmVarDecl *InductionVarDecl = params.at(0); EmitKokkosParallelForCond(BE, InductionVarDecl, Detach, End, Sync); if (PForScope.requiresCleanups()) { Loading Loading @@ -380,12 +378,16 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, Builder.CreateAlignedStore(GInductionVal, TLInductionVar, getContext().getTypeAlignInChars(RefType)); { if (DimQueue.size() == 0) { // Create a separate cleanup scope for the body, in case it is not // a compound statement. InKokkosConstruct = true; RunCleanupsScope BodyScope(*this); EmitStmt(Lambda->getBody()); InKokkosConstruct = false; } else { EmitKokkosInnerLoop(CE, Lambda, nullptr, DimQueue, params); } } auto tmp = AllocaInsertPt; Loading @@ -409,12 +411,10 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, Builder.CreateReattach(Increment, SRStart); EmitBlock(Increment); for (const ParmVarDecl* IVD : params) { llvm::Value *IncVal = Builder.CreateLoad(GetAddrOfLocalVar(IVD)); llvm::Value *One = llvm::ConstantInt::get(ConvertType(IVD->getType()), 1); llvm::Value *IncVal = Builder.CreateLoad(GetAddrOfLocalVar(InductionVarDecl)); llvm::Value *One = llvm::ConstantInt::get(ConvertType(InductionVarDecl->getType()), 1); IncVal = Builder.CreateAdd(IncVal, One); Builder.CreateStore(IncVal, GetAddrOfLocalVar(IVD)); } Builder.CreateStore(IncVal, GetAddrOfLocalVar(InductionVarDecl)); BreakContinueStack.pop_back(); ConditionScope.ForceCleanup(); Loading @@ -430,6 +430,68 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, return true; } // This is in charge of building an inner loop bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr *Lambda, llvm::BasicBlock *TopBlock, std::queue<const Expr*> DimQueue, std::vector<const ParmVarDecl*> params) { // Get arguments int pos = DimQueue.size(); const Expr *BE = DimQueue.front(); DimQueue.pop(); const ParmVarDecl *InductionVarDecl = params.at(pos); llvm::BasicBlock *Zero = createBasicBlock("kokkos.forall.zero" + std::to_string(pos)); JumpDest Condition = getJumpDestInCurrentScope("kokkos.forall.cond" + std::to_string(pos)); llvm::BasicBlock *LoopBody = createBasicBlock("kokkos.forall.body" + std::to_string(pos)); llvm::BasicBlock *Increment = createBasicBlock("kokkos.forall.inc" + std::to_string(pos)); JumpDest EndDest = getJumpDestInCurrentScope("kokkos.forall.endlbl" + std::to_string(pos)); llvm::BasicBlock *End = createBasicBlock("kokkos.forall.end" + std::to_string(pos)); // Zero out the induction variable EmitBlock(Zero); llvm::Value *ZeroVal = llvm::ConstantInt::get(ConvertType(InductionVarDecl->getType()), 0); Builder.CreateStore(ZeroVal, GetAddrOfLocalVar(InductionVarDecl)); // Create the conditional. llvm::BasicBlock *ConditionBlock = Condition.getBlock(); EmitBlock(ConditionBlock); EmitKokkosParallelForCond(BE, InductionVarDecl, LoopBody, nullptr, EndDest); EmitBlock(LoopBody); { if (DimQueue.size() == 0) { // Create a separate cleanup scope for the body, in case it is not // a compound statement. InKokkosConstruct = true; RunCleanupsScope BodyScope(*this); EmitStmt(Lambda->getBody()); InKokkosConstruct = false; } else { EmitKokkosInnerLoop(CE, Lambda, ConditionBlock, DimQueue, params); } } EmitBlock(Increment); llvm::Value *IncVal = Builder.CreateLoad(GetAddrOfLocalVar(InductionVarDecl)); llvm::Value *One = llvm::ConstantInt::get(ConvertType(InductionVarDecl->getType()), 1); IncVal = Builder.CreateAdd(IncVal, One); Builder.CreateStore(IncVal, GetAddrOfLocalVar(InductionVarDecl)); EmitBranch(ConditionBlock); if (TopBlock != nullptr) { EmitBranch(TopBlock); } EmitBlock(EndDest.getBlock()); EmitBlock(End, true); return true; } bool CodeGenFunction::EmitKokkosParallelReduce(const CallExpr *CE, ArrayRef<const Attr *> Attrs) { DiagnosticsEngine &Diags = CGM.getDiags(); Loading clang/lib/CodeGen/CodeGenFunction.h +6 −0 Original line number Diff line number Diff line Loading @@ -42,6 +42,8 @@ #include "llvm/Transforms/Utils/SanitizerStats.h" #include "llvm/IR/ValueMap.h" #include <queue> namespace llvm { class BasicBlock; class LLVMContext; Loading Loading @@ -3526,6 +3528,10 @@ public: llvm::BasicBlock *ExitBlock, JumpDest &Sync); bool EmitKokkosParallelFor(const CallExpr *CE, ArrayRef<const Attr *> Attrs); bool EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr *Lambda, llvm::BasicBlock *TopBlock, std::queue<const Expr*> DimQueue, std::vector<const ParmVarDecl*> params); bool EmitKokkosParallelReduce(const CallExpr *CE, ArrayRef<const Attr *> Attrs); bool InKokkosConstruct = false; // FIXME: Should/can we refactor this away? Loading Loading
clang/lib/CodeGen/CGKokkos.cpp +100 −38 Original line number Diff line number Diff line Loading @@ -195,17 +195,6 @@ std::vector<const ParmVarDecl*> CodeGenFunction::EmitKokkosParallelForInductionVar(const LambdaExpr *Lambda) { const CXXMethodDecl *MD = Lambda->getCallOperator(); assert(MD && "EmitKokkosParallelFor() -- bad method decl from labmda call."); /*const ParmVarDecl *InductionVarDecl = MD->getParamDecl(0); assert(InductionVarDecl && "EmitKokkosParallelFor() -- bad loop variable decl!"); printf("PARAM COUNT: %d\n\n", MD->getNumParams()); EmitVarDecl(*InductionVarDecl); Address Addr = GetAddrOfLocalVar(InductionVarDecl); llvm::Value *Zero = llvm::ConstantInt::get(ConvertType(InductionVarDecl->getType()), 0); Builder.CreateStore(Zero, Addr); return InductionVarDecl;*/ std::vector<const ParmVarDecl*> params; Loading Loading @@ -234,17 +223,6 @@ void CodeGenFunction::EmitKokkosParallelForCond(const Expr *BoundsExpr, if (BoundsExpr->getStmtClass() == Expr::BinaryOperatorClass) { RValue RV = EmitAnyExpr(BoundsExpr); LoopEnd = RV.getScalarVal(); } else if (BoundsExpr->getStmtClass() == Expr::CXXTemporaryObjectExprClass) { const CXXTemporaryObjectExpr *CXXTO = dyn_cast<CXXTemporaryObjectExpr>(BoundsExpr); const InitListExpr *UpperBounds = dyn_cast<InitListExpr>(CXXTO->getArg(1)->IgnoreImplicit()); // Create a multiply statement to computer the proper upper bound const Expr *lval = UpperBounds->getInit(0)->IgnoreImplicit(); const Expr *rval = UpperBounds->getInit(1)->IgnoreImplicit(); llvm::Value *lvalue = EmitScalarExpr(lval); llvm::Value *rvalue = EmitScalarExpr(rval); LoopEnd = Builder.CreateMul(lvalue, rvalue); } else { LoopEnd = EmitScalarExpr(BoundsExpr); } Loading Loading @@ -293,6 +271,30 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, return false; } // Build the queue of dimensions (upper bounds) std::queue<const Expr *> DimQueue; if (BE->getStmtClass() == Expr::CXXTemporaryObjectExprClass) { const CXXTemporaryObjectExpr *CXXTO = dyn_cast<CXXTemporaryObjectExpr>(BE); const InitListExpr *UpperBounds = dyn_cast<InitListExpr>(CXXTO->getArg(1)->IgnoreImplicit()); for (int i = 0; i<UpperBounds->getNumInits(); i++) { const Expr *val = UpperBounds->getInit(i)->IgnoreImplicit(); DimQueue.push(val); } } else { DimQueue.push(BE); } // Get the induction varaibles std::vector<const ParmVarDecl*> params = EmitKokkosParallelForInductionVar(Lambda); // These are extra steps that we can probably optimize away BE = DimQueue.front(); DimQueue.pop(); const ParmVarDecl *InductionVarDecl = params.at(0); // Create all jump destinations and basic blocks in the order they // appear in the IR. JumpDest Condition = getJumpDestInCurrentScope("kokkos.forall.cond"); Loading Loading @@ -323,9 +325,6 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, // // TODO: Do we need to "relax" these assumptions to support broader code coverage? // This is 'equivalent' to the Init statement in a traditional for loop (e.g. int i = 0). /*const ParmVarDecl *InductionVarDecl; InductionVarDecl = EmitKokkosParallelForInductionVar(Lambda);*/ std::vector<const ParmVarDecl*> params = EmitKokkosParallelForInductionVar(Lambda); // Create the sync region. PushSyncRegion(); Loading @@ -348,7 +347,6 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, LexicalScope ConditionScope(*this, R); // Create the conditional. const ParmVarDecl *InductionVarDecl = params.at(0); EmitKokkosParallelForCond(BE, InductionVarDecl, Detach, End, Sync); if (PForScope.requiresCleanups()) { Loading Loading @@ -380,12 +378,16 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, Builder.CreateAlignedStore(GInductionVal, TLInductionVar, getContext().getTypeAlignInChars(RefType)); { if (DimQueue.size() == 0) { // Create a separate cleanup scope for the body, in case it is not // a compound statement. InKokkosConstruct = true; RunCleanupsScope BodyScope(*this); EmitStmt(Lambda->getBody()); InKokkosConstruct = false; } else { EmitKokkosInnerLoop(CE, Lambda, nullptr, DimQueue, params); } } auto tmp = AllocaInsertPt; Loading @@ -409,12 +411,10 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, Builder.CreateReattach(Increment, SRStart); EmitBlock(Increment); for (const ParmVarDecl* IVD : params) { llvm::Value *IncVal = Builder.CreateLoad(GetAddrOfLocalVar(IVD)); llvm::Value *One = llvm::ConstantInt::get(ConvertType(IVD->getType()), 1); llvm::Value *IncVal = Builder.CreateLoad(GetAddrOfLocalVar(InductionVarDecl)); llvm::Value *One = llvm::ConstantInt::get(ConvertType(InductionVarDecl->getType()), 1); IncVal = Builder.CreateAdd(IncVal, One); Builder.CreateStore(IncVal, GetAddrOfLocalVar(IVD)); } Builder.CreateStore(IncVal, GetAddrOfLocalVar(InductionVarDecl)); BreakContinueStack.pop_back(); ConditionScope.ForceCleanup(); Loading @@ -430,6 +430,68 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, return true; } // This is in charge of building an inner loop bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr *Lambda, llvm::BasicBlock *TopBlock, std::queue<const Expr*> DimQueue, std::vector<const ParmVarDecl*> params) { // Get arguments int pos = DimQueue.size(); const Expr *BE = DimQueue.front(); DimQueue.pop(); const ParmVarDecl *InductionVarDecl = params.at(pos); llvm::BasicBlock *Zero = createBasicBlock("kokkos.forall.zero" + std::to_string(pos)); JumpDest Condition = getJumpDestInCurrentScope("kokkos.forall.cond" + std::to_string(pos)); llvm::BasicBlock *LoopBody = createBasicBlock("kokkos.forall.body" + std::to_string(pos)); llvm::BasicBlock *Increment = createBasicBlock("kokkos.forall.inc" + std::to_string(pos)); JumpDest EndDest = getJumpDestInCurrentScope("kokkos.forall.endlbl" + std::to_string(pos)); llvm::BasicBlock *End = createBasicBlock("kokkos.forall.end" + std::to_string(pos)); // Zero out the induction variable EmitBlock(Zero); llvm::Value *ZeroVal = llvm::ConstantInt::get(ConvertType(InductionVarDecl->getType()), 0); Builder.CreateStore(ZeroVal, GetAddrOfLocalVar(InductionVarDecl)); // Create the conditional. llvm::BasicBlock *ConditionBlock = Condition.getBlock(); EmitBlock(ConditionBlock); EmitKokkosParallelForCond(BE, InductionVarDecl, LoopBody, nullptr, EndDest); EmitBlock(LoopBody); { if (DimQueue.size() == 0) { // Create a separate cleanup scope for the body, in case it is not // a compound statement. InKokkosConstruct = true; RunCleanupsScope BodyScope(*this); EmitStmt(Lambda->getBody()); InKokkosConstruct = false; } else { EmitKokkosInnerLoop(CE, Lambda, ConditionBlock, DimQueue, params); } } EmitBlock(Increment); llvm::Value *IncVal = Builder.CreateLoad(GetAddrOfLocalVar(InductionVarDecl)); llvm::Value *One = llvm::ConstantInt::get(ConvertType(InductionVarDecl->getType()), 1); IncVal = Builder.CreateAdd(IncVal, One); Builder.CreateStore(IncVal, GetAddrOfLocalVar(InductionVarDecl)); EmitBranch(ConditionBlock); if (TopBlock != nullptr) { EmitBranch(TopBlock); } EmitBlock(EndDest.getBlock()); EmitBlock(End, true); return true; } bool CodeGenFunction::EmitKokkosParallelReduce(const CallExpr *CE, ArrayRef<const Attr *> Attrs) { DiagnosticsEngine &Diags = CGM.getDiags(); Loading
clang/lib/CodeGen/CodeGenFunction.h +6 −0 Original line number Diff line number Diff line Loading @@ -42,6 +42,8 @@ #include "llvm/Transforms/Utils/SanitizerStats.h" #include "llvm/IR/ValueMap.h" #include <queue> namespace llvm { class BasicBlock; class LLVMContext; Loading Loading @@ -3526,6 +3528,10 @@ public: llvm::BasicBlock *ExitBlock, JumpDest &Sync); bool EmitKokkosParallelFor(const CallExpr *CE, ArrayRef<const Attr *> Attrs); bool EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr *Lambda, llvm::BasicBlock *TopBlock, std::queue<const Expr*> DimQueue, std::vector<const ParmVarDecl*> params); bool EmitKokkosParallelReduce(const CallExpr *CE, ArrayRef<const Attr *> Attrs); bool InKokkosConstruct = false; // FIXME: Should/can we refactor this away? Loading