Loading clang/lib/CodeGen/CGKokkos.cpp +31 −17 Original line number Diff line number Diff line Loading @@ -203,10 +203,6 @@ CodeGenFunction::EmitKokkosParallelForInductionVar(const LambdaExpr *Lambda) { assert(InductionVarDecl && "EmitKokkosParallelFor() -- bad loop variable decl!"); EmitVarDecl(*InductionVarDecl); Address Addr = GetAddrOfLocalVar(InductionVarDecl); llvm::Value *Zero = llvm::ConstantInt::get(ConvertType(InductionVarDecl->getType()), 0); Builder.CreateStore(Zero, Addr); params.push(InductionVarDecl); } Loading Loading @@ -273,11 +269,18 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, // Build the queue of dimensions (upper bounds) std::queue<const Expr *> DimQueue; std::queue<const Expr *> StartQueue; if (BE->getStmtClass() == Expr::CXXTemporaryObjectExprClass) { const CXXTemporaryObjectExpr *CXXTO = dyn_cast<CXXTemporaryObjectExpr>(BE); const InitListExpr *StartingBounds = dyn_cast<InitListExpr>(CXXTO->getArg(0)->IgnoreImplicit()); const InitListExpr *UpperBounds = dyn_cast<InitListExpr>(CXXTO->getArg(1)->IgnoreImplicit()); for (int i = 0; i<StartingBounds->getNumInits(); i++) { const Expr *val = StartingBounds->getInit(i)->IgnoreImplicit(); StartQueue.push(val); } for (int i = 0; i<UpperBounds->getNumInits(); i++) { const Expr *val = UpperBounds->getInit(i)->IgnoreImplicit(); DimQueue.push(val); Loading @@ -286,16 +289,10 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, DimQueue.push(BE); } // Get the induction varaibles std::queue<const ParmVarDecl*> params = EmitKokkosParallelForInductionVar(Lambda); // These are extra steps that we can probably optimize away BE = DimQueue.front(); DimQueue.pop(); const ParmVarDecl *InductionVarDecl = params.front(); params.pop(); // Create all jump destinations and basic blocks in the order they // appear in the IR. JumpDest Condition = getJumpDestInCurrentScope("kokkos.forall.cond"); Loading @@ -307,6 +304,19 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, JumpDest Sync = getJumpDestInCurrentScope("kokkos.forall.sync"); llvm::BasicBlock *End = createBasicBlock("kokkos.forall.end"); // Get the induction variables, and set the first. If its a single-layer loop, this will be the only variable // We do this here so we don't set any inner loop variable twice in a row std::queue<const ParmVarDecl*> params = EmitKokkosParallelForInductionVar(Lambda); const ParmVarDecl *InductionVarDecl = params.front(); params.pop(); const Expr *SE = StartQueue.front(); StartQueue.pop(); llvm::Value *LoopStart = EmitScalarExpr(SE); Builder.CreateStore(LoopStart, GetAddrOfLocalVar(InductionVarDecl)); // Extract a conveince block and setup the lexical scope based on // the lambda's source range. llvm::BasicBlock *ConditionBlock = Condition.getBlock(); Loading Loading @@ -387,7 +397,7 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, EmitStmt(Lambda->getBody()); InKokkosConstruct = false; } else { EmitKokkosInnerLoop(CE, Lambda, nullptr, DimQueue, params); EmitKokkosInnerLoop(CE, Lambda, nullptr, DimQueue, StartQueue, params); } } Loading Loading @@ -435,16 +445,20 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr *Lambda, llvm::BasicBlock *TopBlock, std::queue<const Expr*> DimQueue, std::queue<const Expr*> StartQueue, std::queue<const ParmVarDecl*> params) { // Get arguments // Load the data we need int pos = DimQueue.size(); const Expr *BE = DimQueue.front(); DimQueue.pop(); const Expr *SE = StartQueue.front(); StartQueue.pop(); const ParmVarDecl *InductionVarDecl = params.front(); params.pop(); llvm::BasicBlock *Zero = createBasicBlock("kokkos.forall.zero" + std::to_string(pos)); llvm::BasicBlock *InductionSet = createBasicBlock("kokkos.forall.set" + std::to_string(pos)); JumpDest Condition = getJumpDestInCurrentScope("kokkos.forall.cond" + std::to_string(pos)); llvm::BasicBlock *LoopBody = createBasicBlock("kokkos.forall.body" + std::to_string(pos)); llvm::BasicBlock *Increment = createBasicBlock("kokkos.forall.inc" + std::to_string(pos)); Loading @@ -452,9 +466,9 @@ bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr * llvm::BasicBlock *End = createBasicBlock("kokkos.forall.end" + std::to_string(pos)); // Zero out the induction variable EmitBlock(Zero); llvm::Value *ZeroVal = llvm::ConstantInt::get(ConvertType(InductionVarDecl->getType()), 0); Builder.CreateStore(ZeroVal, GetAddrOfLocalVar(InductionVarDecl)); EmitBlock(InductionSet); llvm::Value *LoopStart = EmitScalarExpr(SE); Builder.CreateStore(LoopStart, GetAddrOfLocalVar(InductionVarDecl)); // Create the conditional. llvm::BasicBlock *ConditionBlock = Condition.getBlock(); Loading @@ -472,7 +486,7 @@ bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr * EmitStmt(Lambda->getBody()); InKokkosConstruct = false; } else { EmitKokkosInnerLoop(CE, Lambda, ConditionBlock, DimQueue, params); EmitKokkosInnerLoop(CE, Lambda, ConditionBlock, DimQueue, StartQueue, params); } } Loading clang/lib/CodeGen/CodeGenFunction.h +1 −0 Original line number Diff line number Diff line Loading @@ -3531,6 +3531,7 @@ public: bool EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr *Lambda, llvm::BasicBlock *TopBlock, std::queue<const Expr*> DimQueue, std::queue<const Expr*> StartQueue, std::queue<const ParmVarDecl*> params); bool EmitKokkosParallelReduce(const CallExpr *CE, ArrayRef<const Attr *> Attrs); bool InKokkosConstruct = false; // FIXME: Should/can we refactor this away? Loading Loading
clang/lib/CodeGen/CGKokkos.cpp +31 −17 Original line number Diff line number Diff line Loading @@ -203,10 +203,6 @@ CodeGenFunction::EmitKokkosParallelForInductionVar(const LambdaExpr *Lambda) { assert(InductionVarDecl && "EmitKokkosParallelFor() -- bad loop variable decl!"); EmitVarDecl(*InductionVarDecl); Address Addr = GetAddrOfLocalVar(InductionVarDecl); llvm::Value *Zero = llvm::ConstantInt::get(ConvertType(InductionVarDecl->getType()), 0); Builder.CreateStore(Zero, Addr); params.push(InductionVarDecl); } Loading Loading @@ -273,11 +269,18 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, // Build the queue of dimensions (upper bounds) std::queue<const Expr *> DimQueue; std::queue<const Expr *> StartQueue; if (BE->getStmtClass() == Expr::CXXTemporaryObjectExprClass) { const CXXTemporaryObjectExpr *CXXTO = dyn_cast<CXXTemporaryObjectExpr>(BE); const InitListExpr *StartingBounds = dyn_cast<InitListExpr>(CXXTO->getArg(0)->IgnoreImplicit()); const InitListExpr *UpperBounds = dyn_cast<InitListExpr>(CXXTO->getArg(1)->IgnoreImplicit()); for (int i = 0; i<StartingBounds->getNumInits(); i++) { const Expr *val = StartingBounds->getInit(i)->IgnoreImplicit(); StartQueue.push(val); } for (int i = 0; i<UpperBounds->getNumInits(); i++) { const Expr *val = UpperBounds->getInit(i)->IgnoreImplicit(); DimQueue.push(val); Loading @@ -286,16 +289,10 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, DimQueue.push(BE); } // Get the induction varaibles std::queue<const ParmVarDecl*> params = EmitKokkosParallelForInductionVar(Lambda); // These are extra steps that we can probably optimize away BE = DimQueue.front(); DimQueue.pop(); const ParmVarDecl *InductionVarDecl = params.front(); params.pop(); // Create all jump destinations and basic blocks in the order they // appear in the IR. JumpDest Condition = getJumpDestInCurrentScope("kokkos.forall.cond"); Loading @@ -307,6 +304,19 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, JumpDest Sync = getJumpDestInCurrentScope("kokkos.forall.sync"); llvm::BasicBlock *End = createBasicBlock("kokkos.forall.end"); // Get the induction variables, and set the first. If its a single-layer loop, this will be the only variable // We do this here so we don't set any inner loop variable twice in a row std::queue<const ParmVarDecl*> params = EmitKokkosParallelForInductionVar(Lambda); const ParmVarDecl *InductionVarDecl = params.front(); params.pop(); const Expr *SE = StartQueue.front(); StartQueue.pop(); llvm::Value *LoopStart = EmitScalarExpr(SE); Builder.CreateStore(LoopStart, GetAddrOfLocalVar(InductionVarDecl)); // Extract a conveince block and setup the lexical scope based on // the lambda's source range. llvm::BasicBlock *ConditionBlock = Condition.getBlock(); Loading Loading @@ -387,7 +397,7 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, EmitStmt(Lambda->getBody()); InKokkosConstruct = false; } else { EmitKokkosInnerLoop(CE, Lambda, nullptr, DimQueue, params); EmitKokkosInnerLoop(CE, Lambda, nullptr, DimQueue, StartQueue, params); } } Loading Loading @@ -435,16 +445,20 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr *Lambda, llvm::BasicBlock *TopBlock, std::queue<const Expr*> DimQueue, std::queue<const Expr*> StartQueue, std::queue<const ParmVarDecl*> params) { // Get arguments // Load the data we need int pos = DimQueue.size(); const Expr *BE = DimQueue.front(); DimQueue.pop(); const Expr *SE = StartQueue.front(); StartQueue.pop(); const ParmVarDecl *InductionVarDecl = params.front(); params.pop(); llvm::BasicBlock *Zero = createBasicBlock("kokkos.forall.zero" + std::to_string(pos)); llvm::BasicBlock *InductionSet = createBasicBlock("kokkos.forall.set" + std::to_string(pos)); JumpDest Condition = getJumpDestInCurrentScope("kokkos.forall.cond" + std::to_string(pos)); llvm::BasicBlock *LoopBody = createBasicBlock("kokkos.forall.body" + std::to_string(pos)); llvm::BasicBlock *Increment = createBasicBlock("kokkos.forall.inc" + std::to_string(pos)); Loading @@ -452,9 +466,9 @@ bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr * llvm::BasicBlock *End = createBasicBlock("kokkos.forall.end" + std::to_string(pos)); // Zero out the induction variable EmitBlock(Zero); llvm::Value *ZeroVal = llvm::ConstantInt::get(ConvertType(InductionVarDecl->getType()), 0); Builder.CreateStore(ZeroVal, GetAddrOfLocalVar(InductionVarDecl)); EmitBlock(InductionSet); llvm::Value *LoopStart = EmitScalarExpr(SE); Builder.CreateStore(LoopStart, GetAddrOfLocalVar(InductionVarDecl)); // Create the conditional. llvm::BasicBlock *ConditionBlock = Condition.getBlock(); Loading @@ -472,7 +486,7 @@ bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr * EmitStmt(Lambda->getBody()); InKokkosConstruct = false; } else { EmitKokkosInnerLoop(CE, Lambda, ConditionBlock, DimQueue, params); EmitKokkosInnerLoop(CE, Lambda, ConditionBlock, DimQueue, StartQueue, params); } } Loading
clang/lib/CodeGen/CodeGenFunction.h +1 −0 Original line number Diff line number Diff line Loading @@ -3531,6 +3531,7 @@ public: bool EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr *Lambda, llvm::BasicBlock *TopBlock, std::queue<const Expr*> DimQueue, std::queue<const Expr*> StartQueue, std::queue<const ParmVarDecl*> params); bool EmitKokkosParallelReduce(const CallExpr *CE, ArrayRef<const Attr *> Attrs); bool InKokkosConstruct = false; // FIXME: Should/can we refactor this away? Loading