Loading clang/lib/CodeGen/CGKokkos.cpp +45 −14 Original line number Diff line number Diff line Loading @@ -202,7 +202,7 @@ CodeGenFunction::EmitKokkosParallelForInductionVar(const LambdaExpr *Lambda) { const ParmVarDecl *InductionVarDecl = MD->getParamDecl(i); assert(InductionVarDecl && "EmitKokkosParallelFor() -- bad loop variable decl!"); EmitVarDecl(*InductionVarDecl); //EmitVarDecl(*InductionVarDecl); params.push_back(InductionVarDecl); } Loading Loading @@ -311,6 +311,7 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, // This is 'equivalent' to the Init statement in a traditional for loop (e.g. int i = 0). const ParmVarDecl *InductionVarDecl; InductionVarDecl = EmitKokkosParallelForInductionVar(Lambda).at(0); EmitVarDecl(*InductionVarDecl); // Create the sync region. PushSyncRegion(); Loading Loading @@ -443,7 +444,8 @@ bool CodeGenFunction::EmitKokkosParallelForMD(const CallExpr *CE, std::string PF std::vector<const ParmVarDecl*> params = EmitKokkosParallelForInductionVar(Lambda); // Build the inner loops, and eventually the body return EmitKokkosInnerLoop(CE, Lambda, nullptr, DimQueue, StartQueue, params, ForallAttrs); std::vector<std::pair<llvm::Value*, llvm::AllocaInst*>> TLIVarList; return EmitKokkosInnerLoop(CE, Lambda, nullptr, DimQueue, StartQueue, params, TLIVarList, ForallAttrs); } // This is in charge of building an inner loop. It works as a recursive function to allow the loops Loading @@ -456,6 +458,7 @@ bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr * std::vector<const Expr*> DimQueue, std::vector<const Expr*> StartQueue, std::vector<const ParmVarDecl*> params, std::vector<std::pair<llvm::Value*, llvm::AllocaInst*>> TLIVarList, ArrayRef<const Attr *> ForallAttrs) { // Load the data we need int pos = DimQueue.size(); Loading @@ -478,10 +481,11 @@ bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr * JumpDest Sync = getJumpDestInCurrentScope("kokkos.forall.sync" + std::to_string(pos)); llvm::BasicBlock *End = createBasicBlock("kokkos.forall.end" + std::to_string(pos)); // Set the induction variable's starting point /*// Set the induction variable's starting point EmitBlock(InductionSet); EmitVarDecl(*InductionVarDecl); llvm::Value *LoopStart = EmitScalarExpr(SE); Builder.CreateStore(LoopStart, GetAddrOfLocalVar(InductionVarDecl)); Builder.CreateStore(LoopStart, GetAddrOfLocalVar(InductionVarDecl));*/ // Extract a conveince block and setup the lexical scope based on // the lambda's source range. Loading @@ -508,6 +512,12 @@ bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr * llvm::Instruction *SRStart = EmitSyncRegionStart(); CurSyncRegion->setSyncRegionStart(SRStart); // Set the induction variable's starting point EmitBlock(InductionSet); EmitVarDecl(*InductionVarDecl); llvm::Value *LoopStart = EmitScalarExpr(SE); Builder.CreateStore(LoopStart, GetAddrOfLocalVar(InductionVarDecl)); // TODO: Need to check attributes for spawning strategy. LoopStack.setSpawnStrategy(LoopAttributes::DAC); Loading Loading @@ -554,6 +564,9 @@ bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr * InductionVarDecl->getName() + ".detach"); Builder.CreateAlignedStore(GInductionVal, TLInductionVar, getContext().getTypeAlignInChars(RefType)); std::pair<llvm::Value*, llvm::AllocaInst*> pair(GInductionVar, TLInductionVar); TLIVarList.push_back(pair); { if (DimQueue.size() == 0) { // Create a separate cleanup scope for the body, in case it is not Loading @@ -562,8 +575,26 @@ bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr * RunCleanupsScope BodyScope(*this); EmitStmt(Lambda->getBody()); InKokkosConstruct = false; // Modify the body to use the ''detach''-local induction variable. // At this point in the codegen, the body block has been emitted // and we can safely replace the ''sequential`` induction variable // within the detach basic block. llvm::BasicBlock *CurrentBlock = Builder.GetInsertBlock(); for (int i = 0; i<TLIVarList.size(); i++) { auto TLVar = TLIVarList.at(i).second; auto GInductionVar = TLIVarList.at(i).first; for(llvm::Value::use_iterator UI = GInductionVar->use_begin(), UE = GInductionVar->use_end(); UI != UE; ) { llvm::Use &U = *UI++; llvm::Instruction *I = cast<llvm::Instruction>(U.getUser()); if (I->getParent() == CurrentBlock) U.set(TLVar); } } } else { EmitKokkosInnerLoop(CE, Lambda, ConditionBlock, DimQueue, StartQueue, params, ForallAttrs); EmitKokkosInnerLoop(CE, Lambda, ConditionBlock, DimQueue, StartQueue, params, TLIVarList, ForallAttrs); } } Loading @@ -575,14 +606,14 @@ bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr * // At this point in the codegen, the body block has been emitted // and we can safely replace the ''sequential`` induction variable // within the detach basic block. llvm::BasicBlock *CurrentBlock = Builder.GetInsertBlock(); /* llvm::BasicBlock *CurrentBlock = Builder.GetInsertBlock(); for(llvm::Value::use_iterator UI = GInductionVar->use_begin(), UE = GInductionVar->use_end(); UI != UE; ) { llvm::Use &U = *UI++; llvm::Instruction *I = cast<llvm::Instruction>(U.getUser()); if (I->getParent() == CurrentBlock) U.set(TLInductionVar); } }*/ EmitBlock(Reattach.getBlock()); Builder.CreateReattach(Increment, SRStart); Loading @@ -601,9 +632,9 @@ bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr * PForScope.ForceCleanup(); LoopStack.pop(); if (TopBlock != nullptr) { /*if (TopBlock != nullptr) { EmitBranch(TopBlock); } }*/ EmitBlock(Sync.getBlock()); Builder.CreateSync(End, SRStart); Loading clang/lib/CodeGen/CodeGenFunction.h +1 −0 Original line number Diff line number Diff line Loading @@ -3535,6 +3535,7 @@ public: std::vector<const Expr*> DimQueue, std::vector<const Expr*> StartQueue, std::vector<const ParmVarDecl*> params, std::vector<std::pair<llvm::Value*, llvm::AllocaInst*>> TLIVarList, ArrayRef<const Attr *> ForallAttrs); bool EmitKokkosParallelReduce(const CallExpr *CE, ArrayRef<const Attr *> Attrs); bool InKokkosConstruct = false; // FIXME: Should/can we refactor this away? Loading Loading
clang/lib/CodeGen/CGKokkos.cpp +45 −14 Original line number Diff line number Diff line Loading @@ -202,7 +202,7 @@ CodeGenFunction::EmitKokkosParallelForInductionVar(const LambdaExpr *Lambda) { const ParmVarDecl *InductionVarDecl = MD->getParamDecl(i); assert(InductionVarDecl && "EmitKokkosParallelFor() -- bad loop variable decl!"); EmitVarDecl(*InductionVarDecl); //EmitVarDecl(*InductionVarDecl); params.push_back(InductionVarDecl); } Loading Loading @@ -311,6 +311,7 @@ bool CodeGenFunction::EmitKokkosParallelFor(const CallExpr *CE, // This is 'equivalent' to the Init statement in a traditional for loop (e.g. int i = 0). const ParmVarDecl *InductionVarDecl; InductionVarDecl = EmitKokkosParallelForInductionVar(Lambda).at(0); EmitVarDecl(*InductionVarDecl); // Create the sync region. PushSyncRegion(); Loading Loading @@ -443,7 +444,8 @@ bool CodeGenFunction::EmitKokkosParallelForMD(const CallExpr *CE, std::string PF std::vector<const ParmVarDecl*> params = EmitKokkosParallelForInductionVar(Lambda); // Build the inner loops, and eventually the body return EmitKokkosInnerLoop(CE, Lambda, nullptr, DimQueue, StartQueue, params, ForallAttrs); std::vector<std::pair<llvm::Value*, llvm::AllocaInst*>> TLIVarList; return EmitKokkosInnerLoop(CE, Lambda, nullptr, DimQueue, StartQueue, params, TLIVarList, ForallAttrs); } // This is in charge of building an inner loop. It works as a recursive function to allow the loops Loading @@ -456,6 +458,7 @@ bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr * std::vector<const Expr*> DimQueue, std::vector<const Expr*> StartQueue, std::vector<const ParmVarDecl*> params, std::vector<std::pair<llvm::Value*, llvm::AllocaInst*>> TLIVarList, ArrayRef<const Attr *> ForallAttrs) { // Load the data we need int pos = DimQueue.size(); Loading @@ -478,10 +481,11 @@ bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr * JumpDest Sync = getJumpDestInCurrentScope("kokkos.forall.sync" + std::to_string(pos)); llvm::BasicBlock *End = createBasicBlock("kokkos.forall.end" + std::to_string(pos)); // Set the induction variable's starting point /*// Set the induction variable's starting point EmitBlock(InductionSet); EmitVarDecl(*InductionVarDecl); llvm::Value *LoopStart = EmitScalarExpr(SE); Builder.CreateStore(LoopStart, GetAddrOfLocalVar(InductionVarDecl)); Builder.CreateStore(LoopStart, GetAddrOfLocalVar(InductionVarDecl));*/ // Extract a conveince block and setup the lexical scope based on // the lambda's source range. Loading @@ -508,6 +512,12 @@ bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr * llvm::Instruction *SRStart = EmitSyncRegionStart(); CurSyncRegion->setSyncRegionStart(SRStart); // Set the induction variable's starting point EmitBlock(InductionSet); EmitVarDecl(*InductionVarDecl); llvm::Value *LoopStart = EmitScalarExpr(SE); Builder.CreateStore(LoopStart, GetAddrOfLocalVar(InductionVarDecl)); // TODO: Need to check attributes for spawning strategy. LoopStack.setSpawnStrategy(LoopAttributes::DAC); Loading Loading @@ -554,6 +564,9 @@ bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr * InductionVarDecl->getName() + ".detach"); Builder.CreateAlignedStore(GInductionVal, TLInductionVar, getContext().getTypeAlignInChars(RefType)); std::pair<llvm::Value*, llvm::AllocaInst*> pair(GInductionVar, TLInductionVar); TLIVarList.push_back(pair); { if (DimQueue.size() == 0) { // Create a separate cleanup scope for the body, in case it is not Loading @@ -562,8 +575,26 @@ bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr * RunCleanupsScope BodyScope(*this); EmitStmt(Lambda->getBody()); InKokkosConstruct = false; // Modify the body to use the ''detach''-local induction variable. // At this point in the codegen, the body block has been emitted // and we can safely replace the ''sequential`` induction variable // within the detach basic block. llvm::BasicBlock *CurrentBlock = Builder.GetInsertBlock(); for (int i = 0; i<TLIVarList.size(); i++) { auto TLVar = TLIVarList.at(i).second; auto GInductionVar = TLIVarList.at(i).first; for(llvm::Value::use_iterator UI = GInductionVar->use_begin(), UE = GInductionVar->use_end(); UI != UE; ) { llvm::Use &U = *UI++; llvm::Instruction *I = cast<llvm::Instruction>(U.getUser()); if (I->getParent() == CurrentBlock) U.set(TLVar); } } } else { EmitKokkosInnerLoop(CE, Lambda, ConditionBlock, DimQueue, StartQueue, params, ForallAttrs); EmitKokkosInnerLoop(CE, Lambda, ConditionBlock, DimQueue, StartQueue, params, TLIVarList, ForallAttrs); } } Loading @@ -575,14 +606,14 @@ bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr * // At this point in the codegen, the body block has been emitted // and we can safely replace the ''sequential`` induction variable // within the detach basic block. llvm::BasicBlock *CurrentBlock = Builder.GetInsertBlock(); /* llvm::BasicBlock *CurrentBlock = Builder.GetInsertBlock(); for(llvm::Value::use_iterator UI = GInductionVar->use_begin(), UE = GInductionVar->use_end(); UI != UE; ) { llvm::Use &U = *UI++; llvm::Instruction *I = cast<llvm::Instruction>(U.getUser()); if (I->getParent() == CurrentBlock) U.set(TLInductionVar); } }*/ EmitBlock(Reattach.getBlock()); Builder.CreateReattach(Increment, SRStart); Loading @@ -601,9 +632,9 @@ bool CodeGenFunction::EmitKokkosInnerLoop(const CallExpr *CE, const LambdaExpr * PForScope.ForceCleanup(); LoopStack.pop(); if (TopBlock != nullptr) { /*if (TopBlock != nullptr) { EmitBranch(TopBlock); } }*/ EmitBlock(Sync.getBlock()); Builder.CreateSync(End, SRStart); Loading
clang/lib/CodeGen/CodeGenFunction.h +1 −0 Original line number Diff line number Diff line Loading @@ -3535,6 +3535,7 @@ public: std::vector<const Expr*> DimQueue, std::vector<const Expr*> StartQueue, std::vector<const ParmVarDecl*> params, std::vector<std::pair<llvm::Value*, llvm::AllocaInst*>> TLIVarList, ArrayRef<const Attr *> ForallAttrs); bool EmitKokkosParallelReduce(const CallExpr *CE, ArrayRef<const Attr *> Attrs); bool InKokkosConstruct = false; // FIXME: Should/can we refactor this away? Loading