Commit 0232a4f9, authored by Stelle, George Widgery; committed by George Stelle
Browse files

Fixed run size

parent 175e7002
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@
#include <stdint.h>

#define ocl_mmap(a, n) __kitsune_opencl_mmap_marker((void*)a, n)
void __kitsune_opencl_mmap_marker(void* ptr, uint64_t n);
extern "C" void __kitsune_opencl_mmap_marker(void* ptr, uint64_t n);
#define spawn _kitsune_spawn
#define sync _kitsune_sync
#define forall _kitsune_forall
+11 −2
Original line number Diff line number Diff line
@@ -367,6 +367,7 @@ void SPIRVLoop::processOutlinedLoopCall(TapirLoopInfo &TL, TaskOutlineInfo &TOI,
  PassManager->add(createReassociatePass());
  PassManager->add(createGVNPass());
  PassManager->add(createCFGSimplificationPass());
  PassManager->add(createLoopVectorizePass());
  PassManager->add(createSLPVectorizerPass());
  //PassManager->add(createBreakCriticalEdgesPass());
  PassManager->add(createConstantPropagationPass());
@@ -477,7 +478,15 @@ void SPIRVLoop::processOutlinedLoopCall(TapirLoopInfo &TL, TaskOutlineInfo &TOI,
  }


  Value *RunSize = B.CreateSub(TripCount, ConstantInt::get(TripCount->getType(), 1));
  Value *Grainsize = TL.getGrainsize() ?  
    ConstantInt::get(TripCount->getType(), TL.getGrainsize()) :
    OrderedInputs[2]; 

  Value *RunSizeQ = B.CreateUDiv(TripCount, Grainsize);
  Value *RunRem = B.CreateURem(TripCount, Grainsize);
  Value *IsRem = B.CreateICmp(ICmpInst::ICMP_UGT, RunRem, ConstantInt::get(RunRem->getType(), 0)); 
  Value *IsRemAdd = B.CreateZExt(IsRem, RunSizeQ->getType()); 
  Value *RunSize = B.CreateAdd(RunSizeQ, IsRemAdd);  
  
  B.CreateCall(KitsuneGPUSetRunSize, { KernelID, RunSize });