Loading polly/lib/CodeGen/IslAst.cpp +205 −6 Original line number Diff line number Diff line Loading @@ -22,6 +22,7 @@ #include "polly/CodeGen/IslAst.h" #include "polly/LinkAllPasses.h" #include "polly/Dependences.h" #include "polly/ScopInfo.h" #define DEBUG_TYPE "polly-ast" Loading @@ -43,10 +44,14 @@ static cl::opt<bool> UseContext("polly-ast-use-context", cl::desc("Use context"), cl::Hidden, cl::init(false), cl::ZeroOrMore); static cl::opt<bool> DetectParallel("polly-ast-detect-parallel", cl::desc("Detect parallelism"), cl::Hidden, cl::init(false), cl::ZeroOrMore); namespace polly { class IslAst { public: IslAst(Scop *Scop); IslAst(Scop *Scop, Dependences &D); ~IslAst(); Loading @@ -72,7 +77,182 @@ static void IslAstUserFree(void *User) free(UserStruct); } static __isl_give isl_ast_node *AtEachDomain(__isl_keep isl_ast_node *Node, // Information about an ast node. struct AstNodeUserInfo { // The node is the outermost parallel loop. int IsOutermostParallel; }; // Temporary information used when building the ast. struct AstBuildUserInfo { // The dependence information. Dependences *Deps; // We are inside a parallel for node. int InParallelFor; }; // Print a loop annotated with OpenMP pragmas. static __isl_give isl_printer * printParallelFor(__isl_keep isl_ast_node *Node, __isl_take isl_printer *Printer, __isl_take isl_ast_print_options *PrintOptions, AstNodeUserInfo *Info) { if (Info && Info->IsOutermostParallel) { Printer = isl_printer_start_line(Printer); if (Info->IsOutermostParallel) Printer = isl_printer_print_str(Printer, "#pragma omp parallel for"); Printer = isl_printer_end_line(Printer); } return isl_ast_node_for_print(Node, Printer, PrintOptions); } // Print an isl_ast_for. static __isl_give isl_printer * printFor(__isl_take isl_printer *Printer, __isl_take isl_ast_print_options *PrintOptions, __isl_keep isl_ast_node *Node, void *User) { isl_id *Id = isl_ast_node_get_annotation(Node); if (!Id) return isl_ast_node_for_print(Node, Printer, PrintOptions); struct AstNodeUserInfo *Info = (struct AstNodeUserInfo *) isl_id_get_user(Id); Printer = printParallelFor(Node, Printer, PrintOptions, Info); isl_id_free(Id); return Printer; } // Allocate an AstNodeInfo structure and initialize it with default values. static struct AstNodeUserInfo *allocateAstNodeUserInfo() { struct AstNodeUserInfo *NodeInfo; NodeInfo = (struct AstNodeUserInfo *) malloc(sizeof(struct AstNodeUserInfo)); NodeInfo->IsOutermostParallel = 0; return NodeInfo; } // Free the AstNodeInfo structure. static void freeAstNodeUserInfo(void *Ptr) { struct AstNodeUserInfo *Info; Info = (struct AstNodeUserInfo *) Ptr; free(Info); } // Check if the current scheduling dimension is parallel. // // We check for parallelism by verifying that the loop does not carry any // dependences. // // Parallelism test: if the distance is zero in all outer dimensions, then it // has to be zero in the current dimension as well. // // Implementation: first, translate dependences into time space, then force // outer dimensions to be equal. If the distance is zero in the current // dimension, then the loop is parallel. The distance is zero in the current // dimension if it is a subset of a map with equal values for the current // dimension. static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build, Dependences *D) { isl_union_map *Schedule, *Deps; isl_map *ScheduleDeps, *Test; isl_space *ScheduleSpace; unsigned Dimension, IsParallel; Schedule = isl_ast_build_get_schedule(Build); ScheduleSpace = isl_ast_build_get_schedule_space(Build); Dimension = isl_space_dim(ScheduleSpace, isl_dim_out) - 1; Deps = D->getDependences(Dependences::TYPE_ALL); Deps = isl_union_map_apply_range(Deps, isl_union_map_copy(Schedule)); Deps = isl_union_map_apply_domain(Deps, Schedule); if (isl_union_map_is_empty(Deps)) { isl_union_map_free(Deps); isl_space_free(ScheduleSpace); return 1; } ScheduleDeps = isl_map_from_union_map(Deps); for (unsigned i = 0; i < Dimension; i++) ScheduleDeps = isl_map_equate(ScheduleDeps, isl_dim_out, i, isl_dim_in, i); Test = isl_map_universe(isl_map_get_space(ScheduleDeps)); Test = isl_map_equate(Test, isl_dim_out, Dimension, isl_dim_in, Dimension); IsParallel = isl_map_is_subset(ScheduleDeps, Test); isl_space_free(ScheduleSpace); isl_map_free(Test); isl_map_free(ScheduleDeps); return IsParallel; } // Mark a for node openmp parallel, if it is the outermost parallel for node. static void markOpenmpParallel(__isl_keep isl_ast_build *Build, struct AstBuildUserInfo *BuildInfo, struct AstNodeUserInfo *NodeInfo) { if (BuildInfo->InParallelFor) return; if (astScheduleDimIsParallel(Build, BuildInfo->Deps)) { BuildInfo->InParallelFor = 1; NodeInfo->IsOutermostParallel = 1; } } // This method is executed before the construction of a for node. It creates // an isl_id that is used to annotate the subsequently generated ast for nodes. // // In this function we also run the following analyses: // // - Detection of openmp parallel loops // static __isl_give isl_id *astBuildBeforeFor(__isl_keep isl_ast_build *Build, void *User) { isl_id *Id; struct AstBuildUserInfo *BuildInfo; struct AstNodeUserInfo *NodeInfo; BuildInfo = (struct AstBuildUserInfo *) User; NodeInfo = allocateAstNodeUserInfo(); Id = isl_id_alloc(isl_ast_build_get_ctx(Build), "", NodeInfo); Id = isl_id_set_free_user(Id, freeAstNodeUserInfo); markOpenmpParallel(Build, BuildInfo, NodeInfo); return Id; } // This method is executed after the construction of a for node. // // It performs the following actions: // // - Reset the 'InParallelFor' flag, as soon as we leave a for node, // that is marked as openmp parallel. // static __isl_give isl_ast_node * astBuildAfterFor(__isl_take isl_ast_node *Node, __isl_keep isl_ast_build *Build, void *User) { isl_id *Id; struct AstBuildUserInfo *BuildInfo; struct AstNodeUserInfo *Info; Id = isl_ast_node_get_annotation(Node); if (!Id) return Node; Info = (struct AstNodeUserInfo *) isl_id_get_user(Id); if (Info && Info->IsOutermostParallel) { BuildInfo = (struct AstBuildUserInfo *) User; BuildInfo->InParallelFor = 0; } isl_id_free(Id); return Node; } static __isl_give isl_ast_node * AtEachDomain(__isl_keep isl_ast_node *Node, __isl_keep isl_ast_build *Context, void *User) { isl_map *Map; Loading @@ -90,10 +270,11 @@ static __isl_give isl_ast_node *AtEachDomain(__isl_keep isl_ast_node *Node, return isl_ast_node_set_annotation(Node, Annotation); } IslAst::IslAst(Scop *Scop) : S(Scop) { IslAst::IslAst(Scop *Scop, Dependences &D) : S(Scop) { isl_ctx *Ctx = S->getIslCtx(); isl_options_set_ast_build_atomic_upper_bound(Ctx, true); isl_ast_build *Context; struct AstBuildUserInfo BuildInfo; if (UseContext) Context = isl_ast_build_from_context(S->getContext()); Loading @@ -112,6 +293,16 @@ IslAst::IslAst(Scop *Scop) : S(Scop) { isl_union_map_dump(Schedule); ); if (DetectParallel) { BuildInfo.Deps = &D; BuildInfo.InParallelFor = 0; Context = isl_ast_build_set_before_each_for(Context, &astBuildBeforeFor, &BuildInfo); Context = isl_ast_build_set_after_each_for(Context, &astBuildAfterFor, &BuildInfo); } Root = isl_ast_build_ast_from_schedule(Context, Schedule); isl_ast_build_free(Context); Loading Loading @@ -141,7 +332,11 @@ IslAst::~IslAst() { /// Print a C like representation of the program. void IslAst::pprint(llvm::raw_ostream &OS) { isl_ast_node *Root; isl_ast_print_options *Options = isl_ast_print_options_alloc(S->getIslCtx()); isl_ast_print_options *Options; Options = isl_ast_print_options_alloc(S->getIslCtx()); Options = isl_ast_print_options_set_print_for(Options, &printFor, NULL); isl_printer *P = isl_printer_to_str(S->getIslCtx()); P = isl_printer_set_output_format(P, ISL_FORMAT_C); Root = getAst(); Loading Loading @@ -174,7 +369,9 @@ bool IslAstInfo::runOnScop(Scop &Scop) { S = &Scop; Ast = new IslAst(&Scop); Dependences &D = getAnalysis<Dependences>(); Ast = new IslAst(&Scop, D); return false; } Loading @@ -195,12 +392,14 @@ void IslAstInfo::getAnalysisUsage(AnalysisUsage &AU) const { // Get the Common analysis usage of ScopPasses. ScopPass::getAnalysisUsage(AU); AU.addRequired<ScopInfo>(); AU.addRequired<Dependences>(); } char IslAstInfo::ID = 0; INITIALIZE_PASS_BEGIN(IslAstInfo, "polly-ast", "Generate an AST of the SCoP (isl)", false, false) INITIALIZE_PASS_DEPENDENCY(ScopInfo) INITIALIZE_PASS_DEPENDENCY(Dependences) INITIALIZE_PASS_END(IslAstInfo, "polly-ast", "Generate an AST from the SCoP (isl)", false, false) Loading polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel.ll 0 → 100644 +46 −0 Original line number Diff line number Diff line ; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux-gnu" ; for (i = 0; i < 1024; i++) ; for (j = 0; j < 1024; j++) ; A[i][j] = 1; @A = common global [1024 x [1024 x i32]] zeroinitializer define void @bar() { start: fence seq_cst br label %loop.i loop.i: %i = phi i64 [ 0, %start ], [ %i.next, %loop.i.backedge ] %exitcond.i = icmp ne i64 %i, 1024 br i1 %exitcond.i, label %loop.j, label %ret loop.j: %j = phi i64 [ 0, %loop.i], [ %j.next, %loop.j.backedge ] %exitcond.j = icmp ne i64 %j, 1024 br i1 %exitcond.j, label %loop.body, label %loop.i.backedge loop.body: %scevgep = getelementptr [1024 x [1024 x i32] ]* @A, i64 0, i64 %j, i64 %i store i32 1, i32* %scevgep br label %loop.j.backedge loop.j.backedge: %j.next = add nsw i64 %j, 1 br label %loop.j loop.i.backedge: %i.next = add nsw i64 %i, 1 br label %loop.i ret: fence seq_cst ret void } ; CHECK: #pragma omp parallel for ; CHECK: for (int c1 = 0; c1 <= 1023; c1 += 1) ; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) ; CHECK: Stmt_loop_body(c1, c3); polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll 0 → 100644 +55 −0 Original line number Diff line number Diff line ; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux-gnu" ; for (i = 0; i < n; i++) ; for (j = 0; j < n; j++) ; A[i][j] = 1; @A = common global [1024 x [1024 x i32]] zeroinitializer define void @bar(i64 %n) { start: fence seq_cst br label %loop.i loop.i: %i = phi i64 [ 0, %start ], [ %i.next, %loop.i.backedge ] %exitcond.i = icmp ne i64 %i, %n br i1 %exitcond.i, label %loop.j, label %ret loop.j: %j = phi i64 [ 0, %loop.i], [ %j.next, %loop.j.backedge ] %exitcond.j = icmp ne i64 %j, %n br i1 %exitcond.j, label %loop.body, label %loop.i.backedge loop.body: %scevgep = getelementptr [1024 x [1024 x i32] ]* @A, i64 0, i64 %j, i64 %i store i32 1, i32* %scevgep br label %loop.j.backedge loop.j.backedge: %j.next = add nsw i64 %j, 1 br label %loop.j loop.i.backedge: %i.next = add nsw i64 %i, 1 br label %loop.i ret: fence seq_cst ret void } ; At the first look both loops seem parallel, however due to the delinearization ; we get the following dependences: ; [n] -> { loop_body[i0, i1] -> loop_body[1024 + i0, -1 + i1]: ; 0 <= i0 < n - 1024 and 1 <= i1 < n} ; They cause the outer loop to be non-parallel. We can only prove their ; absence, if we know that n < 1024. This information is currently not available ; to polly. However, we should be able to obtain it due to the out of bounds ; memory accesses, that would happen if n >= 1024. ; ; CHECK: for (int c1 = 0; c1 < n; c1 += 1) ; CHECK: #pragma omp parallel for ; CHECK: for (int c3 = 0; c3 < n; c3 += 1) ; CHECK: Stmt_loop_body(c1, c3); polly/test/Isl/Ast/OpenMP/nested_loop_inner_parallel.ll 0 → 100644 +46 −0 Original line number Diff line number Diff line ; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux-gnu" ; for (i = 0; i < n; i++) ; for (j = 0; j < n; j++) ; A[j] = 1; @A = common global [1024 x i32] zeroinitializer define void @bar(i64 %n) { start: fence seq_cst br label %loop.i loop.i: %i = phi i64 [ 0, %start ], [ %i.next, %loop.i.backedge ] %exitcond.i = icmp ne i64 %i, %n br i1 %exitcond.i, label %loop.j, label %ret loop.j: %j = phi i64 [ 0, %loop.i], [ %j.next, %loop.j.backedge ] %exitcond.j = icmp ne i64 %j, %n br i1 %exitcond.j, label %loop.body, label %loop.i.backedge loop.body: %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %j store i32 1, i32* %scevgep br label %loop.j.backedge loop.j.backedge: %j.next = add nsw i64 %j, 1 br label %loop.j loop.i.backedge: %i.next = add nsw i64 %i, 1 br label %loop.i ret: fence seq_cst ret void } ; CHECK: for (int c1 = 0; c1 < n; c1 += 1) ; CHECK: #pragma omp parallel for ; CHECK: for (int c3 = 0; c3 < n; c3 += 1) ; CHECK: Stmt_loop_body(c1, c3); polly/test/Isl/Ast/OpenMP/nested_loop_outer_parallel.ll 0 → 100644 +46 −0 Original line number Diff line number Diff line ; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux-gnu" ; for (i = 0; i < n; i++) ; for (j = 0; j < n; j++) ; A[i] = 1; @A = common global [1024 x i32] zeroinitializer define void @bar(i64 %n) { start: fence seq_cst br label %loop.i loop.i: %i = phi i64 [ 0, %start ], [ %i.next, %loop.i.backedge ] %exitcond.i = icmp ne i64 %i, %n br i1 %exitcond.i, label %loop.j, label %ret loop.j: %j = phi i64 [ 0, %loop.i], [ %j.next, %loop.j.backedge ] %exitcond.j = icmp ne i64 %j, %n br i1 %exitcond.j, label %loop.body, label %loop.i.backedge loop.body: %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %i store i32 1, i32* %scevgep br label %loop.j.backedge loop.j.backedge: %j.next = add nsw i64 %j, 1 br label %loop.j loop.i.backedge: %i.next = add nsw i64 %i, 1 br label %loop.i ret: fence seq_cst ret void } ; CHECK: #pragma omp parallel for ; CHECK: for (int c1 = 0; c1 < n; c1 += 1) ; CHECK: for (int c3 = 0; c3 < n; c3 += 1) ; CHECK: Stmt_loop_body(c1, c3); Loading
polly/lib/CodeGen/IslAst.cpp +205 −6 Original line number Diff line number Diff line Loading @@ -22,6 +22,7 @@ #include "polly/CodeGen/IslAst.h" #include "polly/LinkAllPasses.h" #include "polly/Dependences.h" #include "polly/ScopInfo.h" #define DEBUG_TYPE "polly-ast" Loading @@ -43,10 +44,14 @@ static cl::opt<bool> UseContext("polly-ast-use-context", cl::desc("Use context"), cl::Hidden, cl::init(false), cl::ZeroOrMore); static cl::opt<bool> DetectParallel("polly-ast-detect-parallel", cl::desc("Detect parallelism"), cl::Hidden, cl::init(false), cl::ZeroOrMore); namespace polly { class IslAst { public: IslAst(Scop *Scop); IslAst(Scop *Scop, Dependences &D); ~IslAst(); Loading @@ -72,7 +77,182 @@ static void IslAstUserFree(void *User) free(UserStruct); } static __isl_give isl_ast_node *AtEachDomain(__isl_keep isl_ast_node *Node, // Information about an ast node. struct AstNodeUserInfo { // The node is the outermost parallel loop. int IsOutermostParallel; }; // Temporary information used when building the ast. struct AstBuildUserInfo { // The dependence information. Dependences *Deps; // We are inside a parallel for node. int InParallelFor; }; // Print a loop annotated with OpenMP pragmas. static __isl_give isl_printer * printParallelFor(__isl_keep isl_ast_node *Node, __isl_take isl_printer *Printer, __isl_take isl_ast_print_options *PrintOptions, AstNodeUserInfo *Info) { if (Info && Info->IsOutermostParallel) { Printer = isl_printer_start_line(Printer); if (Info->IsOutermostParallel) Printer = isl_printer_print_str(Printer, "#pragma omp parallel for"); Printer = isl_printer_end_line(Printer); } return isl_ast_node_for_print(Node, Printer, PrintOptions); } // Print an isl_ast_for. static __isl_give isl_printer * printFor(__isl_take isl_printer *Printer, __isl_take isl_ast_print_options *PrintOptions, __isl_keep isl_ast_node *Node, void *User) { isl_id *Id = isl_ast_node_get_annotation(Node); if (!Id) return isl_ast_node_for_print(Node, Printer, PrintOptions); struct AstNodeUserInfo *Info = (struct AstNodeUserInfo *) isl_id_get_user(Id); Printer = printParallelFor(Node, Printer, PrintOptions, Info); isl_id_free(Id); return Printer; } // Allocate an AstNodeInfo structure and initialize it with default values. static struct AstNodeUserInfo *allocateAstNodeUserInfo() { struct AstNodeUserInfo *NodeInfo; NodeInfo = (struct AstNodeUserInfo *) malloc(sizeof(struct AstNodeUserInfo)); NodeInfo->IsOutermostParallel = 0; return NodeInfo; } // Free the AstNodeInfo structure. static void freeAstNodeUserInfo(void *Ptr) { struct AstNodeUserInfo *Info; Info = (struct AstNodeUserInfo *) Ptr; free(Info); } // Check if the current scheduling dimension is parallel. // // We check for parallelism by verifying that the loop does not carry any // dependences. // // Parallelism test: if the distance is zero in all outer dimensions, then it // has to be zero in the current dimension as well. // // Implementation: first, translate dependences into time space, then force // outer dimensions to be equal. If the distance is zero in the current // dimension, then the loop is parallel. The distance is zero in the current // dimension if it is a subset of a map with equal values for the current // dimension. static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build, Dependences *D) { isl_union_map *Schedule, *Deps; isl_map *ScheduleDeps, *Test; isl_space *ScheduleSpace; unsigned Dimension, IsParallel; Schedule = isl_ast_build_get_schedule(Build); ScheduleSpace = isl_ast_build_get_schedule_space(Build); Dimension = isl_space_dim(ScheduleSpace, isl_dim_out) - 1; Deps = D->getDependences(Dependences::TYPE_ALL); Deps = isl_union_map_apply_range(Deps, isl_union_map_copy(Schedule)); Deps = isl_union_map_apply_domain(Deps, Schedule); if (isl_union_map_is_empty(Deps)) { isl_union_map_free(Deps); isl_space_free(ScheduleSpace); return 1; } ScheduleDeps = isl_map_from_union_map(Deps); for (unsigned i = 0; i < Dimension; i++) ScheduleDeps = isl_map_equate(ScheduleDeps, isl_dim_out, i, isl_dim_in, i); Test = isl_map_universe(isl_map_get_space(ScheduleDeps)); Test = isl_map_equate(Test, isl_dim_out, Dimension, isl_dim_in, Dimension); IsParallel = isl_map_is_subset(ScheduleDeps, Test); isl_space_free(ScheduleSpace); isl_map_free(Test); isl_map_free(ScheduleDeps); return IsParallel; } // Mark a for node openmp parallel, if it is the outermost parallel for node. static void markOpenmpParallel(__isl_keep isl_ast_build *Build, struct AstBuildUserInfo *BuildInfo, struct AstNodeUserInfo *NodeInfo) { if (BuildInfo->InParallelFor) return; if (astScheduleDimIsParallel(Build, BuildInfo->Deps)) { BuildInfo->InParallelFor = 1; NodeInfo->IsOutermostParallel = 1; } } // This method is executed before the construction of a for node. It creates // an isl_id that is used to annotate the subsequently generated ast for nodes. // // In this function we also run the following analyses: // // - Detection of openmp parallel loops // static __isl_give isl_id *astBuildBeforeFor(__isl_keep isl_ast_build *Build, void *User) { isl_id *Id; struct AstBuildUserInfo *BuildInfo; struct AstNodeUserInfo *NodeInfo; BuildInfo = (struct AstBuildUserInfo *) User; NodeInfo = allocateAstNodeUserInfo(); Id = isl_id_alloc(isl_ast_build_get_ctx(Build), "", NodeInfo); Id = isl_id_set_free_user(Id, freeAstNodeUserInfo); markOpenmpParallel(Build, BuildInfo, NodeInfo); return Id; } // This method is executed after the construction of a for node. // // It performs the following actions: // // - Reset the 'InParallelFor' flag, as soon as we leave a for node, // that is marked as openmp parallel. // static __isl_give isl_ast_node * astBuildAfterFor(__isl_take isl_ast_node *Node, __isl_keep isl_ast_build *Build, void *User) { isl_id *Id; struct AstBuildUserInfo *BuildInfo; struct AstNodeUserInfo *Info; Id = isl_ast_node_get_annotation(Node); if (!Id) return Node; Info = (struct AstNodeUserInfo *) isl_id_get_user(Id); if (Info && Info->IsOutermostParallel) { BuildInfo = (struct AstBuildUserInfo *) User; BuildInfo->InParallelFor = 0; } isl_id_free(Id); return Node; } static __isl_give isl_ast_node * AtEachDomain(__isl_keep isl_ast_node *Node, __isl_keep isl_ast_build *Context, void *User) { isl_map *Map; Loading @@ -90,10 +270,11 @@ static __isl_give isl_ast_node *AtEachDomain(__isl_keep isl_ast_node *Node, return isl_ast_node_set_annotation(Node, Annotation); } IslAst::IslAst(Scop *Scop) : S(Scop) { IslAst::IslAst(Scop *Scop, Dependences &D) : S(Scop) { isl_ctx *Ctx = S->getIslCtx(); isl_options_set_ast_build_atomic_upper_bound(Ctx, true); isl_ast_build *Context; struct AstBuildUserInfo BuildInfo; if (UseContext) Context = isl_ast_build_from_context(S->getContext()); Loading @@ -112,6 +293,16 @@ IslAst::IslAst(Scop *Scop) : S(Scop) { isl_union_map_dump(Schedule); ); if (DetectParallel) { BuildInfo.Deps = &D; BuildInfo.InParallelFor = 0; Context = isl_ast_build_set_before_each_for(Context, &astBuildBeforeFor, &BuildInfo); Context = isl_ast_build_set_after_each_for(Context, &astBuildAfterFor, &BuildInfo); } Root = isl_ast_build_ast_from_schedule(Context, Schedule); isl_ast_build_free(Context); Loading Loading @@ -141,7 +332,11 @@ IslAst::~IslAst() { /// Print a C like representation of the program. void IslAst::pprint(llvm::raw_ostream &OS) { isl_ast_node *Root; isl_ast_print_options *Options = isl_ast_print_options_alloc(S->getIslCtx()); isl_ast_print_options *Options; Options = isl_ast_print_options_alloc(S->getIslCtx()); Options = isl_ast_print_options_set_print_for(Options, &printFor, NULL); isl_printer *P = isl_printer_to_str(S->getIslCtx()); P = isl_printer_set_output_format(P, ISL_FORMAT_C); Root = getAst(); Loading Loading @@ -174,7 +369,9 @@ bool IslAstInfo::runOnScop(Scop &Scop) { S = &Scop; Ast = new IslAst(&Scop); Dependences &D = getAnalysis<Dependences>(); Ast = new IslAst(&Scop, D); return false; } Loading @@ -195,12 +392,14 @@ void IslAstInfo::getAnalysisUsage(AnalysisUsage &AU) const { // Get the Common analysis usage of ScopPasses. ScopPass::getAnalysisUsage(AU); AU.addRequired<ScopInfo>(); AU.addRequired<Dependences>(); } char IslAstInfo::ID = 0; INITIALIZE_PASS_BEGIN(IslAstInfo, "polly-ast", "Generate an AST of the SCoP (isl)", false, false) INITIALIZE_PASS_DEPENDENCY(ScopInfo) INITIALIZE_PASS_DEPENDENCY(Dependences) INITIALIZE_PASS_END(IslAstInfo, "polly-ast", "Generate an AST from the SCoP (isl)", false, false) Loading
polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel.ll 0 → 100644 +46 −0 Original line number Diff line number Diff line ; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux-gnu" ; for (i = 0; i < 1024; i++) ; for (j = 0; j < 1024; j++) ; A[i][j] = 1; @A = common global [1024 x [1024 x i32]] zeroinitializer define void @bar() { start: fence seq_cst br label %loop.i loop.i: %i = phi i64 [ 0, %start ], [ %i.next, %loop.i.backedge ] %exitcond.i = icmp ne i64 %i, 1024 br i1 %exitcond.i, label %loop.j, label %ret loop.j: %j = phi i64 [ 0, %loop.i], [ %j.next, %loop.j.backedge ] %exitcond.j = icmp ne i64 %j, 1024 br i1 %exitcond.j, label %loop.body, label %loop.i.backedge loop.body: %scevgep = getelementptr [1024 x [1024 x i32] ]* @A, i64 0, i64 %j, i64 %i store i32 1, i32* %scevgep br label %loop.j.backedge loop.j.backedge: %j.next = add nsw i64 %j, 1 br label %loop.j loop.i.backedge: %i.next = add nsw i64 %i, 1 br label %loop.i ret: fence seq_cst ret void } ; CHECK: #pragma omp parallel for ; CHECK: for (int c1 = 0; c1 <= 1023; c1 += 1) ; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) ; CHECK: Stmt_loop_body(c1, c3);
polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll 0 → 100644 +55 −0 Original line number Diff line number Diff line ; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux-gnu" ; for (i = 0; i < n; i++) ; for (j = 0; j < n; j++) ; A[i][j] = 1; @A = common global [1024 x [1024 x i32]] zeroinitializer define void @bar(i64 %n) { start: fence seq_cst br label %loop.i loop.i: %i = phi i64 [ 0, %start ], [ %i.next, %loop.i.backedge ] %exitcond.i = icmp ne i64 %i, %n br i1 %exitcond.i, label %loop.j, label %ret loop.j: %j = phi i64 [ 0, %loop.i], [ %j.next, %loop.j.backedge ] %exitcond.j = icmp ne i64 %j, %n br i1 %exitcond.j, label %loop.body, label %loop.i.backedge loop.body: %scevgep = getelementptr [1024 x [1024 x i32] ]* @A, i64 0, i64 %j, i64 %i store i32 1, i32* %scevgep br label %loop.j.backedge loop.j.backedge: %j.next = add nsw i64 %j, 1 br label %loop.j loop.i.backedge: %i.next = add nsw i64 %i, 1 br label %loop.i ret: fence seq_cst ret void } ; At the first look both loops seem parallel, however due to the delinearization ; we get the following dependences: ; [n] -> { loop_body[i0, i1] -> loop_body[1024 + i0, -1 + i1]: ; 0 <= i0 < n - 1024 and 1 <= i1 < n} ; They cause the outer loop to be non-parallel. We can only prove their ; absence, if we know that n < 1024. This information is currently not available ; to polly. However, we should be able to obtain it due to the out of bounds ; memory accesses, that would happen if n >= 1024. ; ; CHECK: for (int c1 = 0; c1 < n; c1 += 1) ; CHECK: #pragma omp parallel for ; CHECK: for (int c3 = 0; c3 < n; c3 += 1) ; CHECK: Stmt_loop_body(c1, c3);
polly/test/Isl/Ast/OpenMP/nested_loop_inner_parallel.ll 0 → 100644 +46 −0 Original line number Diff line number Diff line ; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux-gnu" ; for (i = 0; i < n; i++) ; for (j = 0; j < n; j++) ; A[j] = 1; @A = common global [1024 x i32] zeroinitializer define void @bar(i64 %n) { start: fence seq_cst br label %loop.i loop.i: %i = phi i64 [ 0, %start ], [ %i.next, %loop.i.backedge ] %exitcond.i = icmp ne i64 %i, %n br i1 %exitcond.i, label %loop.j, label %ret loop.j: %j = phi i64 [ 0, %loop.i], [ %j.next, %loop.j.backedge ] %exitcond.j = icmp ne i64 %j, %n br i1 %exitcond.j, label %loop.body, label %loop.i.backedge loop.body: %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %j store i32 1, i32* %scevgep br label %loop.j.backedge loop.j.backedge: %j.next = add nsw i64 %j, 1 br label %loop.j loop.i.backedge: %i.next = add nsw i64 %i, 1 br label %loop.i ret: fence seq_cst ret void } ; CHECK: for (int c1 = 0; c1 < n; c1 += 1) ; CHECK: #pragma omp parallel for ; CHECK: for (int c3 = 0; c3 < n; c3 += 1) ; CHECK: Stmt_loop_body(c1, c3);
polly/test/Isl/Ast/OpenMP/nested_loop_outer_parallel.ll 0 → 100644 +46 −0 Original line number Diff line number Diff line ; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux-gnu" ; for (i = 0; i < n; i++) ; for (j = 0; j < n; j++) ; A[i] = 1; @A = common global [1024 x i32] zeroinitializer define void @bar(i64 %n) { start: fence seq_cst br label %loop.i loop.i: %i = phi i64 [ 0, %start ], [ %i.next, %loop.i.backedge ] %exitcond.i = icmp ne i64 %i, %n br i1 %exitcond.i, label %loop.j, label %ret loop.j: %j = phi i64 [ 0, %loop.i], [ %j.next, %loop.j.backedge ] %exitcond.j = icmp ne i64 %j, %n br i1 %exitcond.j, label %loop.body, label %loop.i.backedge loop.body: %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %i store i32 1, i32* %scevgep br label %loop.j.backedge loop.j.backedge: %j.next = add nsw i64 %j, 1 br label %loop.j loop.i.backedge: %i.next = add nsw i64 %i, 1 br label %loop.i ret: fence seq_cst ret void } ; CHECK: #pragma omp parallel for ; CHECK: for (int c1 = 0; c1 < n; c1 += 1) ; CHECK: for (int c3 = 0; c3 < n; c3 += 1) ; CHECK: Stmt_loop_body(c1, c3);