Commit 5e407950 authored by Tobias Grosser's avatar Tobias Grosser
Browse files

isl: Detect openmp parallelism

Based on code written by Riyadh Baghdadi.

Merged from: https://llvm.org/svn/llvm-project/polly/trunk@170102

llvm-svn: 170753
parent 2c9d1010
Loading
Loading
Loading
Loading
+205 −6
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@
#include "polly/CodeGen/IslAst.h"

#include "polly/LinkAllPasses.h"
#include "polly/Dependences.h"
#include "polly/ScopInfo.h"

#define DEBUG_TYPE "polly-ast"
@@ -43,10 +44,14 @@ static cl::opt<bool>
UseContext("polly-ast-use-context", cl::desc("Use context"), cl::Hidden,
           cl::init(false), cl::ZeroOrMore);

static cl::opt<bool>
DetectParallel("polly-ast-detect-parallel", cl::desc("Detect parallelism"),
               cl::Hidden, cl::init(false), cl::ZeroOrMore);

namespace polly {
class IslAst {
public:
  IslAst(Scop *Scop);
  IslAst(Scop *Scop, Dependences &D);

  ~IslAst();

@@ -72,7 +77,182 @@ static void IslAstUserFree(void *User)
  free(UserStruct);
}

static __isl_give isl_ast_node *AtEachDomain(__isl_keep isl_ast_node *Node,
// Information about an ast node, attached to each for node via an isl_id
// annotation (see astBuildBeforeFor).
struct AstNodeUserInfo {
  // Set to 1 iff this node is the outermost for node of a loop that was
  // proven parallel, i.e. the loop a "#pragma omp parallel for" is printed
  // for (see printParallelFor).
  int IsOutermostParallel;
};

// Temporary information used when building the ast; passed as the user
// pointer to the before/after-each-for callbacks of the isl_ast_build.
struct AstBuildUserInfo {
  // The dependence information used to test loops for parallelism.
  Dependences *Deps;

  // Non-zero while we are inside a for node already marked as the outermost
  // parallel loop; suppresses marking any loop nested inside it.
  int InParallelFor;
};

// Print a loop annotated with OpenMP pragmas.
//
// If Info marks this node as the outermost parallel loop, a
// "#pragma omp parallel for" line is emitted before the for node itself is
// printed.  Info may be NULL, in which case the node is printed unannotated.
static __isl_give isl_printer *
printParallelFor(__isl_keep isl_ast_node *Node, __isl_take isl_printer *Printer,
                 __isl_take isl_ast_print_options *PrintOptions,
                 AstNodeUserInfo *Info) {
  if (Info && Info->IsOutermostParallel) {
    // The enclosing condition already guarantees IsOutermostParallel, so we
    // print the pragma unconditionally here (the original re-tested it).
    Printer = isl_printer_start_line(Printer);
    Printer = isl_printer_print_str(Printer, "#pragma omp parallel for");
    Printer = isl_printer_end_line(Printer);
  }
  return isl_ast_node_for_print(Node, Printer, PrintOptions);
}

// Print an isl_ast_for, forwarding any AstNodeUserInfo annotation attached
// to the node so that OpenMP pragmas can be emitted for parallel loops.
static __isl_give isl_printer *
printFor(__isl_take isl_printer *Printer,
         __isl_take isl_ast_print_options *PrintOptions,
         __isl_keep isl_ast_node *Node, void *User) {
  isl_id *Annotation = isl_ast_node_get_annotation(Node);

  // Unannotated for nodes are printed as plain loops.
  if (!Annotation)
    return isl_ast_node_for_print(Node, Printer, PrintOptions);

  struct AstNodeUserInfo *NodeInfo;
  NodeInfo = (struct AstNodeUserInfo *) isl_id_get_user(Annotation);
  Printer = printParallelFor(Node, Printer, PrintOptions, NodeInfo);

  isl_id_free(Annotation);
  return Printer;
}

// Allocate an AstNodeUserInfo structure and initialize it with default
// values.
//
// The structure is released by freeAstNodeUserInfo(), which isl invokes as
// the free-callback of the annotation id; it must therefore be allocated
// with malloc rather than new.
//
// Returns NULL if the allocation fails.
static struct AstNodeUserInfo *allocateAstNodeUserInfo() {
  struct AstNodeUserInfo *NodeInfo;
  NodeInfo = (struct AstNodeUserInfo *) malloc(sizeof(struct AstNodeUserInfo));

  // malloc may return NULL on out-of-memory; the original wrote through the
  // pointer unconditionally, which is undefined behavior in that case.
  if (!NodeInfo)
    return NULL;

  NodeInfo->IsOutermostParallel = 0;
  return NodeInfo;
}

// Release an AstNodeUserInfo structure.
//
// Registered via isl_id_set_free_user() as the free-callback of the
// annotation id, hence the void-pointer signature.  free() accepts the
// untyped pointer directly; no cast is required.
static void freeAstNodeUserInfo(void *Ptr) {
  free(Ptr);
}

// Check if the current scheduling dimension is parallel.
//
// We check for parallelism by verifying that the loop does not carry any
// dependences.
//
// Parallelism test: if the distance is zero in all outer dimensions, then it
// has to be zero in the current dimension as well.
//
// Implementation: first, translate dependences into time space, then force
// outer dimensions to be equal. If the distance is zero in the current
// dimension, then the loop is parallel. The distance is zero in the current
// dimension if it is a subset of a map with equal values for the current
// dimension.
static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build,
                                     Dependences *D) {
  isl_union_map *Schedule, *Deps;
  isl_map *ScheduleDeps, *Test;
  isl_space *ScheduleSpace;
  unsigned Dimension, IsParallel;

  Schedule = isl_ast_build_get_schedule(Build);
  ScheduleSpace = isl_ast_build_get_schedule_space(Build);

  // The dimension being built right now is the innermost (last) output
  // dimension of the schedule space.
  Dimension = isl_space_dim(ScheduleSpace, isl_dim_out) - 1;

  Deps = D->getDependences(Dependences::TYPE_ALL);
  // Translate the dependences into the scheduling (time) space: apply the
  // schedule to both sides of the dependence relation.  The first apply
  // copies Schedule, the second consumes it.
  Deps = isl_union_map_apply_range(Deps, isl_union_map_copy(Schedule));
  Deps = isl_union_map_apply_domain(Deps, Schedule);

  // No dependences at all: the dimension is trivially parallel.
  if (isl_union_map_is_empty(Deps)) {
    isl_union_map_free(Deps);
    isl_space_free(ScheduleSpace);
    return 1;
  }

  ScheduleDeps = isl_map_from_union_map(Deps);

  // Force all outer dimensions to be equal ...
  for (unsigned i = 0; i < Dimension; i++)
    ScheduleDeps = isl_map_equate(ScheduleDeps, isl_dim_out, i, isl_dim_in, i);

  // ... then the loop is parallel iff the remaining dependences have zero
  // distance in the current dimension, i.e. they are a subset of the
  // universe map that equates the current input and output dimension.
  Test = isl_map_universe(isl_map_get_space(ScheduleDeps));
  Test = isl_map_equate(Test, isl_dim_out, Dimension, isl_dim_in, Dimension);
  IsParallel = isl_map_is_subset(ScheduleDeps, Test);

  isl_space_free(ScheduleSpace);
  isl_map_free(Test);
  isl_map_free(ScheduleDeps);

  return IsParallel;
}

// Mark a for node openmp parallel, if it is the outermost parallel for node.
//
// A node nested inside an already-marked parallel loop is never marked
// (BuildInfo->InParallelFor is set); otherwise the current scheduling
// dimension is tested for parallelism and, on success, both the build state
// and the node annotation are updated.
static void markOpenmpParallel(__isl_keep isl_ast_build *Build,
                               struct AstBuildUserInfo *BuildInfo,
                               struct AstNodeUserInfo *NodeInfo) {
  bool NestedInParallelFor = BuildInfo->InParallelFor;

  if (NestedInParallelFor)
    return;

  if (!astScheduleDimIsParallel(Build, BuildInfo->Deps))
    return;

  BuildInfo->InParallelFor = 1;
  NodeInfo->IsOutermostParallel = 1;
}

// This method is executed before the construction of a for node. It creates
// an isl_id that is used to annotate the subsequently generated ast for nodes.
//
// In this function we also run the following analyses:
//
// - Detection of openmp parallel loops
//
static __isl_give isl_id *astBuildBeforeFor(__isl_keep isl_ast_build *Build,
                                            void *User) {
  struct AstBuildUserInfo *BuildInfo = (struct AstBuildUserInfo *) User;
  struct AstNodeUserInfo *NodeInfo = allocateAstNodeUserInfo();

  markOpenmpParallel(Build, BuildInfo, NodeInfo);

  // The empty-named id carries NodeInfo as its user pointer; isl releases
  // it through freeAstNodeUserInfo once the id's refcount drops to zero.
  isl_id *Id = isl_id_alloc(isl_ast_build_get_ctx(Build), "", NodeInfo);
  return isl_id_set_free_user(Id, freeAstNodeUserInfo);
}

// This method is executed after the construction of a for node.
//
// It performs the following actions:
//
// - Reset the 'InParallelFor' flag, as soon as we leave a for node,
//   that is marked as openmp parallel.
//
static __isl_give isl_ast_node *
astBuildAfterFor(__isl_take isl_ast_node *Node,
                 __isl_keep isl_ast_build *Build, void *User) {
  isl_id *Annotation = isl_ast_node_get_annotation(Node);

  if (!Annotation)
    return Node;

  struct AstNodeUserInfo *NodeInfo =
      (struct AstNodeUserInfo *) isl_id_get_user(Annotation);

  // Leaving the outermost parallel loop: sibling loops built afterwards may
  // again be marked as outermost parallel.
  if (NodeInfo && NodeInfo->IsOutermostParallel) {
    struct AstBuildUserInfo *BuildInfo = (struct AstBuildUserInfo *) User;
    BuildInfo->InParallelFor = 0;
  }

  isl_id_free(Annotation);
  return Node;
}

static __isl_give isl_ast_node *
AtEachDomain(__isl_keep isl_ast_node *Node,
             __isl_keep isl_ast_build *Context, void *User)
{
  isl_map *Map;
@@ -90,10 +270,11 @@ static __isl_give isl_ast_node *AtEachDomain(__isl_keep isl_ast_node *Node,
  return isl_ast_node_set_annotation(Node, Annotation);
}

IslAst::IslAst(Scop *Scop) : S(Scop) {
IslAst::IslAst(Scop *Scop, Dependences &D) : S(Scop) {
  isl_ctx *Ctx = S->getIslCtx();
  isl_options_set_ast_build_atomic_upper_bound(Ctx, true);
  isl_ast_build *Context;
  struct AstBuildUserInfo BuildInfo;

  if (UseContext)
    Context = isl_ast_build_from_context(S->getContext());
@@ -112,6 +293,16 @@ IslAst::IslAst(Scop *Scop) : S(Scop) {
    isl_union_map_dump(Schedule);
  );

  if (DetectParallel) {
    BuildInfo.Deps = &D;
    BuildInfo.InParallelFor = 0;

    Context = isl_ast_build_set_before_each_for(Context, &astBuildBeforeFor,
                                                &BuildInfo);
    Context = isl_ast_build_set_after_each_for(Context, &astBuildAfterFor,
                                               &BuildInfo);
  }

  Root = isl_ast_build_ast_from_schedule(Context, Schedule);

  isl_ast_build_free(Context);
@@ -141,7 +332,11 @@ IslAst::~IslAst() {
/// Print a C like representation of the program.
void IslAst::pprint(llvm::raw_ostream &OS) {
  isl_ast_node *Root;
  isl_ast_print_options *Options = isl_ast_print_options_alloc(S->getIslCtx());
  isl_ast_print_options *Options;

  Options = isl_ast_print_options_alloc(S->getIslCtx());
  Options = isl_ast_print_options_set_print_for(Options, &printFor, NULL);

  isl_printer *P = isl_printer_to_str(S->getIslCtx());
  P = isl_printer_set_output_format(P, ISL_FORMAT_C);
  Root = getAst();
@@ -174,7 +369,9 @@ bool IslAstInfo::runOnScop(Scop &Scop) {

  S = &Scop;

  Ast = new IslAst(&Scop);
  Dependences &D = getAnalysis<Dependences>();

  Ast = new IslAst(&Scop, D);

  return false;
}
@@ -195,12 +392,14 @@ void IslAstInfo::getAnalysisUsage(AnalysisUsage &AU) const {
  // Get the Common analysis usage of ScopPasses.
  ScopPass::getAnalysisUsage(AU);
  AU.addRequired<ScopInfo>();
  AU.addRequired<Dependences>();
}
char IslAstInfo::ID = 0;

// Register the pass and its analysis dependencies.  The BEGIN and END
// descriptions previously disagreed ("of the SCoP" vs. "from the SCoP");
// they must be the same string, as they describe the same pass.
INITIALIZE_PASS_BEGIN(IslAstInfo, "polly-ast",
                      "Generate an AST of the SCoP (isl)", false, false)
INITIALIZE_PASS_DEPENDENCY(ScopInfo)
INITIALIZE_PASS_DEPENDENCY(Dependences)
INITIALIZE_PASS_END(IslAstInfo, "polly-ast",
                    "Generate an AST of the SCoP (isl)", false, false)

+46 −0
Original line number Diff line number Diff line
; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-pc-linux-gnu"

; for (i = 0; i < 1024; i++)
;   for (j = 0; j < 1024; j++)
;     A[i][j] = 1;

@A = common global [1024 x [1024 x i32]] zeroinitializer
; Two nested loops with fixed trip count 1024, writing each element of @A
; exactly once.  NOTE(review): the store indexes @A with (%j, %i) — transposed
; relative to the A[i][j] sketch above — but every element is still written
; once, so the parallelism result is unaffected.
define void @bar() {
start:
  fence seq_cst
  br label %loop.i

; Outer loop: i = 0 .. 1023.
loop.i:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop.i.backedge ]
  %exitcond.i = icmp ne i64 %i, 1024
  br i1 %exitcond.i, label %loop.j, label %ret

; Inner loop: j = 0 .. 1023.
loop.j:
  %j = phi i64 [ 0, %loop.i], [ %j.next, %loop.j.backedge ]
  %exitcond.j = icmp ne i64 %j, 1024
  br i1 %exitcond.j, label %loop.body, label %loop.i.backedge

loop.body:
  %scevgep = getelementptr [1024 x [1024 x i32] ]* @A, i64 0, i64 %j, i64 %i
  store i32 1, i32* %scevgep
  br label %loop.j.backedge

loop.j.backedge:
  %j.next = add nsw i64 %j, 1
  br label %loop.j

loop.i.backedge:
  %i.next = add nsw i64 %i, 1
  br label %loop.i

ret:
  fence seq_cst
  ret void
}

; CHECK: #pragma omp parallel for
; CHECK: for (int c1 = 0; c1 <= 1023; c1 += 1)
; CHECK:   for (int c3 = 0; c3 <= 1023; c3 += 1)
; CHECK:     Stmt_loop_body(c1, c3);
+55 −0
Original line number Diff line number Diff line
; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-pc-linux-gnu"

; for (i = 0; i < n; i++)
;   for (j = 0; j < n; j++)
;     A[i][j] = 1;

@A = common global [1024 x [1024 x i32]] zeroinitializer
; Same loop nest as the previous test but with a parametric bound %n.
; NOTE(review): as above, the store indexes @A with (%j, %i), transposed
; relative to the C sketch; see the dependence discussion below the function.
define void @bar(i64 %n) {
start:
  fence seq_cst
  br label %loop.i

; Outer loop: i = 0 .. n-1.
loop.i:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop.i.backedge ]
  %exitcond.i = icmp ne i64 %i, %n
  br i1 %exitcond.i, label %loop.j, label %ret

; Inner loop: j = 0 .. n-1.
loop.j:
  %j = phi i64 [ 0, %loop.i], [ %j.next, %loop.j.backedge ]
  %exitcond.j = icmp ne i64 %j, %n
  br i1 %exitcond.j, label %loop.body, label %loop.i.backedge

loop.body:
  %scevgep = getelementptr [1024 x [1024 x i32] ]* @A, i64 0, i64 %j, i64 %i
  store i32 1, i32* %scevgep
  br label %loop.j.backedge

loop.j.backedge:
  %j.next = add nsw i64 %j, 1
  br label %loop.j

loop.i.backedge:
  %i.next = add nsw i64 %i, 1
  br label %loop.i

ret:
  fence seq_cst
  ret void
}

; At the first look both loops seem parallel, however due to the delinearization
; we get the following dependences:
;    [n] -> { loop_body[i0, i1] -> loop_body[1024 + i0, -1 + i1]:
;                                           0 <= i0 < n - 1024  and 1 <= i1 < n}
; They cause the outer loop to be non-parallel.  We can only prove their
; absence, if we know that n < 1024. This information is currently not available
; to polly. However, we should be able to obtain it due to the out of bounds
; memory accesses, that would happen if n >= 1024.
;
; CHECK: for (int c1 = 0; c1 < n; c1 += 1)
; CHECK:   #pragma omp parallel for
; CHECK:   for (int c3 = 0; c3 < n; c3 += 1)
; CHECK:     Stmt_loop_body(c1, c3);
+46 −0
Original line number Diff line number Diff line
; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-pc-linux-gnu"

; for (i = 0; i < n; i++)
;   for (j = 0; j < n; j++)
;     A[j] = 1;

@A = common global [1024 x i32] zeroinitializer
; A[j] is rewritten on every outer iteration, so the outer loop carries
; output dependences and only the inner loop is parallel (matches the
; CHECK lines below: the pragma is printed on the inner loop).
define void @bar(i64 %n) {
start:
  fence seq_cst
  br label %loop.i

; Outer loop: i = 0 .. n-1 (non-parallel: repeats all writes to @A).
loop.i:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop.i.backedge ]
  %exitcond.i = icmp ne i64 %i, %n
  br i1 %exitcond.i, label %loop.j, label %ret

; Inner loop: j = 0 .. n-1 (parallel: each j writes a distinct A[j]).
loop.j:
  %j = phi i64 [ 0, %loop.i], [ %j.next, %loop.j.backedge ]
  %exitcond.j = icmp ne i64 %j, %n
  br i1 %exitcond.j, label %loop.body, label %loop.i.backedge

loop.body:
  %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %j
  store i32 1, i32* %scevgep
  br label %loop.j.backedge

loop.j.backedge:
  %j.next = add nsw i64 %j, 1
  br label %loop.j

loop.i.backedge:
  %i.next = add nsw i64 %i, 1
  br label %loop.i

ret:
  fence seq_cst
  ret void
}

; CHECK: for (int c1 = 0; c1 < n; c1 += 1)
; CHECK:   #pragma omp parallel for
; CHECK:   for (int c3 = 0; c3 < n; c3 += 1)
; CHECK:     Stmt_loop_body(c1, c3);
+46 −0
Original line number Diff line number Diff line
; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-pc-linux-gnu"

; for (i = 0; i < n; i++)
;   for (j = 0; j < n; j++)
;     A[i] = 1;

@A = common global [1024 x i32] zeroinitializer
; A[i] depends only on the outer index, so every inner iteration of a fixed i
; writes the same cell: the inner loop carries output dependences.  Only the
; outer loop is parallel — but note the CHECK lines below place the pragma on
; the outer loop, before the for.
define void @bar(i64 %n) {
start:
  fence seq_cst
  br label %loop.i

; Outer loop: i = 0 .. n-1 (parallel: distinct A[i] per iteration).
loop.i:
  %i = phi i64 [ 0, %start ], [ %i.next, %loop.i.backedge ]
  %exitcond.i = icmp ne i64 %i, %n
  br i1 %exitcond.i, label %loop.j, label %ret

; Inner loop: j = 0 .. n-1 (non-parallel: rewrites A[i] every iteration).
loop.j:
  %j = phi i64 [ 0, %loop.i], [ %j.next, %loop.j.backedge ]
  %exitcond.j = icmp ne i64 %j, %n
  br i1 %exitcond.j, label %loop.body, label %loop.i.backedge

loop.body:
  %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %i
  store i32 1, i32* %scevgep
  br label %loop.j.backedge

loop.j.backedge:
  %j.next = add nsw i64 %j, 1
  br label %loop.j

loop.i.backedge:
  %i.next = add nsw i64 %i, 1
  br label %loop.i

ret:
  fence seq_cst
  ret void
}

; CHECK: #pragma omp parallel for
; CHECK: for (int c1 = 0; c1 < n; c1 += 1)
; CHECK:   for (int c3 = 0; c3 < n; c3 += 1)
; CHECK:     Stmt_loop_body(c1, c3);
Loading