Commit fec2927e authored by Huber, Joseph's avatar Huber, Joseph
Browse files

[OpenMP] Add NoSync attributes to alloc / free shared RTL calls

This patch adds the `nosync` attribute to the `__kmpc_alloc_shared` and
`__kmpc_free_shared` runtime library calls. This allows code analysis to
know that these functins dont contain any barriers. This will help
optimizations reason about the CFG of blocks containing these calls.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D109995
parent 91ace9f0
Loading
Loading
Loading
Loading
+7 −2
Original line number Diff line number Diff line
@@ -539,6 +539,11 @@ __OMP_ATTRS_SET(ReadOnlyPtrAttrs,
                                   EnumAttr(NoCapture))
                    : AttributeSet())

__OMP_ATTRS_SET(DeviceAllocAttrs,
                OptimisticAttributes
                    ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync))
                    : AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync)))

#if 0
__OMP_ATTRS_SET(WriteOnlyPtrAttrs,
                OptimisticAttributes
@@ -858,9 +863,9 @@ __OMP_RTL_ATTRS(__kmpc_doacross_wait, BarrierAttrs, AttributeSet(),
__OMP_RTL_ATTRS(__kmpc_doacross_fini, BarrierAttrs, AttributeSet(),
                ParamAttrs(ReadOnlyPtrAttrs))

__OMP_RTL_ATTRS(__kmpc_alloc_shared, DefaultAttrs, ReturnPtrAttrs,
__OMP_RTL_ATTRS(__kmpc_alloc_shared, DeviceAllocAttrs, ReturnPtrAttrs,
                ParamAttrs())
__OMP_RTL_ATTRS(__kmpc_free_shared, AllocAttrs, AttributeSet(),
__OMP_RTL_ATTRS(__kmpc_free_shared, DeviceAllocAttrs, AttributeSet(),
                ParamAttrs(NoCaptureAttrs))

__OMP_RTL_ATTRS(__kmpc_alloc, DefaultAttrs, ReturnPtrAttrs, ParamAttrs())
+58 −13
Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS
; RUN: opt -openmp-opt-disable-deglobalization -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=CHECK-DISABLED
@@ -17,12 +17,17 @@ target triple = "nvptx64"
declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)

;.
; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8*
;.
; CHECK-DISABLED: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8*
;.
define void @kernel() {
; CHECK-LABEL: define {{[^@]+}}@kernel() {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i1 false, i1 false, i1 true)
; CHECK-NEXT:    call void @foo() #[[ATTR0:[0-9]+]]
; CHECK-NEXT:    call void @bar() #[[ATTR0]]
; CHECK-NEXT:    call void @foo() #[[ATTR4:[0-9]+]]
; CHECK-NEXT:    call void @bar() #[[ATTR4]]
; CHECK-NEXT:    call void @unknown_no_openmp()
; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i1 false, i1 true)
; CHECK-NEXT:    ret void
@@ -30,8 +35,8 @@ define void @kernel() {
; CHECK-DISABLED-LABEL: define {{[^@]+}}@kernel() {
; CHECK-DISABLED-NEXT:  entry:
; CHECK-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i1 false, i1 false, i1 true)
; CHECK-DISABLED-NEXT:    call void @foo() #[[ATTR0:[0-9]+]]
; CHECK-DISABLED-NEXT:    call void @bar() #[[ATTR0]]
; CHECK-DISABLED-NEXT:    call void @foo() #[[ATTR4:[0-9]+]]
; CHECK-DISABLED-NEXT:    call void @bar() #[[ATTR4]]
; CHECK-DISABLED-NEXT:    call void @unknown_no_openmp()
; CHECK-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i1 false, i1 true)
; CHECK-DISABLED-NEXT:    ret void
@@ -47,13 +52,13 @@ entry:

define internal void @foo() {
; CHECK-LABEL: define {{[^@]+}}@foo
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = alloca i8, i64 4, align 1
; CHECK-NEXT:    ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@foo
; CHECK-DISABLED-SAME: () #[[ATTR0]] {
; CHECK-DISABLED-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-DISABLED-NEXT:  entry:
; CHECK-DISABLED-NEXT:    [[TMP0:%.*]] = alloca i8, i64 4, align 1
; CHECK-DISABLED-NEXT:    ret void
@@ -69,17 +74,17 @@ define internal void @bar() {
; CHECK-LABEL: define {{[^@]+}}@bar
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]]
; CHECK-NEXT:    call void @share(i8* nofree writeonly [[TMP0]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT:    call void @__kmpc_free_shared(i8* [[TMP0]], i64 noundef 4) #[[ATTR0]]
; CHECK-NEXT:    [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR4]], !dbg [[DBG8:![0-9]+]]
; CHECK-NEXT:    call void @share(i8* nofree writeonly [[TMP0]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT:    call void @__kmpc_free_shared(i8* [[TMP0]], i64 noundef 4) #[[ATTR4]]
; CHECK-NEXT:    ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@bar
; CHECK-DISABLED-SAME: () #[[ATTR0]] {
; CHECK-DISABLED-NEXT:  entry:
; CHECK-DISABLED-NEXT:    [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]]
; CHECK-DISABLED-NEXT:    call void @share(i8* nofree writeonly [[TMP0]]) #[[ATTR4:[0-9]+]]
; CHECK-DISABLED-NEXT:    call void @__kmpc_free_shared(i8* [[TMP0]], i64 noundef 4) #[[ATTR0]]
; CHECK-DISABLED-NEXT:    [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR4]], !dbg [[DBG8:![0-9]+]]
; CHECK-DISABLED-NEXT:    call void @share(i8* nofree writeonly [[TMP0]]) #[[ATTR5:[0-9]+]]
; CHECK-DISABLED-NEXT:    call void @__kmpc_free_shared(i8* [[TMP0]], i64 noundef 4) #[[ATTR4]]
; CHECK-DISABLED-NEXT:    ret void
;
entry:
@@ -170,3 +175,43 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
!12 = !DILocation(line: 2, column: 2, scope: !8)
!13 = !DILocation(line: 4, column: 2, scope: !9)
!14 = !DILocation(line: 6, column: 2, scope: !9)
;.
; CHECK: attributes #[[ATTR0]] = { nosync nounwind }
; CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn }
; CHECK: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" }
; CHECK: attributes #[[ATTR4]] = { nounwind }
; CHECK: attributes #[[ATTR5]] = { nosync nounwind writeonly }
;.
; CHECK-DISABLED: attributes #[[ATTR0]] = { nosync nounwind }
; CHECK-DISABLED: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn }
; CHECK-DISABLED: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly }
; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" }
; CHECK-DISABLED: attributes #[[ATTR4]] = { nounwind }
; CHECK-DISABLED: attributes #[[ATTR5]] = { nosync nounwind writeonly }
;.
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "remove_globalization.c", directory: "/tmp/remove_globalization.c")
; CHECK: [[META2:![0-9]+]] = !{}
; CHECK: [[META3:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3}
; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; CHECK: [[META7:![0-9]+]] = !{void ()* @kernel, !"kernel", i32 1}
; CHECK: [[DBG8]] = !DILocation(line: 4, column: 2, scope: !9)
; CHECK: [[META9:![0-9]+]] = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
; CHECK: [[META10:![0-9]+]] = !DISubroutineType(types: !2)
;.
; CHECK-DISABLED: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
; CHECK-DISABLED: [[META1:![0-9]+]] = !DIFile(filename: "remove_globalization.c", directory: "/tmp/remove_globalization.c")
; CHECK-DISABLED: [[META2:![0-9]+]] = !{}
; CHECK-DISABLED: [[META3:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3}
; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; CHECK-DISABLED: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK-DISABLED: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; CHECK-DISABLED: [[META7:![0-9]+]] = !{void ()* @kernel, !"kernel", i32 1}
; CHECK-DISABLED: [[DBG8]] = !DILocation(line: 4, column: 2, scope: !9)
; CHECK-DISABLED: [[META9:![0-9]+]] = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
; CHECK-DISABLED: [[META10:![0-9]+]] = !DISubroutineType(types: !2)
; CHECK-DISABLED: [[DBG11]] = !DILocation(line: 6, column: 2, scope: !9)
;.
+17 −14
Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals --include-generated-funcs
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs
; RUN: opt -S -passes='openmp-opt' < %s | FileCheck %s
; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64"

; CHECK-REMARKS: remark: replace_globalization.c:5:7: Replaced globalized variable with 16 bytes of shared memory
; CHECK-REMARKS: remark: replace_globalization.c:5:14: Replaced globalized variable with 4 bytes of shared memory
; CHECK-REMARKS-NOT: 6 bytes

%struct.ident_t = type { i32, i32, i32, i32, i8* }

@S = external local_unnamed_addr global i8*
@0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @0, i32 0, i32 0) }, align 8

; CHECK-REMARKS: remark: replace_globalization.c:5:7: Replaced globalized variable with 16 bytes of shared memory
; CHECK-REMARKS: remark: replace_globalization.c:5:14: Replaced globalized variable with 4 bytes of shared memory
; CHECK-REMARKS-NOT: 6 bytes

define dso_local void @foo() {
entry:
@@ -138,10 +139,10 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-LABEL: define {{[^@]+}}@foo() {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true)
; CHECK-NEXT:    [[X:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR1:[0-9]+]]
; CHECK-NEXT:    [[X:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR4:[0-9]+]]
; CHECK-NEXT:    call void @unknown_no_openmp()
; CHECK-NEXT:    call void @use.internalized(i8* nofree writeonly [[X]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT:    call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR1]]
; CHECK-NEXT:    call void @use.internalized(i8* nofree writeonly [[X]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT:    call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR4]]
; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
; CHECK-NEXT:    ret void
;
@@ -163,7 +164,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
; CHECK-NEXT:    br i1 [[CMP]], label [[MASTER:%.*]], label [[EXIT:%.*]]
; CHECK:       master:
; CHECK-NEXT:    call void @use.internalized(i8* nofree writeonly addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*)) #[[ATTR4]]
; CHECK-NEXT:    call void @use.internalized(i8* nofree writeonly addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*)) #[[ATTR5]]
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
@@ -176,7 +177,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[CALL]], -1
; CHECK-NEXT:    br i1 [[TMP0]], label [[MASTER:%.*]], label [[EXIT:%.*]]
; CHECK:       master:
; CHECK-NEXT:    call void @use.internalized(i8* nofree writeonly addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y, i32 0, i32 0) to i8*)) #[[ATTR4]]
; CHECK-NEXT:    call void @use.internalized(i8* nofree writeonly addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y, i32 0, i32 0) to i8*)) #[[ATTR5]]
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
@@ -189,14 +190,15 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[CALL]], -1
; CHECK-NEXT:    br i1 [[TMP0]], label [[MASTER:%.*]], label [[EXIT:%.*]]
; CHECK:       master:
; CHECK-NEXT:    [[Y:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 24) #[[ATTR1]], !dbg [[DBG9:![0-9]+]]
; CHECK-NEXT:    call void @use.internalized(i8* nofree writeonly [[Y]]) #[[ATTR4]]
; CHECK-NEXT:    call void @__kmpc_free_shared(i8* [[Y]], i64 noundef 24) #[[ATTR1]]
; CHECK-NEXT:    [[Y:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 24) #[[ATTR4]], !dbg [[DBG9:![0-9]+]]
; CHECK-NEXT:    call void @use.internalized(i8* nofree writeonly [[Y]]) #[[ATTR5]]
; CHECK-NEXT:    call void @__kmpc_free_shared(i8* [[Y]], i64 noundef 24) #[[ATTR4]]
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
;
; CHECK: Function Attrs: nofree nosync nounwind willreturn writeonly
; CHECK-LABEL: define {{[^@]+}}@use.internalized
; CHECK-SAME: (i8* nofree writeonly [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
@@ -212,10 +214,11 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
;
;.
; CHECK: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn writeonly }
; CHECK: attributes #[[ATTR1]] = { nounwind }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nosync nounwind }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind readnone }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" }
; CHECK: attributes #[[ATTR4]] = { nounwind writeonly }
; CHECK: attributes #[[ATTR4]] = { nounwind }
; CHECK: attributes #[[ATTR5]] = { nounwind writeonly }
;.
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "replace_globalization.c", directory: "/tmp/replace_globalization.c")