Unverified Commit 6a62707c authored by agozillon's avatar agozillon Committed by GitHub
Browse files

[Flang][OpenMP][MLIR] Initial array section mapping MLIR -> LLVM-IR lowering...

[Flang][OpenMP][MLIR] Initial array section mapping MLIR -> LLVM-IR lowering utilising omp.bounds (#68689)

This patch seeks to add initial lowering of OpenMP array sections within
target region map clauses from MLIR to LLVM IR.

This patch initially supports fixed-size contiguous arrays (from my
reading, OpenMP does not appear to support anything other than contiguous
sections, but I could be wrong), before looking toward assumed-size and
assumed-shape arrays. The patch does not yet handle stride; that is
left for future work.

Although, assumed size works in some fashion (dummy arguments) with some
minor alterations to the OMPEarlyOutliner, so it is possible changes
made in the IsolatedFromAbove series may allow this to work with no
further required patches.

It utilises the generated omp.bounds to calculate the size of the mapped
OpenMP array (both for sectioned and un-sectioned arrays) as well as the
offset to be passed to the kernel argument structure.

Alongside these changes some refactoring of how map data is handled is
attempted, using a new MapData structure to keep track of information
utilised in the lowering of mapped values.

The initial addition of a more complex createDeviceArgumentAccessor that
utilises capture kinds similarly to (and loosely based on) Clang to
generate different kernel argument accesses is also added.

A similar function for altering how the kernel argument is passed to the
kernel argument structure on the host is also utilised
(createAlteredByCaptureMap), which allows modification of the
pointer/basePointer based on their capture (and bounds information).
Note that ByRef is the default for explicit mappings and ByCopy will
be the default for implicit captures, so the former is currently tested
in this patch and the latter is not for the moment.
parent 4bbb2bc0
Loading
Loading
Loading
Loading
+389 −146

File changed.

Preview size limit exceeded, changes collapsed.

+56 −0
Original line number Diff line number Diff line
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s

// This test checks the offload sizes provided to the OpenMP kernel argument
// structure are correct when lowering to LLVM-IR from MLIR with 3-D bounds 
// provided for a 3-D array. One with full default size, and the other with 
// a user specified OpenMP array sectioning. We expect the default sized 
// array bounds to lower to the full size of the array and the sectioned 
// array to be the size of 3*3*1*element-byte-size (36 bytes in this case).

// Host-side module (omp.is_target_device = false): maps two 3x3x3 i32 global
// arrays into one omp.target region, the first with a sectioned last bound
// and the second with full bounds in every dimension.
module attributes {omp.is_target_device = false} {
  llvm.func @_3d_target_array_section() {
    %0 = llvm.mlir.addressof @_QFEinarray : !llvm.ptr
    %1 = llvm.mlir.addressof @_QFEoutarray : !llvm.ptr
    %2 = llvm.mlir.constant(1 : index) : i64
    %3 = llvm.mlir.constant(0 : index) : i64
    %4 = llvm.mlir.constant(2 : index) : i64
    // %5 covers indices 0..2 (three elements); %6 covers only index 1..1
    // (one element). Both use a stride of 1 and a start_idx of 1.
    %5 = omp.bounds   lower_bound(%3 : i64) upper_bound(%4 : i64) stride(%2 : i64) start_idx(%2 : i64)
    %6 = omp.bounds   lower_bound(%2 : i64) upper_bound(%2 : i64) stride(%2 : i64) start_idx(%2 : i64)
    // Sectioned mapping: two full dimensions plus one single-element
    // dimension, i.e. 3*3*1 i32 elements.
    %7 = omp.map_info var_ptr(%0 : !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>)   map_clauses(tofrom) capture(ByRef) bounds(%5, %5, %6) -> !llvm.ptr {name = "inarray(1:3,1:3,2:2)"}
    // Un-sectioned mapping: all three dimensions use the full 0..2 bound.
    %8 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>)   map_clauses(tofrom) capture(ByRef) bounds(%5, %5, %5) -> !llvm.ptr {name = "outarray(1:3,1:3,1:3)"}
    omp.target   map_entries(%7, %8 : !llvm.ptr, !llvm.ptr) {
      %9 = llvm.mlir.constant(0 : i64) : i64
      %10 = llvm.mlir.constant(1 : i64) : i64
      // Copy element [1][0][0] of the input array into the same position of
      // the output array.
      %11 = llvm.getelementptr %0[0, %10, %9, %9] : (!llvm.ptr, i64, i64, i64) -> !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>
      %12 = llvm.load %11 : !llvm.ptr -> i32
      %13 = llvm.getelementptr %1[0, %10, %9, %9] : (!llvm.ptr, i64, i64, i64) -> !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>
      llvm.store %12, %13 : i32, !llvm.ptr
      omp.terminator
    }
    llvm.return
  }
  // Zero-initialised 3x3x3 i32 global used as the sectioned map operand.
  llvm.mlir.global internal @_QFEinarray() {addr_space = 0 : i32} : !llvm.array<3 x array<3 x array<3 x i32>>> {
    %0 = llvm.mlir.zero : !llvm.array<3 x array<3 x array<3 x i32>>>
    llvm.return %0 : !llvm.array<3 x array<3 x array<3 x i32>>>
  }
  // Zero-initialised 3x3x3 i32 global used as the fully mapped operand.
  llvm.mlir.global internal @_QFEoutarray() {addr_space = 0 : i32} : !llvm.array<3 x array<3 x array<3 x i32>>> {
    %0 = llvm.mlir.zero : !llvm.array<3 x array<3 x array<3 x i32>>>
    llvm.return %0 : !llvm.array<3 x array<3 x array<3 x i32>>>
  }
}

// CHECK: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 36, i64 108]
// CHECK: @.offload_maptypes = private unnamed_addr constant [2 x i64] [i64 35, i64 35]
// CHECK: @.offload_mapnames = private constant [2 x ptr] [ptr @0, ptr @1]

// CHECK: define void @_3d_target_array_section()

// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
// CHECK: store ptr @_QFEinarray, ptr %[[OFFLOADBASEPTRS]], align 8
// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 0
// CHECK: store ptr getelementptr inbounds ([3 x [3 x [3 x i32]]], ptr @_QFEinarray, i64 0, i64 1, i64 0, i64 0), ptr %[[OFFLOADPTRS]], align 8

// CHECK: %[[OFFLOADBASEPTRS2:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
// CHECK: store ptr @_QFEoutarray, ptr %[[OFFLOADBASEPTRS2]], align 8
// CHECK: %[[OFFLOADPTRS2:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 1
// CHECK: store ptr @_QFEoutarray, ptr %[[OFFLOADPTRS2]], align 8
+41 −0
Original line number Diff line number Diff line
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s

// Device-side module (omp.is_target_device = true): one omp.target region
// with two i32 map entries, one captured ByRef and one captured ByCopy.
module attributes {omp.is_target_device = true} {
  llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
    %0 = llvm.mlir.addressof @_QFEi : !llvm.ptr
    %1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr
    // "sp" is mapped tofrom and captured by reference.
    %2 = omp.map_info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"}
    // "i" is mapped to and captured by copy.
    %3 = omp.map_info var_ptr(%0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr {name = "i"}
    omp.target map_entries(%2, %3 : !llvm.ptr, !llvm.ptr) {
      // Read the ByCopy value and write it through the ByRef pointer.
      %4 = llvm.load %0 : !llvm.ptr -> i32
      llvm.store %4, %1 : i32, !llvm.ptr
      omp.terminator
    }
    llvm.return
  }
  // i32 global initialised to 1; source of the ByCopy capture.
  llvm.mlir.global internal @_QFEi() {addr_space = 0 : i32} : i32 {
    %0 = llvm.mlir.constant(1 : i32) : i32
    llvm.return %0 : i32
  }
  // i32 global initialised to 0; destination of the ByRef capture.
  llvm.mlir.global internal @_QFEsp() {addr_space = 0 : i32} : i32 {
    %0 = llvm.mlir.constant(0 : i32) : i32
    llvm.return %0 : i32
  }
}

// CHECK: define {{.*}} void @__omp_offloading_{{.*}}_{{.*}}__QQmain_l{{.*}}(ptr %[[ARG_BYREF:.*]], ptr %[[ARG_BYCOPY:.*]]) {

// CHECK: entry:
// CHECK: %[[ALLOCA_BYREF:.*]] = alloca ptr, align 8
// CHECK: store ptr %[[ARG_BYREF]], ptr %[[ALLOCA_BYREF]], align 8
// CHECK: %[[ALLOCA_BYCOPY:.*]] = alloca ptr, align 8
// CHECK: store ptr %[[ARG_BYCOPY]], ptr %[[ALLOCA_BYCOPY]], align 8

// CHECK: user_code.entry:                                  ; preds = %entry
// CHECK: %[[LOAD_BYREF:.*]] = load ptr, ptr %[[ALLOCA_BYREF]], align 8 
// CHECK: br label %omp.target

// CHECK: omp.target:                                       ; preds = %user_code.entry
// CHECK:  %[[VAL_LOAD_BYCOPY:.*]] = load i32, ptr %[[ALLOCA_BYCOPY]], align 4
// CHECK:  store i32 %[[VAL_LOAD_BYCOPY]], ptr %[[LOAD_BYREF]], align 4
// CHECK: br label %omp.region.cont
+42 −0
Original line number Diff line number Diff line
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s

// Host-side module (omp.is_target_device = false): one omp.target region
// with two i32 map entries, one captured ByRef and one captured ByCopy.
module attributes {omp.is_target_device = false} {
  llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
    %0 = llvm.mlir.addressof @_QFEi : !llvm.ptr
    %1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr
    // "sp" is mapped tofrom and captured by reference.
    %2 = omp.map_info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"}
    // "i" is mapped to and captured by copy.
    %3 = omp.map_info var_ptr(%0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr {name = "i"}
    omp.target map_entries(%2, %3 : !llvm.ptr, !llvm.ptr) {
      // Read the ByCopy value and write it through the ByRef pointer.
      %4 = llvm.load %0 : !llvm.ptr -> i32
      llvm.store %4, %1 : i32, !llvm.ptr
      omp.terminator
    }
    llvm.return
  }
  // i32 global initialised to 1; source of the ByCopy capture.
  llvm.mlir.global internal @_QFEi() {addr_space = 0 : i32} : i32 {
    %0 = llvm.mlir.constant(1 : i32) : i32
    llvm.return %0 : i32
  }
  // i32 global initialised to 0; destination of the ByRef capture.
  llvm.mlir.global internal @_QFEsp() {addr_space = 0 : i32} : i32 {
    %0 = llvm.mlir.constant(0 : i32) : i32
    llvm.return %0 : i32
  }
}

// CHECK: define void @_QQmain() {
// CHECK: %[[BYCOPY_ALLOCA:.*]] = alloca ptr, align 8

// CHECK: entry:                                            ; preds = %0
// CHECK: %[[LOAD_VAL:.*]] = load i32, ptr @_QFEi, align 4
// CHECK: store i32 %[[LOAD_VAL]], ptr %[[BYCOPY_ALLOCA]], align 4
// CHECK: %[[BYCOPY_LOAD:.*]] = load ptr, ptr %[[BYCOPY_ALLOCA]], align 8

// CHECK: %[[BASEPTR_BYREF:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
// CHECK: store ptr @_QFEsp, ptr %[[BASEPTR_BYREF]], align 8
// CHECK: %[[OFFLOADPTR_BYREF:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 0
// CHECK: store ptr @_QFEsp, ptr %[[OFFLOADPTR_BYREF]], align 8

// CHECK: %[[BASEPTR_BYCOPY:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
// CHECK: store ptr %[[BYCOPY_LOAD]], ptr %[[BASEPTR_BYCOPY]], align 8
// CHECK: %[[OFFLOADPTR_BYCOPY:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 1
// CHECK: store ptr %[[BYCOPY_LOAD]], ptr %[[OFFLOADPTR_BYCOPY]], align 8
+43 −18
Original line number Diff line number Diff line
@@ -38,15 +38,20 @@ llvm.func @_QPopenmp_target_data() {

// -----

llvm.func @_QPopenmp_target_data_region(%1 : !llvm.ptr) {
  %2 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>)   map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""}
  omp.target_data map_entries(%2 : !llvm.ptr) {
    %3 = llvm.mlir.constant(99 : i32) : i32
    %4 = llvm.mlir.constant(1 : i64) : i64
    %5 = llvm.mlir.constant(1 : i64) : i64
    %6 = llvm.mlir.constant(0 : i64) : i64
    %7 = llvm.getelementptr %1[0, %6] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<1024 x i32>
    llvm.store %3, %7 : i32, !llvm.ptr
llvm.func @_QPopenmp_target_data_region(%0 : !llvm.ptr) {
  %1 = llvm.mlir.constant(1023 : index) : i64
  %2 = llvm.mlir.constant(0 : index) : i64
  %3 = llvm.mlir.constant(1024 : index) : i64
  %4 = llvm.mlir.constant(1 : index) : i64
  %5 = omp.bounds   lower_bound(%2 : i64) upper_bound(%1 : i64) extent(%3 : i64) stride(%4 : i64) start_idx(%4 : i64)
  %6 = omp.map_info var_ptr(%0 : !llvm.ptr, !llvm.array<1024 x i32>)   map_clauses(from) capture(ByRef) bounds(%5)  -> !llvm.ptr {name = ""}
  omp.target_data map_entries(%6 : !llvm.ptr) {
    %7 = llvm.mlir.constant(99 : i32) : i32
    %8 = llvm.mlir.constant(1 : i64) : i64
    %9 = llvm.mlir.constant(1 : i64) : i64
    %10 = llvm.mlir.constant(0 : i64) : i64
    %11 = llvm.getelementptr %0[0, %10] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<1024 x i32>
    llvm.store %7, %11 : i32, !llvm.ptr
    omp.terminator
  }
  llvm.return
@@ -92,16 +97,36 @@ llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr, %3 : !llvm.ptr) {
  %11 = llvm.mlir.constant(10 : i32) : i32
  %12 = llvm.icmp "slt" %10, %11 : i32
  %13 = llvm.load %5 : !llvm.ptr -> i32
  %map1 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>)   map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""}
  %map2 = omp.map_info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>)   map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !llvm.ptr {name = ""}
  %14 = llvm.mlir.constant(1023 : index) : i64
  %15 = llvm.mlir.constant(0 : index) : i64
  %16 = llvm.mlir.constant(1024 : index) : i64
  %17 = llvm.mlir.constant(1 : index) : i64
  %18 = omp.bounds   lower_bound(%15 : i64) upper_bound(%14 : i64) extent(%16 : i64) stride(%17 : i64) start_idx(%17 : i64)
  %map1 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>)   map_clauses(to) capture(ByRef) bounds(%18) -> !llvm.ptr {name = ""}
  %19 = llvm.mlir.constant(511 : index) : i64
  %20 = llvm.mlir.constant(0 : index) : i64
  %21 = llvm.mlir.constant(512 : index) : i64
  %22 = llvm.mlir.constant(1 : index) : i64
  %23 = omp.bounds   lower_bound(%20 : i64) upper_bound(%19 : i64) extent(%21 : i64) stride(%22 : i64) start_idx(%22 : i64)
  %map2 = omp.map_info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>)   map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%23) -> !llvm.ptr {name = ""}
  omp.target_enter_data   if(%12 : i1) device(%13 : i32) map_entries(%map1, %map2 : !llvm.ptr, !llvm.ptr)
  %14 = llvm.load %7 : !llvm.ptr -> i32
  %15 = llvm.mlir.constant(10 : i32) : i32
  %16 = llvm.icmp "sgt" %14, %15 : i32
  %17 = llvm.load %5 : !llvm.ptr -> i32
  %map3 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>)   map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""}
  %map4 = omp.map_info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>)   map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !llvm.ptr {name = ""}
  omp.target_exit_data   if(%16 : i1) device(%17 : i32) map_entries(%map3, %map4 : !llvm.ptr, !llvm.ptr)
  %24 = llvm.load %7 : !llvm.ptr -> i32
  %25 = llvm.mlir.constant(10 : i32) : i32
  %26 = llvm.icmp "sgt" %24, %25 : i32
  %27 = llvm.load %5 : !llvm.ptr -> i32
  %28 = llvm.mlir.constant(1023 : index) : i64
  %29 = llvm.mlir.constant(0 : index) : i64
  %30 = llvm.mlir.constant(1024 : index) : i64
  %31 = llvm.mlir.constant(1 : index) : i64
  %32 = omp.bounds   lower_bound(%29 : i64) upper_bound(%28 : i64) extent(%30 : i64) stride(%31 : i64) start_idx(%31 : i64)
  %map3 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>)   map_clauses(from) capture(ByRef) bounds(%32) -> !llvm.ptr {name = ""}
  %33 = llvm.mlir.constant(511 : index) : i64
  %34 = llvm.mlir.constant(0 : index) : i64
  %35 = llvm.mlir.constant(512 : index) : i64
  %36 = llvm.mlir.constant(1 : index) : i64
  %37 = omp.bounds   lower_bound(%34 : i64) upper_bound(%33 : i64) extent(%35 : i64) stride(%36 : i64) start_idx(%36 : i64)
  %map4 = omp.map_info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>)   map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%37) -> !llvm.ptr {name = ""}
  omp.target_exit_data   if(%26 : i1) device(%27 : i32) map_entries(%map3, %map4 : !llvm.ptr, !llvm.ptr)
  llvm.return
}

Loading