Unverified Commit 62ae549f authored by Razvan Lupusoru's avatar Razvan Lupusoru Committed by GitHub
Browse files

[flang][openacc] Add implicit copy for reduction in combined construct (#70148)

After PR#69417, lowering for combined constructs was updated to adhere
to OpenACC 3.3, section 2.11: `A private or reduction clause on a
combined construct is treated as if it appeared on the loop construct.`

However, the second part of that paragraph notes `In addition, a
reduction clause on a combined construct implies a copy clause`. Since
the acc dialect decomposes combined constructs, it is important to
distinguish between the case where an explicit data clause is required
(as noted in section 2.6.2) and the case where an implicit data action
must be generated by compiler.
parent 53705ddc
Loading
Loading
Loading
Loading
+34 −22
Original line number Diff line number Diff line
@@ -336,7 +336,7 @@ static void genDeclareDataOperandOperationsWithModifier(
template <typename EntryOp, typename ExitOp>
static void genDataExitOperations(fir::FirOpBuilder &builder,
                                  llvm::SmallVector<mlir::Value> operands,
                                  bool structured, bool implicit) {
                                  bool structured) {
  for (mlir::Value operand : operands) {
    auto entryOp = mlir::dyn_cast_or_null<EntryOp>(operand.getDefiningOp());
    assert(entryOp && "data entry op expected");
@@ -346,7 +346,7 @@ static void genDataExitOperations(fir::FirOpBuilder &builder,
      varPtr = entryOp.getVarPtr();
    builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(), varPtr,
                           entryOp.getBounds(), entryOp.getDataClause(),
                           structured, implicit,
                           structured, entryOp.getImplicit(),
                           builder.getStringAttr(*entryOp.getName()));
  }
}
@@ -1872,9 +1872,24 @@ createComputeOp(Fortran::lower::AbstractConverter &converter,
    } else if (const auto *reductionClause =
                   std::get_if<Fortran::parser::AccClause::Reduction>(
                       &clause.u)) {
      if (!outerCombined)
      // A reduction clause on a combined construct is treated as if it appeared
      // on the loop construct. So don't generate a reduction clause when it is
      // combined - delay it to the loop. However, a reduction clause on a
      // combined construct implies a copy clause so issue an implicit copy
      // instead.
      if (!outerCombined) {
        genReductions(reductionClause->v, converter, semanticsContext, stmtCtx,
                      reductionOperands, reductionRecipes);
      } else {
        auto crtDataStart = dataClauseOperands.size();
        genDataOperandOperations<mlir::acc::CopyinOp>(
            std::get<Fortran::parser::AccObjectList>(reductionClause->v.t),
            converter, semanticsContext, stmtCtx, dataClauseOperands,
            mlir::acc::DataClause::acc_reduction,
            /*structured=*/true, /*implicit=*/true);
        copyEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
                                 dataClauseOperands.end());
      }
    } else if (const auto *defaultClause =
                   std::get_if<Fortran::parser::AccClause::Default>(
                       &clause.u)) {
@@ -1944,13 +1959,13 @@ createComputeOp(Fortran::lower::AbstractConverter &converter,

  // Create the exit operations after the region.
  genDataExitOperations<mlir::acc::CopyinOp, mlir::acc::CopyoutOp>(
      builder, copyEntryOperands, /*structured=*/true, /*implicit=*/false);
      builder, copyEntryOperands, /*structured=*/true);
  genDataExitOperations<mlir::acc::CreateOp, mlir::acc::CopyoutOp>(
      builder, copyoutEntryOperands, /*structured=*/true, /*implicit=*/false);
      builder, copyoutEntryOperands, /*structured=*/true);
  genDataExitOperations<mlir::acc::AttachOp, mlir::acc::DetachOp>(
      builder, attachEntryOperands, /*structured=*/true, /*implicit=*/false);
      builder, attachEntryOperands, /*structured=*/true);
  genDataExitOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>(
      builder, createEntryOperands, /*structured=*/true, /*implicit=*/false);
      builder, createEntryOperands, /*structured=*/true);

  builder.restoreInsertionPoint(insPt);
  return computeOp;
@@ -2099,13 +2114,13 @@ static void genACCDataOp(Fortran::lower::AbstractConverter &converter,

  // Create the exit operations after the region.
  genDataExitOperations<mlir::acc::CopyinOp, mlir::acc::CopyoutOp>(
      builder, copyEntryOperands, /*structured=*/true, /*implicit=*/false);
      builder, copyEntryOperands, /*structured=*/true);
  genDataExitOperations<mlir::acc::CreateOp, mlir::acc::CopyoutOp>(
      builder, copyoutEntryOperands, /*structured=*/true, /*implicit=*/false);
      builder, copyoutEntryOperands, /*structured=*/true);
  genDataExitOperations<mlir::acc::AttachOp, mlir::acc::DetachOp>(
      builder, attachEntryOperands, /*structured=*/true, /*implicit=*/false);
      builder, attachEntryOperands, /*structured=*/true);
  genDataExitOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>(
      builder, createEntryOperands, /*structured=*/true, /*implicit=*/false);
      builder, createEntryOperands, /*structured=*/true);

  builder.restoreInsertionPoint(insPt);
}
@@ -2415,11 +2430,11 @@ genACCExitDataOp(Fortran::lower::AbstractConverter &converter,
    exitDataOp.setFinalizeAttr(builder.getUnitAttr());

  genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::CopyoutOp>(
      builder, copyoutOperands, /*structured=*/false, /*implicit=*/false);
      builder, copyoutOperands, /*structured=*/false);
  genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::DeleteOp>(
      builder, deleteOperands, /*structured=*/false, /*implicit=*/false);
      builder, deleteOperands, /*structured=*/false);
  genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::DetachOp>(
      builder, detachOperands, /*structured=*/false, /*implicit=*/false);
      builder, detachOperands, /*structured=*/false);
}

template <typename Op>
@@ -2594,7 +2609,7 @@ genACCUpdateOp(Fortran::lower::AbstractConverter &converter,
      builder, currentLocation, operands, operandSegments);

  genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::UpdateHostOp>(
      builder, updateHostOperands, /*structured=*/false, /*implicit=*/false);
      builder, updateHostOperands, /*structured=*/false);

  if (addAsyncAttr)
    updateOp.setAsyncAttr(builder.getUnitAttr());
@@ -3054,17 +3069,14 @@ genDeclareInFunction(Fortran::lower::AbstractConverter &converter,
      builder.setInsertionPointAfter(declareOp);
    }
    genDataExitOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>(
        builder, createEntryOperands, /*structured=*/true,
        /*implicit=*/false);
        builder, createEntryOperands, /*structured=*/true);
    genDataExitOperations<mlir::acc::DeclareDeviceResidentOp,
                          mlir::acc::DeleteOp>(
        builder, deviceResidentEntryOperands, /*structured=*/true,
        /*implicit=*/false);
        builder, deviceResidentEntryOperands, /*structured=*/true);
    genDataExitOperations<mlir::acc::CopyinOp, mlir::acc::CopyoutOp>(
        builder, copyEntryOperands, /*structured=*/true, /*implicit=*/false);
        builder, copyEntryOperands, /*structured=*/true);
    genDataExitOperations<mlir::acc::CreateOp, mlir::acc::CopyoutOp>(
        builder, copyoutEntryOperands, /*structured=*/true,
        /*implicit=*/false);
        builder, copyoutEntryOperands, /*structured=*/true);
  });
}

+5 −1
Original line number Diff line number Diff line
@@ -751,12 +751,16 @@ subroutine acc_kernels_loop
    reduction_i = 1
  end do

! CHECK:      acc.kernels {
! CHECK:      %[[COPYINREDR:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
! CHECK:      %[[COPYINREDI:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
! CHECK:      acc.kernels dataOperands(%[[COPYINREDR]], %[[COPYINREDI]] : !fir.ref<f32>, !fir.ref<i32>) {
! CHECK:        acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
! CHECK:          fir.do_loop
! CHECK:          acc.yield
! CHECK-NEXT:   }{{$}}
! CHECK:        acc.terminator
! CHECK-NEXT: }{{$}}
! CHECK:      acc.copyout accPtr(%[[COPYINREDR]] : !fir.ref<f32>) to varPtr(%{{.*}} : !fir.ref<f32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
! CHECK:      acc.copyout accPtr(%[[COPYINREDI]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}

end subroutine
+5 −1
Original line number Diff line number Diff line
@@ -770,13 +770,17 @@ subroutine acc_parallel_loop
    reduction_i = 1
  end do

! CHECK:      acc.parallel {
! CHECK:      %[[COPYINREDR:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
! CHECK:      %[[COPYINREDI:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
! CHECK:      acc.parallel dataOperands(%[[COPYINREDR]], %[[COPYINREDI]] : !fir.ref<f32>, !fir.ref<i32>) {
! CHECK:        acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
! CHECK:          fir.do_loop
! CHECK:          acc.yield
! CHECK-NEXT:   }{{$}}
! CHECK:        acc.yield
! CHECK-NEXT: }{{$}}
! CHECK:      acc.copyout accPtr(%[[COPYINREDR]] : !fir.ref<f32>) to varPtr(%{{.*}} : !fir.ref<f32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
! CHECK:      acc.copyout accPtr(%[[COPYINREDI]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}

  !$acc parallel loop
  do 10 i=0, n
+5 −1
Original line number Diff line number Diff line
@@ -705,12 +705,16 @@ subroutine acc_serial_loop
    reduction_i = 1
  end do

! CHECK:      acc.serial {
! CHECK:      %[[COPYINREDR:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
! CHECK:      %[[COPYINREDI:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
! CHECK:      acc.serial dataOperands(%[[COPYINREDR]], %[[COPYINREDI]] : !fir.ref<f32>, !fir.ref<i32>) {
! CHECK:        acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
! CHECK:          fir.do_loop
! CHECK:          acc.yield
! CHECK-NEXT:   }{{$}}
! CHECK:        acc.yield
! CHECK-NEXT: }{{$}}
! CHECK:      acc.copyout accPtr(%[[COPYINREDR]] : !fir.ref<f32>) to varPtr(%{{.*}} : !fir.ref<f32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
! CHECK:      acc.copyout accPtr(%[[COPYINREDI]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}

end subroutine acc_serial_loop
+4 −2
Original line number Diff line number Diff line
@@ -131,7 +131,8 @@ LogicalResult acc::CopyinOp::verify() {
  // Test for all clauses this operation can be decomposed from:
  if (!getImplicit() && getDataClause() != acc::DataClause::acc_copyin &&
      getDataClause() != acc::DataClause::acc_copyin_readonly &&
      getDataClause() != acc::DataClause::acc_copy)
      getDataClause() != acc::DataClause::acc_copy &&
      getDataClause() != acc::DataClause::acc_reduction)
    return emitError(
        "data clause associated with copyin operation must match its intent"
        " or specify original clause this operation was decomposed from");
@@ -212,7 +213,8 @@ LogicalResult acc::CopyoutOp::verify() {
  // Test for all clauses this operation can be decomposed from:
  if (getDataClause() != acc::DataClause::acc_copyout &&
      getDataClause() != acc::DataClause::acc_copyout_zero &&
      getDataClause() != acc::DataClause::acc_copy)
      getDataClause() != acc::DataClause::acc_copy &&
      getDataClause() != acc::DataClause::acc_reduction)
    return emitError(
        "data clause associated with copyout operation must match its intent"
        " or specify original clause this operation was decomposed from");