Commit 24bc227d authored by Joel E. Denny's avatar Joel E. Denny
Browse files

[Clacc][OpenMP] libomptarget: Don't map alignment padding to host

In the case of partially mapped structs, libomptarget sometimes adds
padding to device allocations to ensure they are aligned properly.
However, without this patch, it considers that padding to be mapped to
the host, which can cause presence checks (e.g.,
`omp_target_is_present` or a `present` modifier) to misbehave for
unmapped parts of the struct.  This patch keeps the padding but treats
it as unmapped.  See the new test case for examples.

We are attempting to upstream this fix in D149685.
parent ead675f1
Loading
Loading
Loading
Loading
+13 −10
Original line number Diff line number Diff line
@@ -68,7 +68,8 @@ struct HostDataToTargetTy {
  const uintptr_t HstPtrEnd;       // non-inclusive.
  const map_var_info_t HstPtrName; // Optional source name of mapped variable.

  const uintptr_t TgtPtrBegin; // target info.
  const uintptr_t TgtAllocBegin; // allocated target memory
  const uintptr_t TgtPtrBegin; // mapped target memory = TgtAllocBegin + padding

private:
  static const uint64_t INFRefCount = ~(uint64_t)0;
@@ -120,11 +121,13 @@ private:
  const std::unique_ptr<StatesTy> States;

public:
  HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E, uintptr_t TB,
  HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E,
                     uintptr_t TgtAllocBegin, uintptr_t TgtPtrBegin,
                     bool UseHoldRefCount, map_var_info_t Name = nullptr,
                     bool IsINF = false)
      : HstPtrBase(BP), HstPtrBegin(B), HstPtrEnd(E), HstPtrName(Name),
        TgtPtrBegin(TB), States(std::make_unique<StatesTy>(UseHoldRefCount ? 0
        TgtAllocBegin(TgtAllocBegin), TgtPtrBegin(TgtPtrBegin),
        States(std::make_unique<StatesTy>(UseHoldRefCount ? 0
                                          : IsINF         ? INFRefCount
                                                          : 1,
                                          !UseHoldRefCount ? 0
@@ -448,8 +451,8 @@ struct DeviceTy {
  /// - Data transfer issue fails.
  TargetPointerResultTy getTargetPointer(
      HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase,
      int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo,
      bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
      int64_t TgtPadding, int64_t Size, map_var_info_t HstPtrName,
      bool HasFlagTo, bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
      bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier,
      bool HasNoAllocModifier, AsyncInfoTy &AsyncInfo,
      HostDataToTargetTy *OwnedTPR = nullptr, bool ReleaseHDTTMap = true);
+25 −18
Original line number Diff line number Diff line
@@ -98,6 +98,7 @@ int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
               /*HstPtrBase=*/(uintptr_t)HstPtrBegin,
               /*HstPtrBegin=*/(uintptr_t)HstPtrBegin,
               /*HstPtrEnd=*/(uintptr_t)HstPtrBegin + Size,
               /*TgtAllocBegin=*/(uintptr_t)TgtPtrBegin,
               /*TgtPtrBegin=*/(uintptr_t)TgtPtrBegin,
               /*UseHoldRefCount=*/false, /*Name=*/nullptr,
               /*IsRefCountINF=*/true))
@@ -289,10 +290,11 @@ size_t DeviceTy::getAccessibleBuffer(void *Ptr, int64_t Size, void **BufferHost,

TargetPointerResultTy DeviceTy::getTargetPointer(
    HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase,
    int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo, bool HasFlagAlways,
    bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier,
    bool HasPresentModifier, bool HasHoldModifier, bool HasNoAllocModifier,
    AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap) {
    int64_t TgtPadding, int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo,
    bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
    bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier,
    bool HasNoAllocModifier, AsyncInfoTy &AsyncInfo,
    HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap) {

  LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size, OwnedTPR);
  LR.TPR.Flags.IsPresent = true;
@@ -379,7 +381,9 @@ TargetPointerResultTy DeviceTy::getTargetPointer(
  } else if (Size) {
    // If it is not contained and Size > 0, we should create a new entry for it.
    LR.TPR.Flags.IsNewEntry = true;
    uintptr_t Ptr = (uintptr_t)allocData(Size, HstPtrBegin);
    uintptr_t TgtAllocBegin =
        (uintptr_t)allocData(TgtPadding + Size, HstPtrBegin);
    uintptr_t TgtPtrBegin = TgtAllocBegin + TgtPadding;
#if OMPT_SUPPORT
    // OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 353, L6-7:
    // "The target-data-op-begin event occurs before a thread initiates a data
@@ -427,13 +431,13 @@ TargetPointerResultTy DeviceTy::getTargetPointer(
          ompt_callback_target_data_op_emi)(
          ompt_scope_end, /*target_task_data=*/NULL, /*target_data=*/NULL,
          /*host_op_id=*/NULL, ompt_target_data_alloc, HstPtrBegin,
          omp_get_initial_device(), (void *)Ptr, DeviceID, Size,
          omp_get_initial_device(), (void *)TgtPtrBegin, DeviceID, Size,
          /*codeptr_ra=*/NULL);
      OmptApi.ompt_target_callbacks->ompt_callback(
          ompt_callback_target_data_op_emi)(
          ompt_scope_beginend, /*target_task_data=*/NULL, /*target_data=*/NULL,
          /*host_op_id=*/NULL, ompt_target_data_associate, HstPtrBegin,
          omp_get_initial_device(), (void *)Ptr, DeviceID, Size,
          omp_get_initial_device(), (void *)TgtPtrBegin, DeviceID, Size,
          /*codeptr_ra=*/NULL);
    }
#endif
@@ -442,18 +446,20 @@ TargetPointerResultTy DeviceTy::getTargetPointer(
    LR.TPR.setEntry(HDTTMap
                        ->emplace(new HostDataToTargetTy(
                            (uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin,
                            (uintptr_t)HstPtrBegin + Size, Ptr, HasHoldModifier,
                            HstPtrName))
                            (uintptr_t)HstPtrBegin + Size, TgtAllocBegin,
                            TgtPtrBegin, HasHoldModifier, HstPtrName))
                        .first->HDTT);
    INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID,
         "Creating new map entry with HstPtrBase=" DPxMOD
         ", HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%ld, "
         "DynRefCount=%s, HoldRefCount=%s, Name=%s\n",
         DPxPTR(HstPtrBase), DPxPTR(HstPtrBegin), DPxPTR(Ptr), Size,
         ", HstPtrBegin=" DPxMOD ", TgtAllocBegin=" DPxMOD
         ", TgtPtrBegin=" DPxMOD
         ", Size=%ld, DynRefCount=%s, HoldRefCount=%s, Name=%s\n",
         DPxPTR(HstPtrBase), DPxPTR(HstPtrBegin), DPxPTR(TgtAllocBegin),
         DPxPTR(TgtPtrBegin), Size,
         LR.TPR.getEntry()->dynRefCountToStr().c_str(),
         LR.TPR.getEntry()->holdRefCountToStr().c_str(),
         (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown");
    LR.TPR.TargetPointer = (void *)Ptr;
    LR.TPR.TargetPointer = (void *)TgtPtrBegin;

    // Notify the plugin about the new mapping.
    if (notifyDataMapped(HstPtrBegin, Size))
@@ -650,8 +656,9 @@ int DeviceTy::eraseMapEntry(HDTTMapAccessorTy &HDTTMap,
int DeviceTy::deallocTgtPtrAndEntry(HostDataToTargetTy *Entry, int64_t Size) {
  assert(Entry && "Trying to deallocate a null entry.");

  DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n",
     DPxPTR(Entry->TgtPtrBegin), Size);
  DP("Deleting tgt data " DPxMOD " of size %" PRId64 " by freeing allocation "
     "starting at " DPxMOD "\n",
     DPxPTR(Entry->TgtPtrBegin), Size, DPxPTR(Entry->TgtAllocBegin));

  void *Event = Entry->getEvent();
  if (Event && destroyEvent(Event) != OFFLOAD_SUCCESS) {
@@ -686,7 +693,7 @@ int DeviceTy::deallocTgtPtrAndEntry(HostDataToTargetTy *Entry, int64_t Size) {
        (void *)Entry->TgtPtrBegin, DeviceID, Size, /*codeptr_ra=*/NULL);
  }
#endif
  int Ret = deleteData((void *)Entry->TgtPtrBegin);
  int Ret = deleteData((void *)Entry->TgtAllocBegin);

  // Notify the plugin about the unmapped memory.
  Ret |= notifyDataUnmapped((void *)Entry->HstPtrBegin);
@@ -775,8 +782,8 @@ void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) {
  return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind);
}

int32_t DeviceTy::deleteData(void *TgtPtrBegin, int32_t Kind) {
  return RTL->data_delete(RTLDeviceID, TgtPtrBegin, Kind);
int32_t DeviceTy::deleteData(void *TgtAllocBegin, int32_t Kind) {
  return RTL->data_delete(RTLDeviceID, TgtAllocBegin, Kind);
}

static void printCopyInfo(int DeviceId, bool H2D, void *SrcPtrBegin,
+9 −28
Original line number Diff line number Diff line
@@ -208,6 +208,7 @@ static int initLibrary(DeviceTy &Device) {
              (uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/,
              (uintptr_t)CurrHostEntry->addr +
                  CurrHostEntry->size /*HstPtrEnd*/,
              (uintptr_t)CurrDeviceEntry->addr /*TgtAllocBegin*/,
              (uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/,
              false /*UseHoldRefCount*/, CurrHostEntry->name,
              true /*IsRefCountINF*/));
@@ -674,18 +675,16 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
    // Adjust for proper alignment if this is a combined entry (for structs).
    // Look at the next argument - if that is MEMBER_OF this one, then this one
    // is a combined entry.
    int64_t Padding = 0;
    int64_t TgtPadding = 0;
    const int NextI = I + 1;
    if (getParentIndex(ArgTypes[I]) < 0 && NextI < ArgNum &&
        getParentIndex(ArgTypes[NextI]) == I) {
      int64_t Alignment = getPartialStructRequiredAlignment(HstPtrBase);
      Padding = (int64_t)HstPtrBegin % Alignment;
      if (Padding) {
      TgtPadding = (int64_t)HstPtrBegin % Alignment;
      if (TgtPadding) {
        DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD
           "\n",
           Padding, DPxPTR(HstPtrBegin));
        HstPtrBegin = (char *)HstPtrBegin - Padding;
        DataSize += Padding;
           TgtPadding, DPxPTR(HstPtrBegin));
      }
    }

@@ -729,7 +728,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
      // the pointer not to be allocated yet?  If not, then does
      // HasPresentModifier or HasNoAllocModifier still matter here?
      PointerTpr = Device.getTargetPointer(
          HDTTMap, HstPtrBase, HstPtrBase, sizeof(void *),
          HDTTMap, HstPtrBase, HstPtrBase, /*TgtPadding=*/0, sizeof(void *),
          /*HstPtrName=*/nullptr,
          /*HasFlagTo=*/false, /*HasFlagAlways=*/false, IsImplicit, UpdateRef,
          HasCloseModifier, HasPresentModifier, HasHoldModifier,
@@ -760,8 +759,8 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
    const bool HasFlagAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS;
    // Note that HDTTMap will be released in getTargetPointer.
    auto TPR = Device.getTargetPointer(
        HDTTMap, HstPtrBegin, HstPtrBase, DataSize, HstPtrName, HasFlagTo,
        HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier,
        HDTTMap, HstPtrBegin, HstPtrBase, TgtPadding, DataSize, HstPtrName,
        HasFlagTo, HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier,
        HasPresentModifier, HasHoldModifier, HasNoAllocModifier, AsyncInfo,
        PointerTpr.getEntry());
    void *TgtPtrBegin = TPR.TargetPointer;
@@ -967,25 +966,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
    }

    void *HstPtrBegin = Args[I];
    void *HstPtrBase = ArgBases[I];
    int64_t DataSize = ArgSizes[I];
    // Adjust for proper alignment if this is a combined entry (for structs).
    // Look at the next argument - if that is MEMBER_OF this one, then this one
    // is a combined entry.
    const int NextI = I + 1;
    if (getParentIndex(ArgTypes[I]) < 0 && NextI < ArgNum &&
        getParentIndex(ArgTypes[NextI]) == I) {
      int64_t Alignment = getPartialStructRequiredAlignment(HstPtrBase);
      int64_t Padding = (int64_t)HstPtrBegin % Alignment;
      if (Padding) {
        DP("Using a Padding of %" PRId64 " bytes for begin address " DPxMOD
           "\n",
           Padding, DPxPTR(HstPtrBegin));
        HstPtrBegin = (char *)HstPtrBegin - Padding;
        DataSize += Padding;
      }
    }

    bool IsImplicit = ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT;
    bool UpdateRef = (!(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
                      (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) &&
+43 −0
Original line number Diff line number Diff line
// RUN: %libomptarget-compile-generic -fopenmp-version=51
// RUN: %libomptarget-run-fail-generic 2>&1 \
// RUN: | %fcheck-generic

// The host memory layout for the following program looks like this:
//
//   | 4 bytes | 4 bytes | 8 bytes |
//   |   s.x   |   s.y   |   s.z   |
//   `-----------------------------'
//
// s is always at least 8-byte aligned in host memory due to s.z, so
// libomptarget's device padding for map(s.y,s.z) always maps to host memory
// that includes s.x.  At one time, s.x appeared to be mapped as a result, but
// libomptarget has since been fixed not to consider device padding as mapped to
// host memory.

#include <omp.h>
#include <stdio.h>

int main() {
  struct S { int x; int y; double z; } s = {1, 2, 3};

  // CHECK: &s.x = 0x[[#%x,HOST_ADDR:]], size = [[#%u,SIZE:]]
  fprintf(stderr, "&s = %p\n", &s);
  fprintf(stderr, "&s.x = %p, size = %ld\n", &s.x, sizeof s.x);
  fprintf(stderr, "&s.y = %p\n", &s.y);
  fprintf(stderr, "&s.z = %p\n", &s.z);

  // CHECK: s.x is present: 0
  // CHECK: s.x = 1{{$}}
  #pragma omp target enter data map(alloc: s.y, s.z)
  int dev = omp_get_default_device();
  fprintf(stderr, "s.x is present: %d\n", omp_target_is_present(&s.x, dev));
  #pragma omp target update from(s.x) // should have no effect
  fprintf(stderr, "s.x = %d\n", s.x);

  // CHECK: Libomptarget message: device mapping required by 'present' map type modifier does not exist for host address 0x{{0*}}[[#HOST_ADDR]] ([[#SIZE]] bytes)
  // CHECK: Libomptarget error: Call to getTargetPointer returned null pointer ('present' map type modifier).
  // CHECK: Libomptarget fatal error 1: failure of target construct while offloading is mandatory
  #pragma omp target enter data map(present, alloc: s.x)

  return 0;
}
+3 −1
Original line number Diff line number Diff line
@@ -35,7 +35,9 @@
// padding for s, libomptarget reported an array extension error.  collidePost
// is never fully contained within that padding (which would avoid the extension
// error) because collidePost is 16 bytes while the padding is always less than
// 16 bytes due to the modulo operations.
// 16 bytes due to the modulo operations.  (Later, libomptarget was changed not
// to consider padding to be mapped to the host, so it cannot be involved in
// array extension errors.)

#include <stdint.h>
#include <stdio.h>