Commit 12cf2934 authored by Joel E. Denny's avatar Joel E. Denny
Browse files

[Clacc][OpenMP] Extend omp teams to permit nested omp tile

OpenMP 5.2, sec. 10.2 "teams Construct", p. 232, L9-12 restricts what
regions can be strictly nested within a `teams` construct.  This patch
relaxes Clang's enforcement of this restriction in the case of nested
`tile` constructs unless `-fno-openmp-extensions` is specified.  Cases
like the following then seem to work fine with no additional
implementation changes:

```
 #pragma omp target teams
 #pragma omp tile sizes(N, M)
 for (int i = 0; i < I; ++i) {
   for (int j = 0; j < J; ++j) {
     ...
   }
 }
```

This commit is similar to D126323 (48ca3a5e) plus D126547
(4a368136), which relaxed the restriction for an `atomic`
construct in a `teams` construct.

We are attempting to upstream this feature in D151350.
parent 59d18c36
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -378,10 +378,13 @@ considered for standardization. Please post on the
|Category                      | Feature                                                                           | Status                   | Reviews                                                |
+==============================+===================================================================================+==========================+========================================================+
| atomic extension             | `'atomic' strictly nested within 'teams'                                          | :good:`prototyped`       | D126323                                                |
|                              | <https://openmp.llvm.org/docs/openacc/OpenMPExtensions.html#atomicWithinTeams>`_  |                          |                                                        |
|                              | <https://openmp.llvm.org/docs/openacc/OpenMPExtensions.html#withinTeams>`_        |                          |                                                        |
+------------------------------+-----------------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+
| device extension             | `'ompx_hold' map type modifier                                                    | :good:`prototyped`       | D106509, D106510                                       |
|                              | <https://openmp.llvm.org/docs/openacc/OpenMPExtensions.html#ompx-hold>`_          |                          |                                                        |
+------------------------------+-----------------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+
| tile extension               | `'tile' strictly nested within 'teams'                                            | :good:`prototyped`       | D151350                                                |
|                              | <https://openmp.llvm.org/docs/openacc/OpenMPExtensions.html#withinTeams>`_        |                          |                                                        |
+------------------------------+-----------------------------------------------------------------------------------+--------------------------+--------------------------------------------------------+

.. _Discourse forums (Runtimes - OpenMP category): https://discourse.llvm.org/c/runtimes/openmp/35
+7 −6
Original line number Diff line number Diff line
@@ -5204,12 +5204,13 @@ static bool checkNestingOfRegions(Sema &SemaRef, const DSAStackTy *Stack,
      // only OpenMP regions that may be strictly nested inside the teams
      // region.
      //
      // As an extension, we permit atomic within teams as well.
      NestingProhibited = !isOpenMPParallelDirective(CurrentRegion) &&
      // As an extension, we permit atomic and tile within teams as well.
      NestingProhibited =
          !isOpenMPParallelDirective(CurrentRegion) &&
          !isOpenMPDistributeDirective(CurrentRegion) &&
          CurrentRegion != OMPD_loop &&
          !(SemaRef.getLangOpts().OpenMPExtensions &&
                            CurrentRegion == OMPD_atomic);
            (CurrentRegion == OMPD_atomic || CurrentRegion == OMPD_tile));
      Recommend = ShouldBeInParallelRegion;
    }
    if (!NestingProhibited && CurrentRegion == OMPD_loop) {
+4 −4
Original line number Diff line number Diff line
@@ -138,10 +138,10 @@ the runtime level. That is, OpenACC's dynamic reference count is
OpenMP's dynamic reference count, and OpenACC's structured reference
count is our OpenMP hold reference count extension.

.. _atomicWithinTeams:
.. _withinTeams:

``atomic`` Strictly Nested Within ``teams``
-------------------------------------------
Regions Strictly Nested Within ``teams``
----------------------------------------

Example
^^^^^^^
@@ -149,7 +149,7 @@ Example
OpenMP 5.2, sec. 10.2 "teams Construct", p. 232, L9-12 restricts what
regions can be strictly nested within a ``teams`` region.  As an
extension, Clang relaxes that restriction in the case of the
``atomic`` construct so that, for example, the following case is
``atomic`` or ``tile`` construct.  For example, the following case is
permitted:

.. code-block:: c++
+59 −0
Original line number Diff line number Diff line
// Check that omp tile (introduced in OpenMP 5.1) is permitted and behaves as
// expected when strictly nested within omp target teams.  This is an extension
// to OpenMP 5.2 and is enabled by default.

// RUN: %libomptarget-compile-generic -fopenmp-version=51
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic

#include <omp.h>
#include <stdio.h>

#define NUM_TEAMS_UPPER 256
#define I_NTILES 8
#define J_NTILES 9
#define I_NELEMS 2
#define J_NELEMS 3

// Has every team record the order in which the tiled loop nest visits each
// element, then checks on the host that each team visited the elements tile by
// tile.  Prints "success" and returns 0 if so; otherwise prints an "error:"
// line and returns 1.
int main() {
  // Start from a known value so the range check below is meaningful even if
  // the region never assigns it.
  int numTeams = 0;
  // order[team][iTile][jTile][iElem][jElem] is the sequence number at which
  // that team visited that element.
  int order[NUM_TEAMS_UPPER][I_NTILES][J_NTILES][I_NELEMS][J_NELEMS];
  #pragma omp target teams num_teams(NUM_TEAMS_UPPER) map(from : numTeams)
  {
    int team = omp_get_team_num();
    // Only one team reports the team count, so there is a single writer.
    if (team == 0)
      numTeams = omp_get_num_teams();
    // Per-team counter of elements visited so far.
    int next = 0;
    #pragma omp tile sizes(I_NELEMS, J_NELEMS)
    for (int i = 0; i < I_NTILES * I_NELEMS; ++i) {
      for (int j = 0; j < J_NTILES * J_NELEMS; ++j) {
        // Split each logical index into its tile number and its offset within
        // that tile.
        int iTile = i / I_NELEMS;
        int jTile = j / J_NELEMS;
        int iElem = i % I_NELEMS;
        int jElem = j % J_NELEMS;
        order[team][iTile][jTile][iElem][jElem] = next++;
      }
    }
  }
  printf("numTeams = %d\n", numTeams);
  // numTeams bounds the first index into order below, so reject any value that
  // would skip verification entirely or read past the end of the array.
  if (numTeams < 1 || numTeams > NUM_TEAMS_UPPER) {
    printf("error: numTeams = %d is outside [1, %d]\n", numTeams,
           NUM_TEAMS_UPPER);
    return 1;
  }
  for (int team = 0; team < numTeams; ++team) {
    // Walking tiles in the outer two loops and elements within a tile in the
    // inner two must reproduce the recorded sequence 0, 1, 2, ...
    int expected = 0;
    for (int iTile = 0; iTile < I_NTILES; ++iTile) {
      for (int jTile = 0; jTile < J_NTILES; ++jTile) {
        for (int iElem = 0; iElem < I_NELEMS; ++iElem) {
          for (int jElem = 0; jElem < J_NELEMS; ++jElem) {
            int actual = order[team][iTile][jTile][iElem][jElem];
            if (expected != actual) {
              printf("error: order[%d][%d][%d][%d][%d] = %d, expected %d\n",
                     team, iTile, jTile, iElem, jElem, actual, expected);
              return 1;
            }
            ++expected;
          }
        }
      }
    }
  }
  // CHECK: success
  printf("success\n");
  return 0;
}
+59 −0
Original line number Diff line number Diff line
// Check that omp tile (introduced in OpenMP 5.1) is permitted and behaves as
// expected when strictly nested within omp teams.  This is an extension to
// OpenMP 5.2 and is enabled by default.

// RUN: %libomp-compile -fopenmp-version=51
// RUN: %libomp-run 2>&1 | FileCheck %s

#include <omp.h>
#include <stdio.h>

#define NUM_TEAMS_UPPER 256
#define I_NTILES 8
#define J_NTILES 9
#define I_NELEMS 2
#define J_NELEMS 3

// Has every team record the order in which the tiled loop nest visits each
// element, then checks that each team visited the elements tile by tile.
// Prints "success" and returns 0 if so; otherwise prints an "error:" line and
// returns 1.
int main() {
  // Start from a known value so the range check below is meaningful even if
  // the region never assigns it.
  int numTeams = 0;
  // order[team][iTile][jTile][iElem][jElem] is the sequence number at which
  // that team visited that element.
  int order[NUM_TEAMS_UPPER][I_NTILES][J_NTILES][I_NELEMS][J_NELEMS];
  #pragma omp teams num_teams(NUM_TEAMS_UPPER)
  {
    int team = omp_get_team_num();
    // Only one team reports the team count, so there is a single writer.
    if (team == 0)
      numTeams = omp_get_num_teams();
    // Per-team counter of elements visited so far.
    int next = 0;
    #pragma omp tile sizes(I_NELEMS, J_NELEMS)
    for (int i = 0; i < I_NTILES * I_NELEMS; ++i) {
      for (int j = 0; j < J_NTILES * J_NELEMS; ++j) {
        // Split each logical index into its tile number and its offset within
        // that tile.
        int iTile = i / I_NELEMS;
        int jTile = j / J_NELEMS;
        int iElem = i % I_NELEMS;
        int jElem = j % J_NELEMS;
        order[team][iTile][jTile][iElem][jElem] = next++;
      }
    }
  }
  printf("numTeams = %d\n", numTeams);
  // numTeams bounds the first index into order below, so reject any value that
  // would skip verification entirely or read past the end of the array.
  if (numTeams < 1 || numTeams > NUM_TEAMS_UPPER) {
    printf("error: numTeams = %d is outside [1, %d]\n", numTeams,
           NUM_TEAMS_UPPER);
    return 1;
  }
  for (int team = 0; team < numTeams; ++team) {
    // Walking tiles in the outer two loops and elements within a tile in the
    // inner two must reproduce the recorded sequence 0, 1, 2, ...
    int expected = 0;
    for (int iTile = 0; iTile < I_NTILES; ++iTile) {
      for (int jTile = 0; jTile < J_NTILES; ++jTile) {
        for (int iElem = 0; iElem < I_NELEMS; ++iElem) {
          for (int jElem = 0; jElem < J_NELEMS; ++jElem) {
            int actual = order[team][iTile][jTile][iElem][jElem];
            if (expected != actual) {
              printf("error: order[%d][%d][%d][%d][%d] = %d, expected %d\n",
                     team, iTile, jTile, iElem, jElem, actual, expected);
              return 1;
            }
            ++expected;
          }
        }
      }
    }
  }
  // CHECK: success
  printf("success\n");
  return 0;
}