Commit 4a11bb40 authored by Joachim Jenke's avatar Joachim Jenke
Browse files

[OpenMP][Archer][TSan] Use TSan fibers to implement loop-level analysis for OpenMP ws-loops

A new runtime option ARCHER_OPTIONS=dispatch_fibers=<n> was added to execute loop-level
data race analysis in Archer.
parent 3a41480a
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -801,6 +801,7 @@ void FuncExit(ThreadState *thr) {
#if !SANITIZER_GO
  DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
#endif
  CHECK_GT(thr->shadow_stack_pos, thr->shadow_stack);
  thr->shadow_stack_pos--;
}

+0 −17
Original line number Diff line number Diff line
@@ -483,23 +483,6 @@ int __ompt_get_task_memory_internal(void **addr, size_t *size, int blocknum) {
  *addr = taskdata;
  *size = taskdata->td_size_alloc;
  return 0;

  /*  void *ret_addr;
    int64_t ret_size = taskdata->td_size_alloc - sizeof(kmp_taskdata_t);

    // kmp_task_t->data1 is an optional member
    if (taskdata->td_flags.destructors_thunk)
      ret_addr = &task->data1 + 1;
    else
      ret_addr = &task->part_id + 1;

    ret_size -= (char *)(ret_addr) - (char *)(task);
    if (ret_size < 0)
      return 0;

    *addr = ret_addr;
    *size = (size_t)ret_size;
    return 1;*/
}

//----------------------------------------------------------
+70 −1
Original line number Diff line number Diff line
@@ -65,6 +65,7 @@ public:
  int ignore_serial{0};
  std::atomic<int> all_memory{0};
  int tasking{0};
  int dispatch_fibers{0};
  int stack_size{1024};
  std::atomic<int> untieds{0};

@@ -103,6 +104,8 @@ public:
          continue;
        if (sscanf(token.c_str(), "tasking=%d", &tasking))
          continue;
        if (sscanf(token.c_str(), "dispatch_fibers=%d", &dispatch_fibers))
          continue;
        if (sscanf(token.c_str(), "stack_size=%d", &stack_size))
          continue;
        if (sscanf(token.c_str(), "ignore_serial=%d", &ignore_serial))
@@ -505,6 +508,31 @@ struct TaskData final : DataPoolEntry<TaskData> {
  size_t PrivateDataSize{0};
  void *PrivateDataAddr{nullptr};

  std::vector<void *> DispatchFibers;
  std::size_t DispatchFiberIdx{0};
  void *DispatchOriginalFiber{nullptr};

  void activateDispatchFibers(const std::size_t size, void *fiber = nullptr) {
    DispatchFibers.resize(size);
    DispatchOriginalFiber = fiber;
    for (auto &RFiber : DispatchFibers)
      RFiber = TsanCreateFiber(1);
  }

  /// Hand execution over to the next pre-created fiber, cycling round-robin
  /// through DispatchFibers. Returns the fiber that was switched to.
  void *switchDispatchFiber() {
    void *Next = DispatchFibers.at(DispatchFiberIdx);
    if (++DispatchFiberIdx == DispatchFibers.size())
      DispatchFiberIdx = 0;
    TsanSwitchToFiber(Next, 1);
    return Next;
  }

  /// End chunk-level analysis for the region: switch back to the fiber that
  /// was recorded by activateDispatchFibers() (if any), then destroy every
  /// dispatch fiber that was created for this region.
  void deactivateDispatchFibers() {
    if (DispatchOriginalFiber)
      TsanSwitchToFiber(DispatchOriginalFiber, 1);
    for (std::size_t I = 0, E = DispatchFibers.size(); I != E; ++I)
      TsanDestroyFiber(DispatchFibers[I]);
  }

  const void *CodePtr{nullptr};
  /// Count how often this structure has been put into child tasks + 1.
  std::atomic_int RefCount{1};
@@ -662,7 +690,7 @@ static void ompt_tsan_thread_begin(ompt_thread_t thread_type,
  DependencyDataPool::ThreadDataPool = new DependencyDataPool;
  TsanNewMemory(DependencyDataPool::ThreadDataPool,
                sizeof(DependencyDataPool::ThreadDataPool));
  if (archer_flags->tasking) {
  if (archer_flags->tasking || archer_flags->dispatch_fibers) {
    TsanGetCurrentFiber();
  }

@@ -712,6 +740,43 @@ static void ompt_tsan_parallel_end(ompt_data_t *parallel_data,
#endif
}

/// OMPT dispatch callback (registered only when dispatch_fibers is enabled):
/// each newly dispatched section, ws-loop chunk, or distribute chunk
/// continues on the task's next TSan fiber, so TSan analyzes distinct chunks
/// as distinct execution contexts.
static void ompt_tsan_dispatch(ompt_data_t *parallel_data,
                               ompt_data_t *task_data, ompt_dispatch_t kind,
                               ompt_data_t instance) {
  auto *Data = ToTaskData(task_data);
  if (kind == ompt_dispatch_section || kind == ompt_dispatch_ws_loop_chunk ||
      kind == ompt_dispatch_distribute_chunk)
    Data->switchDispatchFiber();
  // ompt_dispatch_taskloop_chunk and ompt_dispatch_iteration are
  // intentionally ignored.
}

/// OMPT work callback (registered only when dispatch_fibers is enabled):
/// creates the per-chunk TSan fibers when a workshare region begins and
/// tears them down when the region ends.
static void ompt_tsan_work(ompt_work_t work_type,
                           ompt_scope_endpoint_t endpoint,
                           ompt_data_t *parallel_data, ompt_data_t *task_data,
                           uint64_t count, const void *codeptr_ra) {
  auto *Data = ToTaskData(task_data);
  if (endpoint == ompt_scope_begin) {
    // dispatch_fibers == -1 caps the fiber pool at 13; any other (non-zero)
    // setting creates one fiber per chunk (count). Equivalent to the
    // original std::min(flag == -1 ? 13 : count, count).
    uint64_t NumFibers = count;
    if (archer_flags->dispatch_fibers == -1 && NumFibers > 13)
      NumFibers = 13;
    Data->activateDispatchFibers(NumFibers, TsanGetCurrentFiber());
  } else if (endpoint == ompt_scope_end) {
    Data->deactivateDispatchFibers();
  }
  // ompt_scope_beginend: nothing to do.
}

static void ompt_tsan_implicit_task(ompt_scope_endpoint_t endpoint,
                                    ompt_data_t *parallel_data,
                                    ompt_data_t *task_data,
@@ -1291,6 +1356,10 @@ static int ompt_tsan_initialize(ompt_function_lookup_t lookup, int device_num,
  SET_CALLBACK(implicit_task);
  SET_CALLBACK(sync_region);
  SET_CALLBACK(parallel_end);
  if (__archer::archer_flags->dispatch_fibers) {
    SET_CALLBACK(dispatch);
    SET_CALLBACK(work);
  }

  SET_CALLBACK(task_create);
  SET_CALLBACK(task_schedule);
+40 −0
Original line number Diff line number Diff line
/*
 * loop-level.c -- Archer testcase
 */
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
//
// See tools/archer/LICENSE.txt for details.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// RUN: %libarcher-compile && env ARCHER_OPTIONS=dispatch_fibers=1 %libarcher-run | FileCheck %s
// RUN: %libarcher-compile && env ARCHER_OPTIONS=dispatch_fibers=1:ignore_serial=1 %libarcher-run | FileCheck %s
// REQUIRES: tsan
// XFAIL: *
#include <omp.h>
#include <stdio.h>
#include <unistd.h>

#define NUM_THREADS 2

int main(int argc, char *argv[]) {
  /* One counter slot per thread; each thread increments only the slot for
     its own omp_get_thread_num(), so no data race is expected even with
     schedule(dynamic, 1). */
  int counts[NUM_THREADS] = {0};
  const int iterations = 10;
  int it;

#pragma omp parallel for num_threads(NUM_THREADS) shared(counts)              \
    schedule(dynamic, 1)
  for (it = 0; it < iterations; it++) {
    ++counts[omp_get_thread_num()];
  }

  fprintf(stderr, "DONE\n");
  return 0;
}

// CHECK-NOT: ThreadSanitizer: data race
// CHECK-NOT: ThreadSanitizer: reported
// CHECK: DONE
+48 −0
Original line number Diff line number Diff line
/*
 * parallel-for-antidep-dynamic.c -- Archer testcase
 */
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
//
// See tools/archer/LICENSE.txt for details.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// RUN: %libarcher-compile && env ARCHER_OPTIONS=dispatch_fibers=1 %libarcher-run-race | FileCheck %s
// RUN: %libarcher-compile && env ARCHER_OPTIONS=dispatch_fibers=1:ignore_serial=1 %libarcher-run-race | FileCheck %s
// REQUIRES: tsan
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define LEN 6

int main(int argc, char *argv[]) {
  int indices[LEN] = {1, 3, 5, 7, 9, 13}; /* renamed: was "indeces" (typo) */
  double base[26];
  double *p = base;   /* p and q are overlapping views of base ...          */
  double *q = p + 12; /* ... e.g. q[1] and p[13] are both base[13] -> race  */
  int i;

  for (i = 1; i < 26; ++i) /* base[0] is never touched (indices start at 1) */
    base[i] = 0.5 * i;

#pragma omp parallel for num_threads(2) schedule(dynamic, 1)
  for (i = 0; i < LEN; ++i) {
    int idx = indices[i];
    p[idx] += 1.0 + i;
    q[idx] += 3.0 + i;
  }

  fprintf(stderr, "DONE.\n");
  return 0;
}

// CHECK: WARNING: ThreadSanitizer: data race
// CHECK:   Write of size 8
// CHECK-NEXT: #0 {{.*}}parallel-for-antidep-dynamic.c:3{{4|5}}
// CHECK:   Previous write of size 8
// CHECK-NEXT: #0 {{.*}}parallel-for-antidep-dynamic.c:3{{4|5}}
// CHECK: DONE
// CHECK: ThreadSanitizer: reported {{[0-9]+}} warnings
Loading