Commit f8c968fe authored by Joachim Jenke
Browse files

[TSan][OpenMP][Archer] Treat all reduction operations as atomic

This patch rebases https://reviews.llvm.org/D108046 to the new ThreadSanitizer
runtime.

The idea of the new ThreadSanitizer annotation function is to promote all memory
accesses to be treated and logged as if they were explicit atomic accesses. I
used the performance benchmark from the initial fiber review
(https://reviews.llvm.org/D54889#1343582). The TSan-specific changes of this PR
increase the execution time from 8.37 to 8.52 seconds on my system, which is a
1.8% runtime increase.

The current tests for this new Annotation are integrated into the tests for the
OpenMP-specific tool Archer.

The new annotations are used in Archer to promote all memory accesses performed
to implement an OpenMP reduction to atomic accesses. With these changes,
ThreadSanitizer+Archer successfully detects the race in
openmp/tools/archer/tests/races/parallel-for-array-reduction-no-barrier.c. The
challenge in this test is to detect the race between the primary thread's memory
access before the reduction (line 30) and the OpenMP reduction itself (line 31),
because the two are not synchronized with each other.

The OpenMP CodeGen generates three different code patterns for the reduction,
from which the OpenMP runtime chooses one implementation at runtime. The new
analysis is only compatible with two of these code patterns; therefore, we
skip generation of the third code pattern if the TSan flag is present during
compilation.

Under review as #74631
parent 49e6e3b3
Loading
Loading
Loading
Loading
+11 −3
Original line number Diff line number Diff line
@@ -5019,13 +5019,16 @@ llvm::Function *CGOpenMPRuntime::emitReductionFunction(
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  CodeGenFunction CGF(CGM);
  std::string Name = getReductionFuncName(ReducerName);
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  if (CGF.SanOpts.has(SanitizerKind::Thread)) {
    return Fn;
  }
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dst = (void*[n])(LHSArg);
@@ -5217,6 +5220,11 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
  llvm::Function *ReductionFn = emitReductionFunction(
      CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);
  llvm::Value *ReductionFnP = ReductionFn;
  if (CGF.SanOpts.has(SanitizerKind::Thread)) {
    ReductionFnP = llvm::ConstantPointerNull::get(
        llvm::PointerType::get(ReductionFn->getFunctionType(), 0));
  }
  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
@@ -5235,7 +5243,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      ReductionFnP, // void (*) (void *, void *) <reduce_func>
      Lock          // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
+10 −0
Original line number Diff line number Diff line
@@ -266,6 +266,16 @@ void INTERFACE_ATTRIBUTE AnnotateBenignRace(
  BenignRaceImpl(f, l, mem, 1, desc);
}

// Annotation entry point that opens an "all atomic" region by forwarding to
// ThreadAtomicBegin. `f` and `l` are not referenced outside the macro
// invocation; presumably they carry the caller's file/line — TODO confirm.
void INTERFACE_ATTRIBUTE AnnotateAllAtomicBegin(char *f, int l) {
  // `thr` and `pc` are not declared in this body; presumably they are
  // introduced by SCOPED_ANNOTATION — confirm against the macro definition.
  SCOPED_ANNOTATION(AnnotateAllAtomicBegin);
  ThreadAtomicBegin(thr, pc);
}

// Annotation entry point that closes an "all atomic" region by forwarding to
// ThreadAtomicEnd. `f` and `l` are not referenced outside the macro
// invocation; presumably they carry the caller's file/line — TODO confirm.
void INTERFACE_ATTRIBUTE AnnotateAllAtomicEnd(char *f, int l) {
  // `thr` is not declared in this body; presumably it is introduced by
  // SCOPED_ANNOTATION — confirm against the macro definition.
  SCOPED_ANNOTATION(AnnotateAllAtomicEnd);
  ThreadAtomicEnd(thr);
}

void INTERFACE_ATTRIBUTE AnnotateIgnoreReadsBegin(char *f, int l) {
  SCOPED_ANNOTATION(AnnotateIgnoreReadsBegin);
  ThreadIgnoreBegin(thr, pc);
+15 −0
Original line number Diff line number Diff line
@@ -1053,6 +1053,21 @@ void ThreadIgnoreEnd(ThreadState *thr) {
  }
}

// Opens an "all atomic" region for `thr`: bumps the per-thread counter and
// sets the atomic bit in the thread's fast state. The counter must equal 1
// right after the increment, so (assuming it starts at zero) a second Begin
// without an intervening ThreadAtomicEnd fails the CHECK, i.e. regions do not
// nest. `pc` is accepted but not used in this body.
void ThreadAtomicBegin(ThreadState *thr, uptr pc) {
  ++thr->all_atomic;
  CHECK_EQ(thr->all_atomic, 1);
  thr->fast_state.SetAtomicBit();
}

// Closes a region opened by ThreadAtomicBegin: checks that the per-thread
// counter is positive, decrements it, and clears the atomic bit in the
// thread's fast state once the counter is back at zero.
void ThreadAtomicEnd(ThreadState *thr) {
  CHECK_GT(thr->all_atomic, 0);
  if (--thr->all_atomic != 0)
    return;
  thr->fast_state.ClearAtomicBit();
}

#if !SANITIZER_GO
extern "C" SANITIZER_INTERFACE_ATTRIBUTE
uptr __tsan_testonly_shadow_stack_current_size() {
+3 −0
Original line number Diff line number Diff line
@@ -182,6 +182,7 @@ struct ThreadState {
  // for better performance.
  int ignore_reads_and_writes;
  int suppress_reports;
  int all_atomic;
  // Go does not support ignores.
#if !SANITIZER_GO
  IgnoreSet mop_ignore_set;
@@ -550,6 +551,8 @@ void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size);
void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr,
                                         uptr size);

// Begin/End pair bracketing a region in which the thread's fast state has its
// atomic bit set; Begin increments and End decrements ThreadState::all_atomic.
void ThreadAtomicBegin(ThreadState *thr, uptr pc);
void ThreadAtomicEnd(ThreadState *thr);
// NOTE(review): presumably the analogous begin/end entry points for ignoring
// accesses and synchronization — confirm against their definitions.
void ThreadIgnoreBegin(ThreadState *thr, uptr pc);
void ThreadIgnoreEnd(ThreadState *thr);
void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc);
+7 −2
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
#ifndef TSAN_SHADOW_H
#define TSAN_SHADOW_H

#include "sanitizer_common/sanitizer_common.h"
#include "tsan_defs.h"

namespace __tsan {
@@ -21,8 +22,8 @@ class FastState {
    part_.unused0_ = 0;
    part_.sid_ = static_cast<u8>(kFreeSid);
    part_.epoch_ = static_cast<u16>(kEpochLast);
    part_.unused1_ = 0;
    part_.ignore_accesses_ = false;
    part_.all_atomic_ = false;
  }

  // Stores `sid`, narrowed to u8, in part_.sid_.
  void SetSid(Sid sid) {
    const u8 raw_sid = static_cast<u8>(sid);
    part_.sid_ = raw_sid;
  }
@@ -37,14 +38,18 @@ class FastState {
  // Resets the ignore_accesses_ bit.
  void ClearIgnoreBit() { part_.ignore_accesses_ = false; }
  // Reports whether the ignore_accesses_ bit is currently set.
  bool GetIgnoreBit() const { return part_.ignore_accesses_ != 0; }

  // Raises the all_atomic_ bit.
  void SetAtomicBit() { part_.all_atomic_ = true; }
  // Resets the all_atomic_ bit.
  void ClearAtomicBit() { part_.all_atomic_ = false; }
  // Reports whether the all_atomic_ bit is currently set.
  bool GetAtomicBit() const { return part_.all_atomic_ != 0; }

 private:
  friend class Shadow;
  // Bit-field layout backing FastState's part_ member.
  struct Parts {
    u32 unused0_ : 8;
    u32 sid_ : 8;  // written by SetSid()
    u32 epoch_ : kEpochBits;
    u32 unused1_ : 1;
    // Cleared by ClearIgnoreBit(), read by GetIgnoreBit().
    u32 ignore_accesses_ : 1;
    // Set by SetAtomicBit(), cleared by ClearAtomicBit(), read by
    // GetAtomicBit().
    u32 all_atomic_ : 1;
    // NOTE(review): unused1_ and all_atomic_ are both declared as 1-bit
    // fields. If all_atomic_ is meant to reuse the spare bit, unused1_ (and
    // its initializer) should be dropped so the fields still pack into a
    // single u32 — confirm against the value of kEpochBits.
  };
  union {
    Parts part_;
Loading