Commit a0c0389f authored by Fangrui Song's avatar Fangrui Song
Browse files

[SimplifyLibcalls] Don't replace locked IO...

[SimplifyLibcalls] Don't replace locked IO (fgetc/fgets/fputc/fputs/fread/fwrite) with unlocked IO (*_unlocked)

This essentially reverts some of the SimplifyLibcalls part changes of D45736 [SimplifyLibcalls] Replace locked IO with unlocked IO.

C11 7.21.5.2 The fflush function

> If stream is a null pointer, the fflush function performs this flushing action on all streams for which the behavior is defined above.

i.e. fopen'ed FILE* is inherently captured.

POSIX.1-2017 getc_unlocked, getchar_unlocked, putc_unlocked, putchar_unlocked - stdio with explicit client locking

> These functions can safely be used in a multi-threaded program if and only if they are called while the invoking thread owns the ( FILE *) object, as is the case after a successful call to the flockfile() or ftrylockfile() functions.

After a thread fopen'ed a FILE*, when it is calling foobar() which is now replaced by foobar_unlocked(),
if another thread is concurrently calling fflush(0), the behavior is undefined.

C11 7.22.4.4 The exit function

> Next, all open streams with unwritten buffered data are flushed, all open streams are closed, and all files created by the tmpfile function are removed.

The replacement is only feasible if the program is single threaded, or exit or fflush(0) is never called.
See also http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20180528/556615.html
for how the replacement makes libc interceptors difficult to implement.

dalias: in a worst case, it's unbounded data corruption because of concurrent access to pointers
without synchronization.  f->wpos or rpos could get outside of the buffer, thread A could do
f->wpos += j after knowing j is in bounds, while thread B also changes it concurrently.

This can produce exploitable conditions depending on libc internals.

Revert the SimplifyLibcalls part change because the cons obviously
overweigh the pros.  Even when the replacement is feasible, the benefit
is indemonstrable, more so in an application instead of an artificial
glibc benchmark.  Theoretically the replacement could be beneficial when
calling getc_unlocked/putc_unlocked in a loop, but then it is better
using a blocked IO operation and the user is likely aware of that.

The function attribute inference is still useful and thus kept.

Reviewed By: xbolva00

Differential Revision: https://reviews.llvm.org/D75933
parent 9624beb3
Loading
Loading
Loading
Loading
+0 −31
Original line number Diff line number Diff line
@@ -187,21 +187,11 @@ namespace llvm {
  Value *emitFPutC(Value *Char, Value *File, IRBuilderBase &B,
                   const TargetLibraryInfo *TLI);

  /// Emit a call to the fputc_unlocked function. This assumes that Char is an
  /// i32, and File is a pointer to FILE.
  Value *emitFPutCUnlocked(Value *Char, Value *File, IRBuilderBase &B,
                           const TargetLibraryInfo *TLI);

  /// Emit a call to the fputs function. Str is required to be a pointer and
  /// File is a pointer to FILE.
  Value *emitFPutS(Value *Str, Value *File, IRBuilderBase &B,
                   const TargetLibraryInfo *TLI);

  /// Emit a call to the fputs_unlocked function. Str is required to be a
  /// pointer and File is a pointer to FILE.
  Value *emitFPutSUnlocked(Value *Str, Value *File, IRBuilderBase &B,
                           const TargetLibraryInfo *TLI);

  /// Emit a call to the fwrite function. This assumes that Ptr is a pointer,
  /// Size is an 'intptr_t', and File is a pointer to FILE.
  Value *emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilderBase &B,
@@ -214,27 +204,6 @@ namespace llvm {
  /// Emit a call to the calloc function.
  Value *emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
                    IRBuilderBase &B, const TargetLibraryInfo &TLI);

  /// Emit a call to the fwrite_unlocked function. This assumes that Ptr is a
  /// pointer, Size is an 'intptr_t', N is nmemb and File is a pointer to FILE.
  Value *emitFWriteUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
                            IRBuilderBase &B, const DataLayout &DL,
                            const TargetLibraryInfo *TLI);

  /// Emit a call to the fgetc_unlocked function. File is a pointer to FILE.
  Value *emitFGetCUnlocked(Value *File, IRBuilderBase &B,
                           const TargetLibraryInfo *TLI);

  /// Emit a call to the fgets_unlocked function. Str is required to be a
  /// pointer, Size is an i32 and File is a pointer to FILE.
  Value *emitFGetSUnlocked(Value *Str, Value *Size, Value *File,
                           IRBuilderBase &B, const TargetLibraryInfo *TLI);

  /// Emit a call to the fread_unlocked function. This assumes that Ptr is a
  /// pointer, Size is an 'intptr_t', N is nmemb and File is a pointer to FILE.
  Value *emitFReadUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
                           IRBuilderBase &B, const DataLayout &DL,
                           const TargetLibraryInfo *TLI);
}

#endif
+0 −4
Original line number Diff line number Diff line
@@ -220,11 +220,7 @@ private:
  Value *optimizeSnPrintF(CallInst *CI, IRBuilderBase &B);
  Value *optimizeFPrintF(CallInst *CI, IRBuilderBase &B);
  Value *optimizeFWrite(CallInst *CI, IRBuilderBase &B);
  Value *optimizeFRead(CallInst *CI, IRBuilderBase &B);
  Value *optimizeFPuts(CallInst *CI, IRBuilderBase &B);
  Value *optimizeFGets(CallInst *CI, IRBuilderBase &B);
  Value *optimizeFPutc(CallInst *CI, IRBuilderBase &B);
  Value *optimizeFGetc(CallInst *CI, IRBuilderBase &B);
  Value *optimizePuts(CallInst *CI, IRBuilderBase &B);

  // Helper methods
+0 −124
Original line number Diff line number Diff line
@@ -1188,26 +1188,6 @@ Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilderBase &B,
  return CI;
}

Value *llvm::emitFPutCUnlocked(Value *Char, Value *File, IRBuilderBase &B,
                               const TargetLibraryInfo *TLI) {
  if (!TLI->has(LibFunc_fputc_unlocked))
    return nullptr;

  Module *M = B.GetInsertBlock()->getModule();
  StringRef FPutcUnlockedName = TLI->getName(LibFunc_fputc_unlocked);
  FunctionCallee F = M->getOrInsertFunction(FPutcUnlockedName, B.getInt32Ty(),
                                            B.getInt32Ty(), File->getType());
  if (File->getType()->isPointerTy())
    inferLibFuncAttributes(M, FPutcUnlockedName, *TLI);
  Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/ true, "chari");
  CallInst *CI = B.CreateCall(F, {Char, File}, FPutcUnlockedName);

  if (const Function *Fn =
          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
    CI->setCallingConv(Fn->getCallingConv());
  return CI;
}

Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilderBase &B,
                       const TargetLibraryInfo *TLI) {
  if (!TLI->has(LibFunc_fputs))
@@ -1227,25 +1207,6 @@ Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilderBase &B,
  return CI;
}

Value *llvm::emitFPutSUnlocked(Value *Str, Value *File, IRBuilderBase &B,
                               const TargetLibraryInfo *TLI) {
  if (!TLI->has(LibFunc_fputs_unlocked))
    return nullptr;

  Module *M = B.GetInsertBlock()->getModule();
  StringRef FPutsUnlockedName = TLI->getName(LibFunc_fputs_unlocked);
  FunctionCallee F = M->getOrInsertFunction(FPutsUnlockedName, B.getInt32Ty(),
                                            B.getInt8PtrTy(), File->getType());
  if (File->getType()->isPointerTy())
    inferLibFuncAttributes(M, FPutsUnlockedName, *TLI);
  CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsUnlockedName);

  if (const Function *Fn =
          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
    CI->setCallingConv(Fn->getCallingConv());
  return CI;
}

Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilderBase &B,
                        const DataLayout &DL, const TargetLibraryInfo *TLI) {
  if (!TLI->has(LibFunc_fwrite))
@@ -1310,88 +1271,3 @@ Value *llvm::emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,

  return CI;
}

Value *llvm::emitFWriteUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
                                IRBuilderBase &B, const DataLayout &DL,
                                const TargetLibraryInfo *TLI) {
  if (!TLI->has(LibFunc_fwrite_unlocked))
    return nullptr;

  Module *M = B.GetInsertBlock()->getModule();
  LLVMContext &Context = B.GetInsertBlock()->getContext();
  StringRef FWriteUnlockedName = TLI->getName(LibFunc_fwrite_unlocked);
  FunctionCallee F = M->getOrInsertFunction(
      FWriteUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
      DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());

  if (File->getType()->isPointerTy())
    inferLibFuncAttributes(M, FWriteUnlockedName, *TLI);
  CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File});

  if (const Function *Fn =
          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
    CI->setCallingConv(Fn->getCallingConv());
  return CI;
}

Value *llvm::emitFGetCUnlocked(Value *File, IRBuilderBase &B,
                               const TargetLibraryInfo *TLI) {
  if (!TLI->has(LibFunc_fgetc_unlocked))
    return nullptr;

  Module *M = B.GetInsertBlock()->getModule();
  StringRef FGetCUnlockedName = TLI->getName(LibFunc_fgetc_unlocked);
  FunctionCallee F = M->getOrInsertFunction(FGetCUnlockedName, B.getInt32Ty(),
                                            File->getType());
  if (File->getType()->isPointerTy())
    inferLibFuncAttributes(M, FGetCUnlockedName, *TLI);
  CallInst *CI = B.CreateCall(F, File, FGetCUnlockedName);

  if (const Function *Fn =
          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
    CI->setCallingConv(Fn->getCallingConv());
  return CI;
}

Value *llvm::emitFGetSUnlocked(Value *Str, Value *Size, Value *File,
                               IRBuilderBase &B, const TargetLibraryInfo *TLI) {
  if (!TLI->has(LibFunc_fgets_unlocked))
    return nullptr;

  Module *M = B.GetInsertBlock()->getModule();
  StringRef FGetSUnlockedName = TLI->getName(LibFunc_fgets_unlocked);
  FunctionCallee F =
      M->getOrInsertFunction(FGetSUnlockedName, B.getInt8PtrTy(),
                             B.getInt8PtrTy(), B.getInt32Ty(), File->getType());
  inferLibFuncAttributes(M, FGetSUnlockedName, *TLI);
  CallInst *CI =
      B.CreateCall(F, {castToCStr(Str, B), Size, File}, FGetSUnlockedName);

  if (const Function *Fn =
          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
    CI->setCallingConv(Fn->getCallingConv());
  return CI;
}

Value *llvm::emitFReadUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
                               IRBuilderBase &B, const DataLayout &DL,
                               const TargetLibraryInfo *TLI) {
  if (!TLI->has(LibFunc_fread_unlocked))
    return nullptr;

  Module *M = B.GetInsertBlock()->getModule();
  LLVMContext &Context = B.GetInsertBlock()->getContext();
  StringRef FReadUnlockedName = TLI->getName(LibFunc_fread_unlocked);
  FunctionCallee F = M->getOrInsertFunction(
      FReadUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
      DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());

  if (File->getType()->isPointerTy())
    inferLibFuncAttributes(M, FReadUnlockedName, *TLI);
  CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File});

  if (const Function *Fn =
          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
    CI->setCallingConv(Fn->getCallingConv());
  return CI;
}
+3 −78
Original line number Diff line number Diff line
@@ -138,28 +138,6 @@ static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) {
  return ConstantInt::get(CI->getType(), Result);
}

static bool isLocallyOpenedFile(Value *File, CallInst *CI,
                                const TargetLibraryInfo *TLI) {
  CallInst *FOpen = dyn_cast<CallInst>(File);
  if (!FOpen)
    return false;

  Function *InnerCallee = FOpen->getCalledFunction();
  if (!InnerCallee)
    return false;

  LibFunc Func;
  if (!TLI->getLibFunc(*InnerCallee, Func) || !TLI->has(Func) ||
      Func != LibFunc_fopen)
    return false;

  inferLibFuncAttributes(*CI->getCalledFunction(), *TLI);
  if (PointerMayBeCaptured(File, true, true))
    return false;

  return true;
}

static bool isOnlyUsedInComparisonWithZero(Value *V) {
  for (User *U : V->users()) {
    if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
@@ -2754,11 +2732,6 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilderBase &B) {
    }
  }

  if (isLocallyOpenedFile(CI->getArgOperand(3), CI, TLI))
    return emitFWriteUnlocked(CI->getArgOperand(0), CI->getArgOperand(1),
                              CI->getArgOperand(2), CI->getArgOperand(3), B, DL,
                              TLI);

  return nullptr;
}

@@ -2773,15 +2746,9 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilderBase &B) {
  if (OptForSize)
    return nullptr;

  // Check if has any use
  if (!CI->use_empty()) {
    if (isLocallyOpenedFile(CI->getArgOperand(1), CI, TLI))
      return emitFPutSUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), B,
                               TLI);
    else
  // We can't optimize if return value is used.
  if (!CI->use_empty())
    return nullptr;
  }

  // fputs(s,F) --> fwrite(s,strlen(s),1,F)
  uint64_t Len = GetStringLength(CI->getArgOperand(0));
@@ -2795,40 +2762,6 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilderBase &B) {
      CI->getArgOperand(1), B, DL, TLI);
}

Value *LibCallSimplifier::optimizeFPutc(CallInst *CI, IRBuilderBase &B) {
  optimizeErrorReporting(CI, B, 1);

  if (isLocallyOpenedFile(CI->getArgOperand(1), CI, TLI))
    return emitFPutCUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), B,
                             TLI);

  return nullptr;
}

Value *LibCallSimplifier::optimizeFGetc(CallInst *CI, IRBuilderBase &B) {
  if (isLocallyOpenedFile(CI->getArgOperand(0), CI, TLI))
    return emitFGetCUnlocked(CI->getArgOperand(0), B, TLI);

  return nullptr;
}

Value *LibCallSimplifier::optimizeFGets(CallInst *CI, IRBuilderBase &B) {
  if (isLocallyOpenedFile(CI->getArgOperand(2), CI, TLI))
    return emitFGetSUnlocked(CI->getArgOperand(0), CI->getArgOperand(1),
                             CI->getArgOperand(2), B, TLI);

  return nullptr;
}

Value *LibCallSimplifier::optimizeFRead(CallInst *CI, IRBuilderBase &B) {
  if (isLocallyOpenedFile(CI->getArgOperand(3), CI, TLI))
    return emitFReadUnlocked(CI->getArgOperand(0), CI->getArgOperand(1),
                             CI->getArgOperand(2), CI->getArgOperand(3), B, DL,
                             TLI);

  return nullptr;
}

Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilderBase &B) {
  annotateNonNullBasedOnAccess(CI, 0);
  if (!CI->use_empty())
@@ -3162,16 +3095,8 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
      return optimizeFPrintF(CI, Builder);
    case LibFunc_fwrite:
      return optimizeFWrite(CI, Builder);
    case LibFunc_fread:
      return optimizeFRead(CI, Builder);
    case LibFunc_fputs:
      return optimizeFPuts(CI, Builder);
    case LibFunc_fgets:
      return optimizeFGets(CI, Builder);
    case LibFunc_fputc:
      return optimizeFPutc(CI, Builder);
    case LibFunc_fgetc:
      return optimizeFGetc(CI, Builder);
    case LibFunc_puts:
      return optimizePuts(CI, Builder);
    case LibFunc_perror:
+0 −23
Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S -mtriple=x86_64-w64-mingw32 | FileCheck %s

%struct._iobuf = type { i8*, i32, i8*, i32, i32, i32, i32, i8* }

@.str = private unnamed_addr constant [5 x i8] c"file\00", align 1
@.str.1 = private unnamed_addr constant [2 x i8] c"w\00", align 1

; Check that this still uses the plain fputc instead of fputc_unlocked
; for MinGW targets.
define void @external_fputc_test() {
; CHECK-LABEL: @external_fputc_test(
; CHECK-NEXT:    [[CALL:%.*]] = call %struct._iobuf* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @fputc(i32 99, %struct._iobuf* [[CALL]])
; CHECK-NEXT:    ret void
;
  %call = call %struct._iobuf* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
  %call1 = call i32 @fputc(i32 99, %struct._iobuf* %call)
  ret void
}

declare %struct._iobuf* @fopen(i8*, i8*)
declare i32 @fputc(i32, %struct._iobuf* nocapture)
Loading