Commit e9ac757f authored by Eli Friedman
Browse files

[AArch64] Don't expand memcmp in strict align mode.

7aecf232 fixed the bug where we would miscompile, but we still generate
a crazy amount of code. Turn off the expansion until someone implements
an appropriate heuristic.

Differential Revision: https://reviews.llvm.org/D77599
parent f596ab40
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -629,7 +629,12 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
AArch64TTIImpl::TTI::MemCmpExpansionOptions
AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  TTI::MemCmpExpansionOptions Options;
  Options.AllowOverlappingLoads = !ST->requiresStrictAlign();
  if (ST->requiresStrictAlign()) {
    // TODO: Add cost modeling for strict align. Misaligned loads expand to
    // a bunch of instructions when strict align is enabled.
    return Options;
  }
  Options.AllowOverlappingLoads = true;
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  Options.NumLoadsPerBlock = Options.MaxNumLoads;
  // TODO: Though vector loads usually perform well on AArch64, in some targets
+5 −11
Original line number Diff line number Diff line
@@ -11,12 +11,12 @@ entry:
  ret i1 %ret

; CHECK-LABEL: test_b2:
; CHECK-NOT:   bl bcmp
; CHECKN-NOT:  bl bcmp
; CHECKN:      ldr  x
; CHECKN-NEXT: ldr  x
; CHECKN-NEXT: ldur x
; CHECKN-NEXT: ldur x
; CHECKS-COUNT-30: ldrb w
; CHECKS: bl bcmp
}

define i1 @test_b2_align8(i8* align 8 %s1, i8* align 8 %s2) {
@@ -26,19 +26,13 @@ entry:
  ret i1 %ret

; CHECK-LABEL: test_b2_align8:
; CHECK-NOT:   bl bcmp
; CHECKN-NOT:  bl bcmp
; CHECKN:      ldr  x
; CHECKN-NEXT: ldr  x
; CHECKN-NEXT: ldur x
; CHECKN-NEXT: ldur x
; CHECKS:      ldr  x
; CHECKS-NEXT: ldr  x
; CHECKS-NEXT: ldr  w
; CHECKS-NEXT: ldr  w
; CHECKS-NEXT: ldrh  w
; CHECKS-NEXT: ldrh  w
; CHECKS-NEXT: ldrb  w
; CHECKS-NEXT: ldrb  w
; TODO: Four loads should be within the limit, but the heuristic isn't implemented.
; CHECKS: bl bcmp
}

define i1 @test_bs(i8* %s1, i8* %s2) optsize {