Unverified commit 6d89cd85 authored by Paul Walker, committed by GitHub

[LLVM][SelectionDAG] Don't assume masked loads access all lanes in memory. (#192706)

When creating the initial DAG for masked loads we use the result
type to define the LocationSize. However, because the load is masked we do
not know which memory locations, if any, will actually be read.

Fixes https://github.com/llvm/llvm-project/issues/180251
parent 48ee9c82
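
For context, a minimal sketch of the distinction the fix relies on, using the
LocationSize helpers from llvm/Analysis/MemoryLocation.h (illustrative only;
the helper function below is hypothetical and not part of the patch):

#include "llvm/Analysis/MemoryLocation.h"

// A precise LocationSize claims the access touches exactly that many bytes;
// an upper bound only claims it touches at most that many, possibly none,
// which is the correct model for a masked load whose mask is unknown.
static bool accessIsExact(llvm::LocationSize Size) {
  return Size.isPrecise();
}

// accessIsExact(llvm::LocationSize::precise(16))    == true
// accessIsExact(llvm::LocationSize::upperBound(16)) == false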
+1 −1
@@ -5122,7 +5122,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {

  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      MachinePointerInfo(PtrOperand), MMOFlags,
-      VT.getStoreSize(), Alignment, AAInfo, Ranges);
+      LocationSize::upperBound(VT.getStoreSize()), Alignment, AAInfo, Ranges);

  const auto &TLI = DAG.getTargetLoweringInfo();

+32 −0
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mattr=+sve < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"
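
; The stack buffer below is filled with 6 bytes of 0x07 and then read back
; through a byte-reversed masked load whose active lanes cover exactly those
; 6 bytes; the load must not be treated as accessing the full scalable-vector
; range.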

define <vscale x 16 x i8> @reverse_masked_load() nounwind {
; CHECK-LABEL: reverse_masked_load:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    ptrue p0.b, vl6
; CHECK-NEXT:    mov w8, #1799 // =0x707
; CHECK-NEXT:    add x9, sp, #8
; CHECK-NEXT:    strh w8, [sp, #12]
; CHECK-NEXT:    mov w8, #117901063 // =0x7070707
; CHECK-NEXT:    rev p0.b, p0.b
; CHECK-NEXT:    str w8, [sp, #8]
; CHECK-NEXT:    add x8, x9, #6
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, #-1, mul vl]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
entry:
  %l_2 = alloca [6 x i8], align 1
  call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(6) %l_2, i8 7, i64 6, i1 false)
  %1 = getelementptr inbounds nuw i8, ptr %l_2, i64 6
  %2 = tail call i64 @llvm.vscale.i64()
  %3 = mul nsw i64 %2, -16
  %5 = getelementptr i8, ptr %1, i64 %3
  %active.lane.mask = tail call <vscale x 16 x i1> @llvm.get.active.lane.mask(i64 0, i64 6)
  %reverse = tail call <vscale x 16 x i1> @llvm.vector.reverse(<vscale x 16 x i1> %active.lane.mask)
  %masked.load = call <vscale x 16 x i8> @llvm.masked.load(ptr align 1 %5, <vscale x 16 x i1> %reverse, <vscale x 16 x i8> zeroinitializer)
  ret <vscale x 16 x i8> %masked.load
}