Commit 96f6785b, authored by Sander de Smalen
Browse files

[VectorUtils] Teach findScalarElement to return splat value.

If the vector is a splat of some scalar value, findScalarElement()
can simply return the scalar value if it knows the requested lane
is in the vector.

This is only needed for scalable vectors, because for fixed-width vectors the
InsertElement/ShuffleVector case is already handled explicitly.

This helps to recognize an InstCombine fold like:
  extractelt(bitcast(splat(%v))) -> bitcast(%v)

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D107254
parent d669cc73
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -331,6 +331,12 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
      if (Elt->isNullValue())
        return findScalarElement(Val, EltNo);

  // If the vector is a splat then we can trivially find the scalar element.
  if (isa<ScalableVectorType>(VTy))
    if (Value *Splat = getSplatValue(V))
      if (EltNo < VTy->getElementCount().getKnownMinValue())
        return Splat;

  // Otherwise, we don't know.
  return nullptr;
}
+14 −0
Original line number Diff line number Diff line
@@ -271,6 +271,20 @@ define i1 @ext_lane1_from_cmp_with_stepvec(i64 %i) {
  ret i1 %res
}

; Extracting a lane from a bitcast of a scalable-vector splat of %v is expected
; to fold to a scalar bitcast of %v, i.e.
;   extractelt(bitcast(splat(%v))) -> bitcast(%v)
; The assertions below expect only the scalar bitcast and the return to remain.
define i64* @ext_lane_from_bitcast_of_splat(i32* %v) {
; CHECK-LABEL: @ext_lane_from_bitcast_of_splat(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[R:%.*]] = bitcast i32* [[V:%.*]] to i64*
; CHECK-NEXT:    ret i64* [[R]]
;
entry:
  ; Splat of %v: insert into lane 0, then shuffle with an all-zero mask so that
  ; every lane selects lane 0.
  %in = insertelement <vscale x 4 x i32*> poison, i32* %v, i32 0
  %splat = shufflevector <vscale x 4 x i32*> %in, <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer
  ; Element-wise pointer bitcast of the whole splat vector.
  %bc = bitcast <vscale x 4 x i32*> %splat to <vscale x 4 x i64*>
  ; Lane 3 is below the known minimum element count (4) of <vscale x 4 x ...>,
  ; so the requested lane exists for every value of vscale.
  %r = extractelement <vscale x 4 x i64*> %bc, i32 3
  ret i64* %r
}

declare <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
declare <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()