Unverified Commit a6470d6d authored by Florian Hahn's avatar Florian Hahn Committed by GitHub
Browse files

[VPlan] Bail out on recipes without live-outs in narrowIG. (#195729)

When narrowing interleave groups, recipes with users outside the loop
region are not handled properly. We would need to properly check whether
the operations can be narrowed in a way that serves the correct results
to the users.

For now, just bail out to fix miscompiles/crashes.
parent 9e095580
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -5333,6 +5333,16 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan,
    if (R.mayWriteToMemory() && !InterleaveR)
      return nullptr;

    // Bail out if any recipe defines a vector value used outside the
    // vector loop region.
    if (any_of(R.definedValues(), [&](VPValue *V) {
          return any_of(V->users(), [&](VPUser *U) {
            auto *UR = cast<VPRecipeBase>(U);
            return UR->getParent()->getParent() != VectorLoop;
          });
        }))
      return nullptr;

    // All other ops are allowed, but we reject uses that cannot be converted
    // when checking all allowed consumers (store interleave groups) below.
    if (!InterleaveR)
+198 −0
Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph:" --version 6
; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF2 %s
; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF4 %s

target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-macosx"

; The store group writes only constants, but a wide op (%add) computed in the
; loop is used outside of it (returned from %exit) — exercises the live-out
; bail-out in narrowInterleaveGroups.
define i64 @test_wide_op_live_out_constant_store_group(ptr noalias %res, ptr noalias %A) {
; VF2-LABEL: define i64 @test_wide_op_live_out_constant_store_group(
; VF2-SAME: ptr noalias [[RES:%.*]], ptr noalias [[A:%.*]]) {
; VF2-NEXT:  [[ENTRY:.*:]]
; VF2-NEXT:    br label %[[VECTOR_PH:.*]]
; VF2:       [[VECTOR_PH]]:
; VF2-NEXT:    br label %[[VECTOR_BODY:.*]]
; VF2:       [[VECTOR_BODY]]:
; VF2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[RES]], i64 [[INDEX]]
; VF2-NEXT:    store <2 x i64> zeroinitializer, ptr [[TMP0]], align 8
; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
; VF2-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 99
; VF2-NEXT:    br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VF2:       [[MIDDLE_BLOCK]]:
; VF2-NEXT:    br label %[[SCALAR_PH:.*]]
; VF2:       [[SCALAR_PH]]:
;
; VF4-LABEL: define i64 @test_wide_op_live_out_constant_store_group(
; VF4-SAME: ptr noalias [[RES:%.*]], ptr noalias [[A:%.*]]) {
; VF4-NEXT:  [[ENTRY:.*:]]
; VF4-NEXT:    br label %[[VECTOR_PH:.*]]
; VF4:       [[VECTOR_PH]]:
; VF4-NEXT:    br label %[[VECTOR_BODY:.*]]
; VF4:       [[VECTOR_BODY]]:
; VF4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF4-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[RES]], i64 [[INDEX]]
; VF4-NEXT:    store <8 x i64> zeroinitializer, ptr [[TMP0]], align 8
; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
; VF4-NEXT:    br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VF4:       [[MIDDLE_BLOCK]]:
; VF4-NEXT:    br label %[[SCALAR_PH:.*]]
; VF4:       [[SCALAR_PH]]:
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.A.0 = getelementptr inbounds nuw { i64, i64 }, ptr %A, i64 %iv
  %l.0 = load i64, ptr %gep.A.0, align 8
  ; %add is defined inside the loop but its only use is in the exit block.
  %add = add i64 %l.0, 1
  %gep.res.0 = getelementptr inbounds nuw { i64, i64 }, ptr %res, i64 %iv
  ; Both members of the { i64, i64 } store group receive the constant 0.
  store i64 0, ptr %gep.res.0, align 8
  %gep.res.1 = getelementptr inbounds nuw i8, ptr %gep.res.0, i64 8
  store i64 0, ptr %gep.res.1, align 8
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv.next, 100
  br i1 %ec, label %exit, label %loop

exit:
  ret i64 %add
}

; Both fields of %A are loaded and stored to %res (a full interleave group),
; and the wide add on the first field is used outside the loop (returned from
; %exit) — the live-out must keep the group from being narrowed.
define i64 @test_wide_op_live_out(ptr noalias %res, ptr noalias %A) {
; VF2-LABEL: define i64 @test_wide_op_live_out(
; VF2-SAME: ptr noalias [[RES:%.*]], ptr noalias [[A:%.*]]) {
; VF2-NEXT:  [[ENTRY:.*:]]
; VF2-NEXT:    br label %[[VECTOR_PH:.*]]
; VF2:       [[VECTOR_PH]]:
; VF2-NEXT:    br label %[[VECTOR_BODY:.*]]
; VF2:       [[VECTOR_BODY]]:
; VF2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[A]], i64 [[INDEX]]
; VF2-NEXT:    [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8
; VF2-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
; VF2-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
; VF2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[RES]], i64 [[INDEX]]
; VF2-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i64> [[STRIDED_VEC]], <2 x i64> [[STRIDED_VEC1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; VF2-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; VF2-NEXT:    store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; VF2-NEXT:    br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VF2:       [[MIDDLE_BLOCK]]:
; VF2-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
; VF2-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 1
; VF2-NEXT:    br label %[[EXIT:.*]]
; VF2:       [[EXIT]]:
; VF2-NEXT:    ret i64 [[TMP4]]
;
; VF4-LABEL: define i64 @test_wide_op_live_out(
; VF4-SAME: ptr noalias [[RES:%.*]], ptr noalias [[A:%.*]]) {
; VF4-NEXT:  [[ENTRY:.*:]]
; VF4-NEXT:    br label %[[VECTOR_PH:.*]]
; VF4:       [[VECTOR_PH]]:
; VF4-NEXT:    br label %[[VECTOR_BODY:.*]]
; VF4:       [[VECTOR_BODY]]:
; VF4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF4-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[A]], i64 [[INDEX]]
; VF4-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP0]], align 8
; VF4-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; VF4-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; VF4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[RES]], i64 [[INDEX]]
; VF4-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[STRIDED_VEC]], <4 x i64> [[STRIDED_VEC1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; VF4-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; VF4-NEXT:    store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; VF4-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VF4:       [[MIDDLE_BLOCK]]:
; VF4-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
; VF4-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i64 3
; VF4-NEXT:    br label %[[EXIT:.*]]
; VF4:       [[EXIT]]:
; VF4-NEXT:    ret i64 [[TMP5]]
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.A.0 = getelementptr inbounds nuw { i64, i64 }, ptr %A, i64 %iv
  %gep.A.1 = getelementptr inbounds nuw { i64, i64 } , ptr %A, i64 %iv, i32 1
  %l.0 = load i64, ptr %gep.A.0, align 8
  %l.1 = load i64, ptr %gep.A.1, align 8
  ; %add is defined inside the loop but its only use is in the exit block.
  %add = add i64 %l.0, 1
  %gep.res.0 = getelementptr inbounds nuw { i64, i64 }, ptr %res, i64 %iv
  ; The loaded pair is stored back member-for-member, forming a store
  ; interleave group matching the load group above.
  store i64 %l.0, ptr %gep.res.0, align 8
  %gep.res.1 = getelementptr inbounds nuw i8, ptr %gep.res.0, i64 8
  store i64 %l.1, ptr %gep.res.1, align 8
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv.next, 100
  br i1 %ec, label %exit, label %loop

exit:
  ret i64 %add
}

; The wide load itself (%l) is the live-out here: it is loaded in the loop and
; returned from %exit, while the store group writes only constants.
define i64 @test_wide_load_live_out_constant_store_group(ptr noalias %res, ptr noalias %A) {
; VF2-LABEL: define i64 @test_wide_load_live_out_constant_store_group(
; VF2-SAME: ptr noalias [[RES:%.*]], ptr noalias [[A:%.*]]) {
; VF2-NEXT:  [[ENTRY:.*:]]
; VF2-NEXT:    br label %[[VECTOR_PH:.*]]
; VF2:       [[VECTOR_PH]]:
; VF2-NEXT:    br label %[[VECTOR_BODY:.*]]
; VF2:       [[VECTOR_BODY]]:
; VF2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[INDEX]]
; VF2-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8
; VF2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[RES]], i64 [[INDEX]]
; VF2-NEXT:    store <4 x i64> zeroinitializer, ptr [[TMP1]], align 8
; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; VF2-NEXT:    br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; VF2:       [[MIDDLE_BLOCK]]:
; VF2-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i64 1
; VF2-NEXT:    br label %[[EXIT:.*]]
; VF2:       [[EXIT]]:
; VF2-NEXT:    ret i64 [[TMP3]]
;
; VF4-LABEL: define i64 @test_wide_load_live_out_constant_store_group(
; VF4-SAME: ptr noalias [[RES:%.*]], ptr noalias [[A:%.*]]) {
; VF4-NEXT:  [[ENTRY:.*:]]
; VF4-NEXT:    br label %[[VECTOR_PH:.*]]
; VF4:       [[VECTOR_PH]]:
; VF4-NEXT:    br label %[[VECTOR_BODY:.*]]
; VF4:       [[VECTOR_BODY]]:
; VF4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF4-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[INDEX]]
; VF4-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8
; VF4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[RES]], i64 [[INDEX]]
; VF4-NEXT:    store <8 x i64> zeroinitializer, ptr [[TMP1]], align 8
; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; VF4-NEXT:    br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; VF4:       [[MIDDLE_BLOCK]]:
; VF4-NEXT:    [[TMP3:%.*]] = extractelement <4 x i64> [[WIDE_LOAD]], i64 3
; VF4-NEXT:    br label %[[EXIT:.*]]
; VF4:       [[EXIT]]:
; VF4-NEXT:    ret i64 [[TMP3]]
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.A = getelementptr inbounds nuw i64, ptr %A, i64 %iv
  ; %l is defined inside the loop but its only use is in the exit block.
  %l = load i64, ptr %gep.A, align 8
  %gep.res.0 = getelementptr inbounds nuw { i64, i64 }, ptr %res, i64 %iv
  ; Both members of the { i64, i64 } store group receive the constant 0.
  store i64 0, ptr %gep.res.0, align 8
  %gep.res.1 = getelementptr inbounds nuw i8, ptr %gep.res.0, i64 8
  store i64 0, ptr %gep.res.1, align 8
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv.next, 100
  br i1 %ec, label %exit, label %loop

exit:
  ret i64 %l
}