Unverified Commit 0605d2c1 authored by Alexey Bataev's avatar Alexey Bataev Committed by GitHub
Browse files

[SLP][NFC] Add a test with the incorrect cross-loop vectorization

parent 7d853a22
Loading
Loading
Loading
Loading
+412 −0
Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=slp-vectorizer -slp-threshold=-500 -mtriple=x86_64-unknown-linux-gnu \
; RUN:     -pass-remarks-output=%t -S < %s | FileCheck %s
; RUN: FileCheck --input-file=%t %s --check-prefix=YAML

; YAML:      --- !Passed
; YAML-NEXT: Pass:            slp-vectorizer
; YAML-NEXT: Name:            StoresVectorized
; YAML-NEXT: Function:        sibling_root_loops_mismatched_tripcount
; YAML-NEXT: Args:
; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
; YAML-NEXT:   - Cost:            '31'
; YAML-NEXT:   - String:          ' and with tree size '
; YAML-NEXT:   - TreeSize:        '11'
; YAML:      --- !Passed
; YAML-NEXT: Pass:            slp-vectorizer
; YAML-NEXT: Name:            StoresVectorized
; YAML-NEXT: Function:        sibling_root_loops_mismatched_tripcount
; YAML-NEXT: Args:
; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
; YAML-NEXT:   - Cost:            '29'
; YAML-NEXT:   - String:          ' and with tree size '
; YAML-NEXT:   - TreeSize:        '11'
; Three sibling loops are reached directly from the entry switch on %sel; there
; is no enclosing loop. loop.a and loop.b are bounded by %n, loop.c by %m. Each
; loop computes four multiples of its own induction variable; merge picks one
; set through four phis and stores it to dst[0..3].
; The autogenerated assertions below record the current output: inside every
; loop the four scalar muls become two <2 x i64> muls of the splatted induction
; variable, the four phis become two <2 x i64> phis, and the four scalar stores
; become two <2 x i64> stores.
define void @sibling_root_loops_mismatched_tripcount(ptr %dst, i64 %n, i64 %m, i32 %sel) {
; CHECK-LABEL: define void @sibling_root_loops_mismatched_tripcount(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[M:%.*]], i32 [[SEL:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    switch i32 [[SEL]], label %[[LOOP_C:.*]] [
; CHECK-NEXT:      i32 0, label %[[LOOP_A:.*]]
; CHECK-NEXT:      i32 1, label %[[LOOP_B:.*]]
; CHECK-NEXT:    ]
; CHECK:       [[LOOP_A]]:
; CHECK-NEXT:    [[JA:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[JA_NEXT:%.*]], %[[LOOP_A]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> poison, i64 [[JA]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = mul <2 x i64> [[TMP1]], <i64 2, i64 3>
; CHECK-NEXT:    [[TMP3:%.*]] = mul <2 x i64> [[TMP1]], <i64 5, i64 7>
; CHECK-NEXT:    [[JA_NEXT]] = add i64 [[JA]], 1
; CHECK-NEXT:    [[CA:%.*]] = icmp ult i64 [[JA_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[CA]], label %[[LOOP_A]], label %[[MERGE:.*]]
; CHECK:       [[LOOP_B]]:
; CHECK-NEXT:    [[JB:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[JB_NEXT:%.*]], %[[LOOP_B]] ]
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[JB]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = mul <2 x i64> [[TMP5]], <i64 11, i64 13>
; CHECK-NEXT:    [[TMP7:%.*]] = mul <2 x i64> [[TMP5]], <i64 17, i64 19>
; CHECK-NEXT:    [[JB_NEXT]] = add i64 [[JB]], 1
; CHECK-NEXT:    [[CB:%.*]] = icmp ult i64 [[JB_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[CB]], label %[[LOOP_B]], label %[[MERGE]]
; CHECK:       [[LOOP_C]]:
; CHECK-NEXT:    [[JC:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[JC_NEXT:%.*]], %[[LOOP_C]] ]
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[JC]], i32 0
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP10:%.*]] = mul <2 x i64> [[TMP9]], <i64 23, i64 29>
; CHECK-NEXT:    [[TMP11:%.*]] = mul <2 x i64> [[TMP9]], <i64 31, i64 37>
; CHECK-NEXT:    [[JC_NEXT]] = add i64 [[JC]], 1
; CHECK-NEXT:    [[CC:%.*]] = icmp ult i64 [[JC_NEXT]], [[M]]
; CHECK-NEXT:    br i1 [[CC]], label %[[LOOP_C]], label %[[MERGE]]
; CHECK:       [[MERGE]]:
; CHECK-NEXT:    [[TMP12:%.*]] = phi <2 x i64> [ [[TMP2]], %[[LOOP_A]] ], [ [[TMP6]], %[[LOOP_B]] ], [ [[TMP10]], %[[LOOP_C]] ]
; CHECK-NEXT:    [[TMP13:%.*]] = phi <2 x i64> [ [[TMP3]], %[[LOOP_A]] ], [ [[TMP7]], %[[LOOP_B]] ], [ [[TMP11]], %[[LOOP_C]] ]
; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 0
; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 2
; CHECK-NEXT:    store <2 x i64> [[TMP12]], ptr [[G1]], align 8
; CHECK-NEXT:    store <2 x i64> [[TMP13]], ptr [[G3]], align 8
; CHECK-NEXT:    ret void
;
; Exactly one of the three loops runs; any %sel other than 0 or 1 takes loop.c.
entry:
  switch i32 %sel, label %loop.c [
  i32 0, label %loop.a
  i32 1, label %loop.b
  ]

; Single-block loop: the body always runs at least once and repeats while
; %ja.next is unsigned-less-than %n.
loop.a:
  %ja = phi i64 [ 0, %entry ], [ %ja.next, %loop.a ]
  %va1 = mul i64 %ja, 2
  %va2 = mul i64 %ja, 3
  %va3 = mul i64 %ja, 5
  %va4 = mul i64 %ja, 7
  %ja.next = add i64 %ja, 1
  %ca = icmp ult i64 %ja.next, %n
  br i1 %ca, label %loop.a, label %merge

; Same shape and same bound (%n) as loop.a, with different multipliers.
loop.b:
  %jb = phi i64 [ 0, %entry ], [ %jb.next, %loop.b ]
  %vb1 = mul i64 %jb, 11
  %vb2 = mul i64 %jb, 13
  %vb3 = mul i64 %jb, 17
  %vb4 = mul i64 %jb, 19
  %jb.next = add i64 %jb, 1
  %cb = icmp ult i64 %jb.next, %n
  br i1 %cb, label %loop.b, label %merge

; Same shape as loop.a/loop.b, but the exit compare uses %m instead of %n.
loop.c:
  %jc = phi i64 [ 0, %entry ], [ %jc.next, %loop.c ]
  %vc1 = mul i64 %jc, 23
  %vc2 = mul i64 %jc, 29
  %vc3 = mul i64 %jc, 31
  %vc4 = mul i64 %jc, 37
  %jc.next = add i64 %jc, 1
  %cc = icmp ult i64 %jc.next, %m
  br i1 %cc, label %loop.c, label %merge

; Only the values from the final iteration of the loop that ran reach the phis.
merge:
  %p1 = phi i64 [ %va1, %loop.a ], [ %vb1, %loop.b ], [ %vc1, %loop.c ]
  %p2 = phi i64 [ %va2, %loop.a ], [ %vb2, %loop.b ], [ %vc2, %loop.c ]
  %p3 = phi i64 [ %va3, %loop.a ], [ %vb3, %loop.b ], [ %vc3, %loop.c ]
  %p4 = phi i64 [ %va4, %loop.a ], [ %vb4, %loop.b ], [ %vc4, %loop.c ]
  ; %g1..%g4 address four adjacent i64 slots starting at %dst.
  %g1 = getelementptr inbounds i64, ptr %dst, i64 0
  %g2 = getelementptr inbounds i64, ptr %dst, i64 1
  %g3 = getelementptr inbounds i64, ptr %dst, i64 2
  %g4 = getelementptr inbounds i64, ptr %dst, i64 3
  store i64 %p1, ptr %g1, align 8
  store i64 %p2, ptr %g2, align 8
  store i64 %p3, ptr %g3, align 8
  store i64 %p4, ptr %g4, align 8
  ret void
}

; YAML:      --- !Passed
; YAML-NEXT: Pass:            slp-vectorizer
; YAML-NEXT: Name:            StoresVectorized
; YAML-NEXT: Function:        sibling_inner_loops_mismatched_tripcount
; YAML-NEXT: Args:
; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
; YAML-NEXT:   - Cost:            '200'
; YAML-NEXT:   - String:          ' and with tree size '
; YAML-NEXT:   - TreeSize:        '11'
; YAML:      --- !Passed
; YAML-NEXT: Pass:            slp-vectorizer
; YAML-NEXT: Name:            StoresVectorized
; YAML-NEXT: Function:        sibling_inner_loops_mismatched_tripcount
; YAML-NEXT: Args:
; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
; YAML-NEXT:   - Cost:            '200'
; YAML-NEXT:   - String:          ' and with tree size '
; YAML-NEXT:   - TreeSize:        '11'
; Three sibling inner loops are nested inside an outer loop whose induction
; variable %i runs from 0 to 99. inner.a and inner.b are bounded by %n,
; inner.c by %m. After the selected inner loop finishes, merge stores the four
; selected values to dst[4*i .. 4*i+3].
; The autogenerated assertions below record the current output: the inner
; loops and the four phis stay scalar; in merge the phi values are packed
; pairwise with insertelement and written with two <2 x i64> stores.
define void @sibling_inner_loops_mismatched_tripcount(ptr %dst, i64 %n, i64 %m, i32 %sel) {
; CHECK-LABEL: define void @sibling_inner_loops_mismatched_tripcount(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[M:%.*]], i32 [[SEL:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[OUTER_HEADER:.*]]
; CHECK:       [[OUTER_HEADER]]:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
; CHECK-NEXT:    switch i32 [[SEL]], label %[[INNER_C:.*]] [
; CHECK-NEXT:      i32 0, label %[[INNER_A:.*]]
; CHECK-NEXT:      i32 1, label %[[INNER_B:.*]]
; CHECK-NEXT:    ]
; CHECK:       [[INNER_A]]:
; CHECK-NEXT:    [[JA:%.*]] = phi i64 [ 0, %[[OUTER_HEADER]] ], [ [[JA_NEXT:%.*]], %[[INNER_A]] ]
; CHECK-NEXT:    [[VA1:%.*]] = mul i64 [[JA]], 2
; CHECK-NEXT:    [[VA2:%.*]] = mul i64 [[JA]], 3
; CHECK-NEXT:    [[VA3:%.*]] = mul i64 [[JA]], 5
; CHECK-NEXT:    [[VA4:%.*]] = mul i64 [[JA]], 7
; CHECK-NEXT:    [[JA_NEXT]] = add i64 [[JA]], 1
; CHECK-NEXT:    [[CA:%.*]] = icmp ult i64 [[JA_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[CA]], label %[[INNER_A]], label %[[MERGE:.*]]
; CHECK:       [[INNER_B]]:
; CHECK-NEXT:    [[JB:%.*]] = phi i64 [ 0, %[[OUTER_HEADER]] ], [ [[JB_NEXT:%.*]], %[[INNER_B]] ]
; CHECK-NEXT:    [[VB1:%.*]] = mul i64 [[JB]], 11
; CHECK-NEXT:    [[VB2:%.*]] = mul i64 [[JB]], 13
; CHECK-NEXT:    [[VB3:%.*]] = mul i64 [[JB]], 17
; CHECK-NEXT:    [[VB4:%.*]] = mul i64 [[JB]], 19
; CHECK-NEXT:    [[JB_NEXT]] = add i64 [[JB]], 1
; CHECK-NEXT:    [[CB:%.*]] = icmp ult i64 [[JB_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[CB]], label %[[INNER_B]], label %[[MERGE]]
; CHECK:       [[INNER_C]]:
; CHECK-NEXT:    [[JC:%.*]] = phi i64 [ 0, %[[OUTER_HEADER]] ], [ [[JC_NEXT:%.*]], %[[INNER_C]] ]
; CHECK-NEXT:    [[VC1:%.*]] = mul i64 [[JC]], 23
; CHECK-NEXT:    [[VC2:%.*]] = mul i64 [[JC]], 29
; CHECK-NEXT:    [[VC3:%.*]] = mul i64 [[JC]], 31
; CHECK-NEXT:    [[VC4:%.*]] = mul i64 [[JC]], 37
; CHECK-NEXT:    [[JC_NEXT]] = add i64 [[JC]], 1
; CHECK-NEXT:    [[CC:%.*]] = icmp ult i64 [[JC_NEXT]], [[M]]
; CHECK-NEXT:    br i1 [[CC]], label %[[INNER_C]], label %[[MERGE]]
; CHECK:       [[MERGE]]:
; CHECK-NEXT:    [[P1:%.*]] = phi i64 [ [[VA1]], %[[INNER_A]] ], [ [[VB1]], %[[INNER_B]] ], [ [[VC1]], %[[INNER_C]] ]
; CHECK-NEXT:    [[P2:%.*]] = phi i64 [ [[VA2]], %[[INNER_A]] ], [ [[VB2]], %[[INNER_B]] ], [ [[VC2]], %[[INNER_C]] ]
; CHECK-NEXT:    [[P3:%.*]] = phi i64 [ [[VA3]], %[[INNER_A]] ], [ [[VB3]], %[[INNER_B]] ], [ [[VC3]], %[[INNER_C]] ]
; CHECK-NEXT:    [[P4:%.*]] = phi i64 [ [[VA4]], %[[INNER_A]] ], [ [[VB4]], %[[INNER_B]] ], [ [[VC4]], %[[INNER_C]] ]
; CHECK-NEXT:    [[BASE:%.*]] = mul i64 [[I]], 4
; CHECK-NEXT:    [[OFF3:%.*]] = add i64 [[BASE]], 2
; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[BASE]]
; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[OFF3]]
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> poison, i64 [[P1]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[P2]], i32 1
; CHECK-NEXT:    store <2 x i64> [[TMP1]], ptr [[G1]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[P3]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[P4]], i32 1
; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[G3]], align 8
; CHECK-NEXT:    br label %[[OUTER_LATCH]]
; CHECK:       [[OUTER_LATCH]]:
; CHECK-NEXT:    [[I_NEXT]] = add i64 [[I]], 1
; CHECK-NEXT:    [[CO:%.*]] = icmp ult i64 [[I_NEXT]], 100
; CHECK-NEXT:    br i1 [[CO]], label %[[OUTER_HEADER]], label %[[EXIT:.*]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  br label %outer.header

; Each outer iteration dispatches on the loop-invariant %sel, so the same inner
; loop is selected every time; any %sel other than 0 or 1 takes inner.c.
outer.header:
  %i = phi i64 [ 0, %entry ], [ %i.next, %outer.latch ]
  switch i32 %sel, label %inner.c [
  i32 0, label %inner.a
  i32 1, label %inner.b
  ]

; Single-block loop: the body always runs at least once and repeats while
; %ja.next is unsigned-less-than %n.
inner.a:
  %ja = phi i64 [ 0, %outer.header ], [ %ja.next, %inner.a ]
  %va1 = mul i64 %ja, 2
  %va2 = mul i64 %ja, 3
  %va3 = mul i64 %ja, 5
  %va4 = mul i64 %ja, 7
  %ja.next = add i64 %ja, 1
  %ca = icmp ult i64 %ja.next, %n
  br i1 %ca, label %inner.a, label %merge

; Same shape and same bound (%n) as inner.a, with different multipliers.
inner.b:
  %jb = phi i64 [ 0, %outer.header ], [ %jb.next, %inner.b ]
  %vb1 = mul i64 %jb, 11
  %vb2 = mul i64 %jb, 13
  %vb3 = mul i64 %jb, 17
  %vb4 = mul i64 %jb, 19
  %jb.next = add i64 %jb, 1
  %cb = icmp ult i64 %jb.next, %n
  br i1 %cb, label %inner.b, label %merge

; Same shape as inner.a/inner.b, but the exit compare uses %m instead of %n.
inner.c:
  %jc = phi i64 [ 0, %outer.header ], [ %jc.next, %inner.c ]
  %vc1 = mul i64 %jc, 23
  %vc2 = mul i64 %jc, 29
  %vc3 = mul i64 %jc, 31
  %vc4 = mul i64 %jc, 37
  %jc.next = add i64 %jc, 1
  %cc = icmp ult i64 %jc.next, %m
  br i1 %cc, label %inner.c, label %merge

; Only the values from the final iteration of the inner loop that ran reach the
; phis.
merge:
  %p1 = phi i64 [ %va1, %inner.a ], [ %vb1, %inner.b ], [ %vc1, %inner.c ]
  %p2 = phi i64 [ %va2, %inner.a ], [ %vb2, %inner.b ], [ %vc2, %inner.c ]
  %p3 = phi i64 [ %va3, %inner.a ], [ %vb3, %inner.b ], [ %vc3, %inner.c ]
  %p4 = phi i64 [ %va4, %inner.a ], [ %vb4, %inner.b ], [ %vc4, %inner.c ]
  ; Element indices 4*i, 4*i+1, 4*i+2, 4*i+3: four adjacent i64 slots per outer
  ; iteration.
  %base = mul i64 %i, 4
  %off2 = add i64 %base, 1
  %off3 = add i64 %base, 2
  %off4 = add i64 %base, 3
  %g1 = getelementptr inbounds i64, ptr %dst, i64 %base
  %g2 = getelementptr inbounds i64, ptr %dst, i64 %off2
  %g3 = getelementptr inbounds i64, ptr %dst, i64 %off3
  %g4 = getelementptr inbounds i64, ptr %dst, i64 %off4
  store i64 %p1, ptr %g1, align 8
  store i64 %p2, ptr %g2, align 8
  store i64 %p3, ptr %g3, align 8
  store i64 %p4, ptr %g4, align 8
  br label %outer.latch

; Constant bound: the outer loop continues while %i.next is below 100.
outer.latch:
  %i.next = add i64 %i, 1
  %co = icmp ult i64 %i.next, 100
  br i1 %co, label %outer.header, label %exit

exit:
  ret void
}

; YAML:      --- !Passed
; YAML-NEXT: Pass:            slp-vectorizer
; YAML-NEXT: Name:            StoresVectorized
; YAML-NEXT: Function:        sibling_inner_loops_matching_tripcount
; YAML-NEXT: Args:
; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
; YAML-NEXT:   - Cost:            '200'
; YAML-NEXT:   - String:          ' and with tree size '
; YAML-NEXT:   - TreeSize:        '11'
; YAML:      --- !Passed
; YAML-NEXT: Pass:            slp-vectorizer
; YAML-NEXT: Name:            StoresVectorized
; YAML-NEXT: Function:        sibling_inner_loops_matching_tripcount
; YAML-NEXT: Args:
; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
; YAML-NEXT:   - Cost:            '200'
; YAML-NEXT:   - String:          ' and with tree size '
; YAML-NEXT:   - TreeSize:        '11'
; Three sibling inner loops are nested inside an outer loop whose induction
; variable %i runs from 0 to 99. All three inner loops are bounded by the same
; value %n; this function takes no %m parameter. After the selected inner loop
; finishes, merge stores the four selected values to dst[4*i .. 4*i+3].
; The autogenerated assertions below record the current output: the inner
; loops and the four phis stay scalar; in merge the phi values are packed
; pairwise with insertelement and written with two <2 x i64> stores.
define void @sibling_inner_loops_matching_tripcount(ptr %dst, i64 %n, i32 %sel) {
; CHECK-LABEL: define void @sibling_inner_loops_matching_tripcount(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]], i32 [[SEL:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[OUTER_HEADER:.*]]
; CHECK:       [[OUTER_HEADER]]:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
; CHECK-NEXT:    switch i32 [[SEL]], label %[[INNER_C:.*]] [
; CHECK-NEXT:      i32 0, label %[[INNER_A:.*]]
; CHECK-NEXT:      i32 1, label %[[INNER_B:.*]]
; CHECK-NEXT:    ]
; CHECK:       [[INNER_A]]:
; CHECK-NEXT:    [[JA:%.*]] = phi i64 [ 0, %[[OUTER_HEADER]] ], [ [[JA_NEXT:%.*]], %[[INNER_A]] ]
; CHECK-NEXT:    [[VA1:%.*]] = mul i64 [[JA]], 2
; CHECK-NEXT:    [[VA2:%.*]] = mul i64 [[JA]], 3
; CHECK-NEXT:    [[VA3:%.*]] = mul i64 [[JA]], 5
; CHECK-NEXT:    [[VA4:%.*]] = mul i64 [[JA]], 7
; CHECK-NEXT:    [[JA_NEXT]] = add i64 [[JA]], 1
; CHECK-NEXT:    [[CA:%.*]] = icmp ult i64 [[JA_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[CA]], label %[[INNER_A]], label %[[MERGE:.*]]
; CHECK:       [[INNER_B]]:
; CHECK-NEXT:    [[JB:%.*]] = phi i64 [ 0, %[[OUTER_HEADER]] ], [ [[JB_NEXT:%.*]], %[[INNER_B]] ]
; CHECK-NEXT:    [[VB1:%.*]] = mul i64 [[JB]], 11
; CHECK-NEXT:    [[VB2:%.*]] = mul i64 [[JB]], 13
; CHECK-NEXT:    [[VB3:%.*]] = mul i64 [[JB]], 17
; CHECK-NEXT:    [[VB4:%.*]] = mul i64 [[JB]], 19
; CHECK-NEXT:    [[JB_NEXT]] = add i64 [[JB]], 1
; CHECK-NEXT:    [[CB:%.*]] = icmp ult i64 [[JB_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[CB]], label %[[INNER_B]], label %[[MERGE]]
; CHECK:       [[INNER_C]]:
; CHECK-NEXT:    [[JC:%.*]] = phi i64 [ 0, %[[OUTER_HEADER]] ], [ [[JC_NEXT:%.*]], %[[INNER_C]] ]
; CHECK-NEXT:    [[VC1:%.*]] = mul i64 [[JC]], 23
; CHECK-NEXT:    [[VC2:%.*]] = mul i64 [[JC]], 29
; CHECK-NEXT:    [[VC3:%.*]] = mul i64 [[JC]], 31
; CHECK-NEXT:    [[VC4:%.*]] = mul i64 [[JC]], 37
; CHECK-NEXT:    [[JC_NEXT]] = add i64 [[JC]], 1
; CHECK-NEXT:    [[CC:%.*]] = icmp ult i64 [[JC_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[CC]], label %[[INNER_C]], label %[[MERGE]]
; CHECK:       [[MERGE]]:
; CHECK-NEXT:    [[P1:%.*]] = phi i64 [ [[VA1]], %[[INNER_A]] ], [ [[VB1]], %[[INNER_B]] ], [ [[VC1]], %[[INNER_C]] ]
; CHECK-NEXT:    [[P2:%.*]] = phi i64 [ [[VA2]], %[[INNER_A]] ], [ [[VB2]], %[[INNER_B]] ], [ [[VC2]], %[[INNER_C]] ]
; CHECK-NEXT:    [[P3:%.*]] = phi i64 [ [[VA3]], %[[INNER_A]] ], [ [[VB3]], %[[INNER_B]] ], [ [[VC3]], %[[INNER_C]] ]
; CHECK-NEXT:    [[P4:%.*]] = phi i64 [ [[VA4]], %[[INNER_A]] ], [ [[VB4]], %[[INNER_B]] ], [ [[VC4]], %[[INNER_C]] ]
; CHECK-NEXT:    [[BASE:%.*]] = mul i64 [[I]], 4
; CHECK-NEXT:    [[OFF3:%.*]] = add i64 [[BASE]], 2
; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[BASE]]
; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[OFF3]]
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> poison, i64 [[P1]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[P2]], i32 1
; CHECK-NEXT:    store <2 x i64> [[TMP1]], ptr [[G1]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[P3]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[P4]], i32 1
; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[G3]], align 8
; CHECK-NEXT:    br label %[[OUTER_LATCH]]
; CHECK:       [[OUTER_LATCH]]:
; CHECK-NEXT:    [[I_NEXT]] = add i64 [[I]], 1
; CHECK-NEXT:    [[CO:%.*]] = icmp ult i64 [[I_NEXT]], 100
; CHECK-NEXT:    br i1 [[CO]], label %[[OUTER_HEADER]], label %[[EXIT:.*]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  br label %outer.header

; Each outer iteration dispatches on the loop-invariant %sel, so the same inner
; loop is selected every time; any %sel other than 0 or 1 takes inner.c.
outer.header:
  %i = phi i64 [ 0, %entry ], [ %i.next, %outer.latch ]
  switch i32 %sel, label %inner.c [
  i32 0, label %inner.a
  i32 1, label %inner.b
  ]

; Single-block loop: the body always runs at least once and repeats while
; %ja.next is unsigned-less-than %n.
inner.a:
  %ja = phi i64 [ 0, %outer.header ], [ %ja.next, %inner.a ]
  %va1 = mul i64 %ja, 2
  %va2 = mul i64 %ja, 3
  %va3 = mul i64 %ja, 5
  %va4 = mul i64 %ja, 7
  %ja.next = add i64 %ja, 1
  %ca = icmp ult i64 %ja.next, %n
  br i1 %ca, label %inner.a, label %merge

; Same shape and same bound (%n) as inner.a, with different multipliers.
inner.b:
  %jb = phi i64 [ 0, %outer.header ], [ %jb.next, %inner.b ]
  %vb1 = mul i64 %jb, 11
  %vb2 = mul i64 %jb, 13
  %vb3 = mul i64 %jb, 17
  %vb4 = mul i64 %jb, 19
  %jb.next = add i64 %jb, 1
  %cb = icmp ult i64 %jb.next, %n
  br i1 %cb, label %inner.b, label %merge

; Same shape and same bound (%n) as inner.a/inner.b, with different multipliers.
inner.c:
  %jc = phi i64 [ 0, %outer.header ], [ %jc.next, %inner.c ]
  %vc1 = mul i64 %jc, 23
  %vc2 = mul i64 %jc, 29
  %vc3 = mul i64 %jc, 31
  %vc4 = mul i64 %jc, 37
  %jc.next = add i64 %jc, 1
  %cc = icmp ult i64 %jc.next, %n
  br i1 %cc, label %inner.c, label %merge

; Only the values from the final iteration of the inner loop that ran reach the
; phis.
merge:
  %p1 = phi i64 [ %va1, %inner.a ], [ %vb1, %inner.b ], [ %vc1, %inner.c ]
  %p2 = phi i64 [ %va2, %inner.a ], [ %vb2, %inner.b ], [ %vc2, %inner.c ]
  %p3 = phi i64 [ %va3, %inner.a ], [ %vb3, %inner.b ], [ %vc3, %inner.c ]
  %p4 = phi i64 [ %va4, %inner.a ], [ %vb4, %inner.b ], [ %vc4, %inner.c ]
  ; Element indices 4*i, 4*i+1, 4*i+2, 4*i+3: four adjacent i64 slots per outer
  ; iteration.
  %base = mul i64 %i, 4
  %off2 = add i64 %base, 1
  %off3 = add i64 %base, 2
  %off4 = add i64 %base, 3
  %g1 = getelementptr inbounds i64, ptr %dst, i64 %base
  %g2 = getelementptr inbounds i64, ptr %dst, i64 %off2
  %g3 = getelementptr inbounds i64, ptr %dst, i64 %off3
  %g4 = getelementptr inbounds i64, ptr %dst, i64 %off4
  store i64 %p1, ptr %g1, align 8
  store i64 %p2, ptr %g2, align 8
  store i64 %p3, ptr %g3, align 8
  store i64 %p4, ptr %g4, align 8
  br label %outer.latch

; Constant bound: the outer loop continues while %i.next is below 100.
outer.latch:
  %i.next = add i64 %i, 1
  %co = icmp ult i64 %i.next, 100
  br i1 %co, label %outer.header, label %exit

exit:
  ret void
}