Commit f85e06f6 authored by Renato Golin's avatar Renato Golin
Browse files

Merge r261331: avoid out of bounds loads for interleaved access vectorization

llvm-svn: 261341
parent e0c1ea44
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -4636,6 +4636,8 @@ void InterleavedAccessInfo::analyzeInterleaving(

  // Holds all interleaved store groups temporarily.
  SmallSetVector<InterleaveGroup *, 4> StoreGroups;
  // Holds all interleaved load groups temporarily.
  SmallSetVector<InterleaveGroup *, 4> LoadGroups;

  // Search the load-load/write-write pair B-A in bottom-up order and try to
  // insert B into the interleave group of A according to 3 rules:
@@ -4663,6 +4665,8 @@ void InterleavedAccessInfo::analyzeInterleaving(

    if (A->mayWriteToMemory())
      StoreGroups.insert(Group);
    else
      LoadGroups.insert(Group);

    for (auto II = std::next(I); II != E; ++II) {
      Instruction *B = II->first;
@@ -4710,6 +4714,12 @@ void InterleavedAccessInfo::analyzeInterleaving(
  for (InterleaveGroup *Group : StoreGroups)
    if (Group->getNumMembers() != Group->getFactor())
      releaseGroup(Group);

  // Remove interleaved load groups that are missing the first or the last
  // member. This guarantees that we won't do speculative out-of-bounds loads.
  for (InterleaveGroup *Group : LoadGroups)
    if (!Group->getMember(0) || !Group->getMember(Group->getFactor() - 1))
      releaseGroup(Group);
}

LoopVectorizationCostModel::VectorizationFactor
+7 −1
Original line number Diff line number Diff line
@@ -16,9 +16,15 @@ for.cond.cleanup: ; preds = %for.body
for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %0 = shl nsw i64 %indvars.iv, 1
  %odd.idx = add nsw i64 %0, 1

  %arrayidx = getelementptr inbounds double, double* %b, i64 %0
  %arrayidx.odd = getelementptr inbounds double, double* %b, i64 %odd.idx

  %1 = load double, double* %arrayidx, align 8
  %add = fadd double %1, 1.000000e+00
  %2 = load double, double* %arrayidx.odd, align 8

  %add = fadd double %1, %2
  %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
  store double %add, double* %arrayidx2, align 8
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+2 −4
Original line number Diff line number Diff line
@@ -292,10 +292,8 @@ for.body: ; preds = %for.body, %entry
; }

; CHECK-LABEL: @even_load(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NOT: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: shl nsw <4 x i32> %strided.vec, <i32 1, i32 1, i32 1, i32 1>
; CHECK-NOT: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK-NOT: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>

define void @even_load(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
entry: