Commit 81ee4844 authored by Ulrich Weigand
Browse files

[FPEnv] Fix chain handling regression after 04a86966

Code in getRoot made the assumption that every node in PendingLoads
must always itself have a dependency on the current DAG root node.

After the changes in 04a86966, it turns out that this assumption no
longer holds true, causing wrong codegen in some cases (e.g. stores
after constrained FP intrinsics might get deleted).

To fix this, we now need to make sure that the TokenFactor created
by getRoot always includes the previous root, if there is no implicit
dependency already present.

The original getControlRoot code already has exactly this check,
so this patch simply reuses that code now for getRoot as well.
This fixes the regression.

No functional change (NFC) if no constrained FP intrinsic is present.
parent df186507
Loading
Loading
Loading
Loading
+28 −34
Original line number Diff line number Diff line
@@ -1037,24 +1037,41 @@ void SelectionDAGBuilder::clearDanglingDebugInfo() {
  DanglingDebugInfoMap.clear();
}

/// Update the DAG root to include dependencies on the chains in Pending.
///
/// If Pending is empty, the current root is returned and nothing changes.
/// Otherwise the new root is the single pending chain, or a TokenFactor over
/// all pending chains when there is more than one.  Before that, the current
/// root is appended to Pending unless it is the EntryToken or some pending
/// node already has it as operand 0, so the dependency on the previous root
/// is kept.
///
/// \param Pending  list of pending chain values; it may grow by one entry
///                 (the old root) and is cleared before returning.
/// \returns the DAG root after the update.
SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl<SDValue> &Pending) {
  SDValue Root = DAG.getRoot();

  if (Pending.empty())
    return Root;

  // Add current root to Pending, unless we already indirectly
  // depend on it.
  if (Root.getOpcode() != ISD::EntryToken) {
    unsigned i = 0, e = Pending.size();
    for (; i != e; ++i) {
      // Every pending node is expected to carry more than one operand.
      assert(Pending[i].getNode()->getNumOperands() > 1);
      if (Pending[i].getNode()->getOperand(0) == Root)
        break;  // Don't add the root if we already indirectly depend on it.
    }

    // The loop ran to completion: no pending node references the old root.
    if (i == e)
      Pending.push_back(Root);
  }

  // A lone chain can serve as the root directly; several need a TokenFactor.
  if (Pending.size() == 1)
    Root = Pending[0];
  else
    Root = DAG.getTokenFactor(getCurSDLoc(), Pending);

  DAG.setRoot(Root);
  Pending.clear();
  return Root;
}

/// Hand PendingLoads to updateRoot and return the root it reports back.
SDValue SelectionDAGBuilder::getMemoryRoot() {
  SDValue MemoryRoot = updateRoot(PendingLoads);
  return MemoryRoot;
}

SDValue SelectionDAGBuilder::getRoot() {
  // Chain up all pending constrained intrinsics together with all
  // pending loads, by simply appending them to PendingLoads and
@@ -1072,35 +1089,12 @@ SDValue SelectionDAGBuilder::getRoot() {
}

/// Make the DAG root depend on all pending exports and return the new root.
///
/// The chains collected in PendingConstrainedFPStrict are first moved onto
/// PendingExports; merging that list into the root (including the check for
/// an existing dependency on the previous root) is delegated to updateRoot.
///
/// \returns the root value produced by updateRoot.
SDValue SelectionDAGBuilder::getControlRoot() {
  // We need to emit pending fpexcept.strict constrained intrinsics,
  // so append them to the PendingExports list.
  PendingExports.append(PendingConstrainedFPStrict.begin(),
                        PendingConstrainedFPStrict.end());
  PendingConstrainedFPStrict.clear();
  return updateRoot(PendingExports);
}

void SelectionDAGBuilder::visit(const Instruction &I) {
+3 −0
Original line number Diff line number Diff line
@@ -154,6 +154,9 @@ private:
  SmallVector<SDValue, 8> PendingConstrainedFP;
  SmallVector<SDValue, 8> PendingConstrainedFPStrict;

  /// Update root to include all chains from the Pending list.
  ///
  /// Returns the current root unchanged when Pending is empty.  Otherwise a
  /// new root is built from the pending chains (with the previous root added
  /// when it is not the EntryToken and no pending node already has it as
  /// operand 0), installed as the DAG root and returned; Pending is cleared.
  SDValue updateRoot(SmallVectorImpl<SDValue> &Pending);

  /// A unique monotonically increasing number used to order the SDNodes we
  /// create.
  unsigned SDNodeOrder;
+61 −3
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@
declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
declare float @llvm.sqrt.f32(float)
declare void @llvm.s390.sfpc(i32)
declare void @bar()

; The basic assumption of all following tests is that on z13, we never
; want to see two square root instructions directly in a row, so the
@@ -300,7 +301,7 @@ define void @f13(float %f1) {
  ret void
}

define void @f14(float %f1) {
define void @f14(float %f1) #0 {
; CHECK-LABEL: f14:
; CHECK-NOT: sqeb
; CHECK: br %r14
@@ -313,7 +314,7 @@ define void @f14(float %f1) {
  ret void
}

define void @f15(float %f1) {
define void @f15(float %f1) #0 {
; CHECK-LABEL: f15:
; CHECK-NOT: sqeb
; CHECK: br %r14
@@ -326,7 +327,7 @@ define void @f15(float %f1) {
  ret void
}

define void @f16(float %f1) {
define void @f16(float %f1) #0 {
; CHECK-LABEL: f16:
; CHECK: sqebr
; CHECK: br %r14
@@ -339,4 +340,61 @@ define void @f16(float %f1) {
  ret void
}


; Verify that constrained intrinsics and memory operations get their
; chains linked up properly.

; Non-constrained case: a plain llvm.sqrt.f32 call, a store of its result to
; %out, then a tail call to @bar.  The output is expected to contain sqebr,
; ste and "jg bar" in that order.
define void @f17(float %in, float* %out) #0 {
; CHECK-LABEL: f17:
; CHECK: sqebr
; CHECK: ste
; CHECK: jg bar
  %sqrt = call float @llvm.sqrt.f32(float %in)
  store float %sqrt, float* %out, align 4
  tail call void @bar() #0
  ret void
}

; Constrained sqrt with "fpexcept.ignore": its result is stored to %out and
; then @bar is tail-called.  The output is expected to contain sqebr, ste and
; "jg bar" in that order.
define void @f18(float %in, float* %out) #0 {
; CHECK-LABEL: f18:
; CHECK: sqebr
; CHECK: ste
; CHECK: jg bar
  %sqrt = call float @llvm.experimental.constrained.sqrt.f32(
                        float %in,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.ignore") #0
  store float %sqrt, float* %out, align 4
  tail call void @bar() #0
  ret void
}

; Constrained sqrt with "fpexcept.maytrap": its result is stored to %out and
; then @bar is tail-called.  The output is expected to contain sqebr, ste and
; "jg bar" in that order.
define void @f19(float %in, float* %out) #0 {
; CHECK-LABEL: f19:
; CHECK: sqebr
; CHECK: ste
; CHECK: jg bar
  %sqrt = call float @llvm.experimental.constrained.sqrt.f32(
                        float %in,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.maytrap") #0
  store float %sqrt, float* %out, align 4
  tail call void @bar() #0
  ret void
}

; Constrained sqrt with "fpexcept.strict": its result is stored to %out and
; then @bar is tail-called.  The output is expected to contain sqebr, ste and
; "jg bar" in that order.
define void @f20(float %in, float* %out) #0 {
; CHECK-LABEL: f20:
; CHECK: sqebr
; CHECK: ste
; CHECK: jg bar
  %sqrt = call float @llvm.experimental.constrained.sqrt.f32(
                        float %in,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  store float %sqrt, float* %out, align 4
  tail call void @bar() #0
  ret void
}

attributes #0 = { strictfp }