1 files changed, 196 insertions, 26 deletions
diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index abe0babc3f12..62b40a23e38c 100644
--- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -69,6 +69,7 @@ using namespace llvm;
 
 STATISTIC(GuardsEliminated, "Number of eliminated guards");
 STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches");
+STATISTIC(FreezeAdded, "Number of freeze instruction introduced");
 
 static cl::opt<bool>
     WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden,
@@ -113,6 +114,23 @@ static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) {
   ++GuardsEliminated;
 }
 
+/// Find a point at which the widened condition of \p Guard should be inserted.
+/// When it is represented as intrinsic call, we can do it right before the call
+/// instruction. However, when we are dealing with widenable branch, we must
+/// account for the following situation: widening should not turn a
+/// loop-invariant condition into a loop-variant. It means that if
+/// widenable.condition() call is invariant (w.r.t. any loop), the new wide
+/// condition should stay invariant. Otherwise there can be a miscompile, like
+/// the one described at https://github.com/llvm/llvm-project/issues/60234. The
+/// safest way to do it is to expand the new condition at WC's block.
+static Instruction *findInsertionPointForWideCondition(Instruction *Guard) {
+  Value *Condition, *WC;
+  BasicBlock *IfTrue, *IfFalse;
+  if (parseWidenableBranch(Guard, Condition, WC, IfTrue, IfFalse))
+    return cast<Instruction>(WC);
+  return Guard;
+}
+
 class GuardWideningImpl {
   DominatorTree &DT;
   PostDominatorTree *PDT;
@@ -170,16 +188,16 @@ class GuardWideningImpl {
                                      bool InvertCond);
 
   /// Helper to check if \p V can be hoisted to \p InsertPos.
-  bool isAvailableAt(const Value *V, const Instruction *InsertPos) const {
+  bool canBeHoistedTo(const Value *V, const Instruction *InsertPos) const {
     SmallPtrSet<const Instruction *, 8> Visited;
-    return isAvailableAt(V, InsertPos, Visited);
+    return canBeHoistedTo(V, InsertPos, Visited);
   }
 
-  bool isAvailableAt(const Value *V, const Instruction *InsertPos,
-                     SmallPtrSetImpl<const Instruction *> &Visited) const;
+  bool canBeHoistedTo(const Value *V, const Instruction *InsertPos,
+                      SmallPtrSetImpl<const Instruction *> &Visited) const;
 
   /// Helper to hoist \p V to \p InsertPos.  Guaranteed to succeed if \c
-  /// isAvailableAt returned true.
+  /// canBeHoistedTo returned true.
   void makeAvailableAt(Value *V, Instruction *InsertPos) const;
 
   /// Common helper used by \c widenGuard and \c isWideningCondProfitable.  Try
@@ -192,6 +210,10 @@ class GuardWideningImpl {
   bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt,
                        Value *&Result, bool InvertCondition);
 
+  /// Adds freeze to Orig and push it as far as possible very aggressively.
+  /// Also replaces all uses of frozen instruction with frozen version.
+  Value *freezeAndPush(Value *Orig, Instruction *InsertPt);
+
   /// Represents a range check of the form \c Base + \c Offset u< \c Length,
   /// with the constraint that \c Length is not negative.  \c CheckInst is the
   /// pre-existing instruction in the IR that computes the result of this range
@@ -263,8 +285,8 @@ class GuardWideningImpl {
   void widenGuard(Instruction *ToWiden, Value *NewCondition,
                   bool InvertCondition) {
     Value *Result;
-
-    widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result,
+    Instruction *InsertPt = findInsertionPointForWideCondition(ToWiden);
+    widenCondCommon(getCondition(ToWiden), NewCondition, InsertPt, Result,
                     InvertCondition);
     if (isGuardAsWidenableBranch(ToWiden)) {
       setWidenableBranchCond(cast<BranchInst>(ToWiden), Result);
@@ -422,7 +444,10 @@ GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr,
     HoistingOutOfLoop = true;
   }
 
-  if (!isAvailableAt(getCondition(DominatedInstr), DominatingGuard))
+  auto *WideningPoint = findInsertionPointForWideCondition(DominatingGuard);
+  if (!canBeHoistedTo(getCondition(DominatedInstr), WideningPoint))
+    return WS_IllegalOrNegative;
+  if (!canBeHoistedTo(getCondition(DominatingGuard), WideningPoint))
     return WS_IllegalOrNegative;
 
   // If the guard was conditional executed, it may never be reached
@@ -440,30 +465,70 @@ GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr,
   if (HoistingOutOfLoop)
     return WS_Positive;
 
-  // Returns true if we might be hoisting above explicit control flow.  Note
-  // that this completely ignores implicit control flow (guards, calls which
-  // throw, etc...).  That choice appears arbitrary.
-  auto MaybeHoistingOutOfIf = [&]() {
-    auto *DominatingBlock = DominatingGuard->getParent();
-    auto *DominatedBlock = DominatedInstr->getParent();
-    if (isGuardAsWidenableBranch(DominatingGuard))
-      DominatingBlock = cast<BranchInst>(DominatingGuard)->getSuccessor(0);
+  // For a given basic block \p BB, return its successor which is guaranteed or
+  // highly likely will be taken as its successor.
+  auto GetLikelySuccessor = [](const BasicBlock * BB)->const BasicBlock * {
+    if (auto *UniqueSucc = BB->getUniqueSuccessor())
+      return UniqueSucc;
+    auto *Term = BB->getTerminator();
+    Value *Cond = nullptr;
+    const BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
+    using namespace PatternMatch;
+    if (!match(Term, m_Br(m_Value(Cond), m_BasicBlock(IfTrue),
+                          m_BasicBlock(IfFalse))))
+      return nullptr;
+    // For constant conditions, only one dynamical successor is possible
+    if (auto *ConstCond = dyn_cast<ConstantInt>(Cond))
+      return ConstCond->isAllOnesValue() ? IfTrue : IfFalse;
+    // If one of successors ends with deopt, another one is likely.
+    if (IfFalse->getPostdominatingDeoptimizeCall())
+      return IfTrue;
+    if (IfTrue->getPostdominatingDeoptimizeCall())
+      return IfFalse;
+    // TODO: Use branch frequency metatada to allow hoisting through non-deopt
+    // branches?
+    return nullptr;
+  };
+
+  // Returns true if we might be hoisting above explicit control flow into a
+  // considerably hotter block.  Note that this completely ignores implicit
+  // control flow (guards, calls which throw, etc...).  That choice appears
+  // arbitrary (we assume that implicit control flow exits are all rare).
+  auto MaybeHoistingToHotterBlock = [&]() {
+    const auto *DominatingBlock = DominatingGuard->getParent();
+    const auto *DominatedBlock = DominatedInstr->getParent();
+
+    // Descend as low as we can, always taking the likely successor.
+    assert(DT.isReachableFromEntry(DominatingBlock) && "Unreached code");
+    assert(DT.isReachableFromEntry(DominatedBlock) && "Unreached code");
+    assert(DT.dominates(DominatingBlock, DominatedBlock) && "No dominance");
+    while (DominatedBlock != DominatingBlock) {
+      auto *LikelySucc = GetLikelySuccessor(DominatingBlock);
+      // No likely successor?
+      if (!LikelySucc)
+        break;
+      // Only go down the dominator tree.
+      if (!DT.properlyDominates(DominatingBlock, LikelySucc))
+        break;
+      DominatingBlock = LikelySucc;
+    }
 
-    // Same Block?
+    // Found?
     if (DominatedBlock == DominatingBlock)
       return false;
-    // Obvious successor (common loop header/preheader case)
-    if (DominatedBlock == DominatingBlock->getUniqueSuccessor())
-      return false;
+    // We followed the likely successor chain and went past the dominated
+    // block. It means that the dominated guard is in dead/very cold code.
+    if (!DT.dominates(DominatingBlock, DominatedBlock))
+      return true;
     // TODO: diamond, triangle cases
     if (!PDT) return true;
     return !PDT->dominates(DominatedBlock, DominatingBlock);
   };
 
-  return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral;
+  return MaybeHoistingToHotterBlock() ? WS_IllegalOrNegative : WS_Neutral;
 }
 
-bool GuardWideningImpl::isAvailableAt(
+bool GuardWideningImpl::canBeHoistedTo(
     const Value *V, const Instruction *Loc,
     SmallPtrSetImpl<const Instruction *> &Visited) const {
   auto *Inst = dyn_cast<Instruction>(V);
@@ -482,7 +547,7 @@ bool GuardWideningImpl::isAvailableAt(
   assert(DT.isReachableFromEntry(Inst->getParent()) &&
          "We did a DFS from the block entry!");
   return all_of(Inst->operands(),
-                [&](Value *Op) { return isAvailableAt(Op, Loc, Visited); });
+                [&](Value *Op) { return canBeHoistedTo(Op, Loc, Visited); });
 }
 
 void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
@@ -491,14 +556,115 @@ void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
     return;
 
   assert(isSafeToSpeculativelyExecute(Inst, Loc, &AC, &DT) &&
-         !Inst->mayReadFromMemory() && "Should've checked with isAvailableAt!");
+         !Inst->mayReadFromMemory() &&
+         "Should've checked with canBeHoistedTo!");
 
   for (Value *Op : Inst->operands())
     makeAvailableAt(Op, Loc);
 
   Inst->moveBefore(Loc);
-  // If we moved instruction before guard we must clean poison generating flags.
-  Inst->dropPoisonGeneratingFlags();
+}
+
+// Return Instruction before which we can insert freeze for the value V as close
+// to def as possible. If there is no place to add freeze, return nullptr.
+static Instruction *getFreezeInsertPt(Value *V, const DominatorTree &DT) {
+  auto *I = dyn_cast<Instruction>(V);
+  if (!I)
+    return &*DT.getRoot()->getFirstNonPHIOrDbgOrAlloca();
+
+  auto *Res = I->getInsertionPointAfterDef();
+  // If there is no place to add freeze - return nullptr.
+  if (!Res || !DT.dominates(I, Res))
+    return nullptr;
+
+  // If there is a User dominated by original I, then it should be dominated
+  // by Freeze instruction as well.
+  if (any_of(I->users(), [&](User *U) {
+        Instruction *User = cast<Instruction>(U);
+        return Res != User && DT.dominates(I, User) && !DT.dominates(Res, User);
+      }))
+    return nullptr;
+  return Res;
+}
+
+Value *GuardWideningImpl::freezeAndPush(Value *Orig, Instruction *InsertPt) {
+  if (isGuaranteedNotToBePoison(Orig, nullptr, InsertPt, &DT))
+    return Orig;
+  Instruction *InsertPtAtDef = getFreezeInsertPt(Orig, DT);
+  if (!InsertPtAtDef)
+    return new FreezeInst(Orig, "gw.freeze", InsertPt);
+  if (isa<Constant>(Orig) || isa<GlobalValue>(Orig))
+    return new FreezeInst(Orig, "gw.freeze", InsertPtAtDef);
+
+  SmallSet<Value *, 16> Visited;
+  SmallVector<Value *, 16> Worklist;
+  SmallSet<Instruction *, 16> DropPoisonFlags;
+  SmallVector<Value *, 16> NeedFreeze;
+  DenseMap<Value *, FreezeInst *> CacheOfFreezes;
+
+  // A bit overloaded data structures. Visited contains constant/GV
+  // if we already met it. In this case CacheOfFreezes has a freeze if it is
+  // required.
+  auto handleConstantOrGlobal = [&](Use &U) {
+    Value *Def = U.get();
+    if (!isa<Constant>(Def) && !isa<GlobalValue>(Def))
+      return false;
+
+    if (Visited.insert(Def).second) {
+      if (isGuaranteedNotToBePoison(Def, nullptr, InsertPt, &DT))
+        return true;
+      CacheOfFreezes[Def] = new FreezeInst(Def, Def->getName() + ".gw.fr",
+                                           getFreezeInsertPt(Def, DT));
+    }
+
+    if (CacheOfFreezes.count(Def))
+      U.set(CacheOfFreezes[Def]);
+    return true;
+  };
+
+  Worklist.push_back(Orig);
+  while (!Worklist.empty()) {
+    Value *V = Worklist.pop_back_val();
+    if (!Visited.insert(V).second)
+      continue;
+
+    if (isGuaranteedNotToBePoison(V, nullptr, InsertPt, &DT))
+      continue;
+
+    Instruction *I = dyn_cast<Instruction>(V);
+    if (!I || canCreateUndefOrPoison(cast<Operator>(I),
+                                     /*ConsiderFlagsAndMetadata*/ false)) {
+      NeedFreeze.push_back(V);
+      continue;
+    }
+    // Check all operands. If for any of them we cannot insert Freeze,
+    // stop here. Otherwise, iterate.
+    if (any_of(I->operands(), [&](Value *Op) {
+          return isa<Instruction>(Op) && !getFreezeInsertPt(Op, DT);
+        })) {
+      NeedFreeze.push_back(I);
+      continue;
+    }
+    DropPoisonFlags.insert(I);
+    for (Use &U : I->operands())
+      if (!handleConstantOrGlobal(U))
+        Worklist.push_back(U.get());
+  }
+  for (Instruction *I : DropPoisonFlags)
+    I->dropPoisonGeneratingFlagsAndMetadata();
+
+  Value *Result = Orig;
+  for (Value *V : NeedFreeze) {
+    auto *FreezeInsertPt = getFreezeInsertPt(V, DT);
+    FreezeInst *FI = new FreezeInst(V, V->getName() + ".gw.fr", FreezeInsertPt);
+    ++FreezeAdded;
+    if (V == Orig)
+      Result = FI;
+    V->replaceUsesWithIf(
+        FI, [&](const Use & U)->bool { return U.getUser() != FI; });
+  }
+
+  return Result;
 }
 
 bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
@@ -532,6 +698,8 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
           if (InsertPt) {
             ConstantInt *NewRHS =
                 ConstantInt::get(Cond0->getContext(), NewRHSAP);
+            assert(canBeHoistedTo(LHS, InsertPt) && "must be");
+            makeAvailableAt(LHS, InsertPt);
             Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk");
           }
           return true;
@@ -558,6 +726,7 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
         }
         assert(Result && "Failed to find result value");
         Result->setName("wide.chk");
+        Result = freezeAndPush(Result, InsertPt);
       }
       return true;
     }
@@ -570,6 +739,7 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
     makeAvailableAt(Cond1, InsertPt);
     if (InvertCondition)
       Cond1 = BinaryOperator::CreateNot(Cond1, "inverted", InsertPt);
+    Cond1 = freezeAndPush(Cond1, InsertPt);
     Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt);
   }