diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 618 |
1 files changed, 434 insertions, 184 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index bd7ab7c98781..89494a7f6497 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" @@ -271,7 +270,10 @@ class SimplifyCFGOpt { bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, IRBuilder<> &Builder); - bool HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly); + bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly); + bool hoistSuccIdenticalTerminatorToSwitchOrIf( + Instruction *TI, Instruction *I1, + SmallVectorImpl<Instruction *> &OtherSuccTIs); bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB); bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, BasicBlock *TrueBB, BasicBlock *FalseBB, @@ -499,7 +501,7 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) { return CI; else return cast<ConstantInt>( - ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false)); + ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL)); } return nullptr; } @@ -819,7 +821,7 @@ BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases( static void EliminateBlockCases(BasicBlock *BB, std::vector<ValueEqualityComparisonCase> &Cases) { - llvm::erase_value(Cases, BB); + llvm::erase(Cases, BB); } /// Return true if there are any keys in C1 that exist in C2 as well. @@ -1098,12 +1100,13 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( // Note that there may be multiple predecessor blocks, so we cannot move // bonus instructions to a predecessor block. for (Instruction &BonusInst : *BB) { - if (isa<DbgInfoIntrinsic>(BonusInst) || BonusInst.isTerminator()) + if (BonusInst.isTerminator()) continue; Instruction *NewBonusInst = BonusInst.clone(); - if (PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) { + if (!isa<DbgInfoIntrinsic>(BonusInst) && + PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) { // Unless the instruction has the same !dbg location as the original // branch, drop it. When we fold the bonus instructions we want to make // sure we reset their debug locations in order to avoid stepping on @@ -1113,7 +1116,6 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( RemapInstruction(NewBonusInst, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - VMap[&BonusInst] = NewBonusInst; // If we speculated an instruction, we need to drop any metadata that may // result in undefined behavior, as the metadata might have been valid @@ -1123,8 +1125,16 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( NewBonusInst->dropUBImplyingAttrsAndMetadata(); NewBonusInst->insertInto(PredBlock, PTI->getIterator()); + auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst); + RemapDPValueRange(NewBonusInst->getModule(), Range, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + + if (isa<DbgInfoIntrinsic>(BonusInst)) + continue; + NewBonusInst->takeName(&BonusInst); BonusInst.setName(NewBonusInst->getName() + ".old"); + VMap[&BonusInst] = NewBonusInst; // Update (liveout) uses of bonus instructions, // now that the bonus instruction has been cloned into predecessor. @@ -1303,7 +1313,7 @@ bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding( } for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor : NewSuccessors) { - for (auto I : seq(0, NewSuccessor.second)) { + for (auto I : seq(NewSuccessor.second)) { (void)I; AddPredecessorToBlock(NewSuccessor.first, Pred, BB); } @@ -1408,8 +1418,9 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI, } // If we would need to insert a select that uses the value of this invoke -// (comments in HoistThenElseCodeToIf explain why we would need to do this), we -// can't hoist the invoke, as there is nowhere to put the select in this case. +// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would +// need to do this), we can't hoist the invoke, as there is nowhere to put the +// select in this case. static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2) { for (BasicBlock *Succ : successors(BB1)) { @@ -1424,9 +1435,9 @@ static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, return true; } -// Get interesting characteristics of instructions that `HoistThenElseCodeToIf` -// didn't hoist. They restrict what kind of instructions can be reordered -// across. +// Get interesting characteristics of instructions that +// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of +// instructions can be reordered across. enum SkipFlags { SkipReadMem = 1, SkipSideEffect = 2, @@ -1484,7 +1495,7 @@ static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) { static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false); -/// Helper function for HoistThenElseCodeToIf. Return true if identical +/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical /// instructions \p I1 and \p I2 can and should be hoisted. static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI) { @@ -1515,62 +1526,51 @@ static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, return true; } -/// Given a conditional branch that goes to BB1 and BB2, hoist any common code -/// in the two blocks up into the branch block. The caller of this function -/// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given, -/// only perform hoisting in case both blocks only contain a terminator. In that -/// case, only the original BI will be replaced and selects for PHIs are added. -bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly) { +/// Hoist any common code in the successor blocks up into the block. This +/// function guarantees that BB dominates all successors. If EqTermsOnly is +/// given, only perform hoisting in case both blocks only contain a terminator. +/// In that case, only the original BI will be replaced and selects for PHIs are +/// added. +bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB, + bool EqTermsOnly) { // This does very trivial matching, with limited scanning, to find identical - // instructions in the two blocks. In particular, we don't want to get into - // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As + // instructions in the two blocks. In particular, we don't want to get into + // O(N1*N2*...) situations here where Ni are the sizes of these successors. As // such, we currently just scan for obviously identical instructions in an // identical order, possibly separated by the same number of non-identical // instructions. - BasicBlock *BB1 = BI->getSuccessor(0); // The true destination. - BasicBlock *BB2 = BI->getSuccessor(1); // The false destination + unsigned int SuccSize = succ_size(BB); + if (SuccSize < 2) + return false; // If either of the blocks has it's address taken, then we can't do this fold, // because the code we'd hoist would no longer run when we jump into the block // by it's address. - if (BB1->hasAddressTaken() || BB2->hasAddressTaken()) - return false; + for (auto *Succ : successors(BB)) + if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor()) + return false; - BasicBlock::iterator BB1_Itr = BB1->begin(); - BasicBlock::iterator BB2_Itr = BB2->begin(); + auto *TI = BB->getTerminator(); - Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++; - // Skip debug info if it is not identical. - DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); - DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); - if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { - while (isa<DbgInfoIntrinsic>(I1)) - I1 = &*BB1_Itr++; - while (isa<DbgInfoIntrinsic>(I2)) - I2 = &*BB2_Itr++; + // The second of pair is a SkipFlags bitmask. + using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>; + SmallVector<SuccIterPair, 8> SuccIterPairs; + for (auto *Succ : successors(BB)) { + BasicBlock::iterator SuccItr = Succ->begin(); + if (isa<PHINode>(*SuccItr)) + return false; + SuccIterPairs.push_back(SuccIterPair(SuccItr, 0)); } - if (isa<PHINode>(I1)) - return false; - - BasicBlock *BIParent = BI->getParent(); - - bool Changed = false; - - auto _ = make_scope_exit([&]() { - if (Changed) - ++NumHoistCommonCode; - }); // Check if only hoisting terminators is allowed. This does not add new // instructions to the hoist location. if (EqTermsOnly) { // Skip any debug intrinsics, as they are free to hoist. - auto *I1NonDbg = &*skipDebugIntrinsics(I1->getIterator()); - auto *I2NonDbg = &*skipDebugIntrinsics(I2->getIterator()); - if (!I1NonDbg->isIdenticalToWhenDefined(I2NonDbg)) - return false; - if (!I1NonDbg->isTerminator()) - return false; + for (auto &SuccIter : make_first_range(SuccIterPairs)) { + auto *INonDbg = &*skipDebugIntrinsics(SuccIter); + if (!INonDbg->isTerminator()) + return false; + } // Now we know that we only need to hoist debug intrinsics and the // terminator. Let the loop below handle those 2 cases. } @@ -1579,153 +1579,235 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly) { // many instructions we skip, serving as a compilation time control as well as // preventing excessive increase of life ranges. unsigned NumSkipped = 0; + // If we find an unreachable instruction at the beginning of a basic block, we + // can still hoist instructions from the rest of the basic blocks. + if (SuccIterPairs.size() > 2) { + erase_if(SuccIterPairs, + [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); }); + if (SuccIterPairs.size() < 2) + return false; + } - // Record any skipped instuctions that may read memory, write memory or have - // side effects, or have implicit control flow. - unsigned SkipFlagsBB1 = 0; - unsigned SkipFlagsBB2 = 0; + bool Changed = false; for (;;) { + auto *SuccIterPairBegin = SuccIterPairs.begin(); + auto &BB1ItrPair = *SuccIterPairBegin++; + auto OtherSuccIterPairRange = + iterator_range(SuccIterPairBegin, SuccIterPairs.end()); + auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange); + + Instruction *I1 = &*BB1ItrPair.first; + auto *BB1 = I1->getParent(); + + // Skip debug info if it is not identical. + bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) { + Instruction *I2 = &*Iter; + return I1->isIdenticalToWhenDefined(I2); + }); + if (!AllDbgInstsAreIdentical) { + while (isa<DbgInfoIntrinsic>(I1)) + I1 = &*++BB1ItrPair.first; + for (auto &SuccIter : OtherSuccIterRange) { + Instruction *I2 = &*SuccIter; + while (isa<DbgInfoIntrinsic>(I2)) + I2 = &*++SuccIter; + } + } + + bool AllInstsAreIdentical = true; + bool HasTerminator = I1->isTerminator(); + for (auto &SuccIter : OtherSuccIterRange) { + Instruction *I2 = &*SuccIter; + HasTerminator |= I2->isTerminator(); + if (AllInstsAreIdentical && !I1->isIdenticalToWhenDefined(I2)) + AllInstsAreIdentical = false; + } + // If we are hoisting the terminator instruction, don't move one (making a // broken BB), instead clone it, and remove BI. - if (I1->isTerminator() || I2->isTerminator()) { + if (HasTerminator) { + // Even if BB, which contains only one unreachable instruction, is ignored + // at the beginning of the loop, we can hoist the terminator instruction. // If any instructions remain in the block, we cannot hoist terminators. - if (NumSkipped || !I1->isIdenticalToWhenDefined(I2)) + if (NumSkipped || !AllInstsAreIdentical) return Changed; - goto HoistTerminator; + SmallVector<Instruction *, 8> Insts; + for (auto &SuccIter : OtherSuccIterRange) + Insts.push_back(&*SuccIter); + return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, Insts) || Changed; } - if (I1->isIdenticalToWhenDefined(I2) && - // Even if the instructions are identical, it may not be safe to hoist - // them if we have skipped over instructions with side effects or their - // operands weren't hoisted. - isSafeToHoistInstr(I1, SkipFlagsBB1) && - isSafeToHoistInstr(I2, SkipFlagsBB2) && - shouldHoistCommonInstructions(I1, I2, TTI)) { - if (isa<DbgInfoIntrinsic>(I1) || isa<DbgInfoIntrinsic>(I2)) { - assert(isa<DbgInfoIntrinsic>(I1) && isa<DbgInfoIntrinsic>(I2)); + if (AllInstsAreIdentical) { + unsigned SkipFlagsBB1 = BB1ItrPair.second; + AllInstsAreIdentical = + isSafeToHoistInstr(I1, SkipFlagsBB1) && + all_of(OtherSuccIterPairRange, [=](const auto &Pair) { + Instruction *I2 = &*Pair.first; + unsigned SkipFlagsBB2 = Pair.second; + // Even if the instructions are identical, it may not + // be safe to hoist them if we have skipped over + // instructions with side effects or their operands + // weren't hoisted. + return isSafeToHoistInstr(I2, SkipFlagsBB2) && + shouldHoistCommonInstructions(I1, I2, TTI); + }); + } + + if (AllInstsAreIdentical) { + BB1ItrPair.first++; + if (isa<DbgInfoIntrinsic>(I1)) { // The debug location is an integral part of a debug info intrinsic // and can't be separated from it or replaced. Instead of attempting // to merge locations, simply hoist both copies of the intrinsic. - BIParent->splice(BI->getIterator(), BB1, I1->getIterator()); - BIParent->splice(BI->getIterator(), BB2, I2->getIterator()); + I1->moveBeforePreserving(TI); + for (auto &SuccIter : OtherSuccIterRange) { + auto *I2 = &*SuccIter++; + assert(isa<DbgInfoIntrinsic>(I2)); + I2->moveBeforePreserving(TI); + } } else { // For a normal instruction, we just move one to right before the // branch, then replace all uses of the other with the first. Finally, // we remove the now redundant second instruction. - BIParent->splice(BI->getIterator(), BB1, I1->getIterator()); - if (!I2->use_empty()) - I2->replaceAllUsesWith(I1); - I1->andIRFlags(I2); - combineMetadataForCSE(I1, I2, true); - - // I1 and I2 are being combined into a single instruction. Its debug - // location is the merged locations of the original instructions. - I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); - - I2->eraseFromParent(); + I1->moveBeforePreserving(TI); + BB->splice(TI->getIterator(), BB1, I1->getIterator()); + for (auto &SuccIter : OtherSuccIterRange) { + Instruction *I2 = &*SuccIter++; + assert(I2 != I1); + if (!I2->use_empty()) + I2->replaceAllUsesWith(I1); + I1->andIRFlags(I2); + combineMetadataForCSE(I1, I2, true); + // I1 and I2 are being combined into a single instruction. Its debug + // location is the merged locations of the original instructions. + I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); + I2->eraseFromParent(); + } } + if (!Changed) + NumHoistCommonCode += SuccIterPairs.size(); Changed = true; - ++NumHoistCommonInstrs; + NumHoistCommonInstrs += SuccIterPairs.size(); } else { if (NumSkipped >= HoistCommonSkipLimit) return Changed; // We are about to skip over a pair of non-identical instructions. Record // if any have characteristics that would prevent reordering instructions // across them. - SkipFlagsBB1 |= skippedInstrFlags(I1); - SkipFlagsBB2 |= skippedInstrFlags(I2); + for (auto &SuccIterPair : SuccIterPairs) { + Instruction *I = &*SuccIterPair.first++; + SuccIterPair.second |= skippedInstrFlags(I); + } ++NumSkipped; } - - I1 = &*BB1_Itr++; - I2 = &*BB2_Itr++; - // Skip debug info if it is not identical. - DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); - DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); - if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { - while (isa<DbgInfoIntrinsic>(I1)) - I1 = &*BB1_Itr++; - while (isa<DbgInfoIntrinsic>(I2)) - I2 = &*BB2_Itr++; - } } +} - return Changed; +bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf( + Instruction *TI, Instruction *I1, + SmallVectorImpl<Instruction *> &OtherSuccTIs) { -HoistTerminator: - // It may not be possible to hoist an invoke. + auto *BI = dyn_cast<BranchInst>(TI); + + bool Changed = false; + BasicBlock *TIParent = TI->getParent(); + BasicBlock *BB1 = I1->getParent(); + + // Use only for an if statement. + auto *I2 = *OtherSuccTIs.begin(); + auto *BB2 = I2->getParent(); + if (BI) { + assert(OtherSuccTIs.size() == 1); + assert(BI->getSuccessor(0) == I1->getParent()); + assert(BI->getSuccessor(1) == I2->getParent()); + } + + // In the case of an if statement, we try to hoist an invoke. // FIXME: Can we define a safety predicate for CallBr? - if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) - return Changed; + // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll + // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit? + if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2))) + return false; // TODO: callbr hoisting currently disabled pending further study. if (isa<CallBrInst>(I1)) - return Changed; + return false; for (BasicBlock *Succ : successors(BB1)) { for (PHINode &PN : Succ->phis()) { Value *BB1V = PN.getIncomingValueForBlock(BB1); - Value *BB2V = PN.getIncomingValueForBlock(BB2); - if (BB1V == BB2V) - continue; + for (Instruction *OtherSuccTI : OtherSuccTIs) { + Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent()); + if (BB1V == BB2V) + continue; - // Check for passingValueIsAlwaysUndefined here because we would rather - // eliminate undefined control flow then converting it to a select. - if (passingValueIsAlwaysUndefined(BB1V, &PN) || - passingValueIsAlwaysUndefined(BB2V, &PN)) - return Changed; + // In the case of an if statement, check for + // passingValueIsAlwaysUndefined here because we would rather eliminate + // undefined control flow then converting it to a select. + if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) || + passingValueIsAlwaysUndefined(BB2V, &PN)) + return false; + } } } // Okay, it is safe to hoist the terminator. Instruction *NT = I1->clone(); - NT->insertInto(BIParent, BI->getIterator()); + NT->insertInto(TIParent, TI->getIterator()); if (!NT->getType()->isVoidTy()) { I1->replaceAllUsesWith(NT); - I2->replaceAllUsesWith(NT); + for (Instruction *OtherSuccTI : OtherSuccTIs) + OtherSuccTI->replaceAllUsesWith(NT); NT->takeName(I1); } Changed = true; - ++NumHoistCommonInstrs; + NumHoistCommonInstrs += OtherSuccTIs.size() + 1; // Ensure terminator gets a debug location, even an unknown one, in case // it involves inlinable calls. - NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); + SmallVector<DILocation *, 4> Locs; + Locs.push_back(I1->getDebugLoc()); + for (auto *OtherSuccTI : OtherSuccTIs) + Locs.push_back(OtherSuccTI->getDebugLoc()); + NT->setDebugLoc(DILocation::getMergedLocations(Locs)); // PHIs created below will adopt NT's merged DebugLoc. IRBuilder<NoFolder> Builder(NT); - // Hoisting one of the terminators from our successor is a great thing. - // Unfortunately, the successors of the if/else blocks may have PHI nodes in - // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI - // nodes, so we insert select instruction to compute the final result. - std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects; - for (BasicBlock *Succ : successors(BB1)) { - for (PHINode &PN : Succ->phis()) { - Value *BB1V = PN.getIncomingValueForBlock(BB1); - Value *BB2V = PN.getIncomingValueForBlock(BB2); - if (BB1V == BB2V) - continue; + // In the case of an if statement, hoisting one of the terminators from our + // successor is a great thing. Unfortunately, the successors of the if/else + // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2 + // must agree for all PHI nodes, so we insert select instruction to compute + // the final result. + if (BI) { + std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects; + for (BasicBlock *Succ : successors(BB1)) { + for (PHINode &PN : Succ->phis()) { + Value *BB1V = PN.getIncomingValueForBlock(BB1); + Value *BB2V = PN.getIncomingValueForBlock(BB2); + if (BB1V == BB2V) + continue; - // These values do not agree. Insert a select instruction before NT - // that determines the right value. - SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; - if (!SI) { - // Propagate fast-math-flags from phi node to its replacement select. - IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); - if (isa<FPMathOperator>(PN)) - Builder.setFastMathFlags(PN.getFastMathFlags()); - - SI = cast<SelectInst>( - Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, - BB1V->getName() + "." + BB2V->getName(), BI)); - } + // These values do not agree. Insert a select instruction before NT + // that determines the right value. + SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; + if (!SI) { + // Propagate fast-math-flags from phi node to its replacement select. + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); + if (isa<FPMathOperator>(PN)) + Builder.setFastMathFlags(PN.getFastMathFlags()); + + SI = cast<SelectInst>(Builder.CreateSelect( + BI->getCondition(), BB1V, BB2V, + BB1V->getName() + "." + BB2V->getName(), BI)); + } - // Make the PHI node use the select for all incoming values for BB1/BB2 - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) - if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2) - PN.setIncomingValue(i, SI); + // Make the PHI node use the select for all incoming values for BB1/BB2 + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) + if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2) + PN.setIncomingValue(i, SI); + } } } @@ -1733,16 +1815,16 @@ HoistTerminator: // Update any PHI nodes in our new successors. for (BasicBlock *Succ : successors(BB1)) { - AddPredecessorToBlock(Succ, BIParent, BB1); + AddPredecessorToBlock(Succ, TIParent, BB1); if (DTU) - Updates.push_back({DominatorTree::Insert, BIParent, Succ}); + Updates.push_back({DominatorTree::Insert, TIParent, Succ}); } if (DTU) - for (BasicBlock *Succ : successors(BI)) - Updates.push_back({DominatorTree::Delete, BIParent, Succ}); + for (BasicBlock *Succ : successors(TI)) + Updates.push_back({DominatorTree::Delete, TIParent, Succ}); - EraseTerminatorAndDCECond(BI); + EraseTerminatorAndDCECond(TI); if (DTU) DTU->applyUpdates(Updates); return Changed; @@ -1808,10 +1890,19 @@ static bool canSinkInstructions( } const Instruction *I0 = Insts.front(); - for (auto *I : Insts) + for (auto *I : Insts) { if (!I->isSameOperationAs(I0)) return false; + // swifterror pointers can only be used by a load or store; sinking a load + // or store would require introducing a select for the pointer operand, + // which isn't allowed for swifterror pointers. + if (isa<StoreInst>(I) && I->getOperand(1)->isSwiftError()) + return false; + if (isa<LoadInst>(I) && I->getOperand(0)->isSwiftError()) + return false; + } + // All instructions in Insts are known to be the same opcode. If they have a // use, check that the only user is a PHI or in the same block as the // instruction, because if a user is in the same block as an instruction we're @@ -1952,8 +2043,9 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { // Create a new PHI in the successor block and populate it. auto *Op = I0->getOperand(O); assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!"); - auto *PN = PHINode::Create(Op->getType(), Insts.size(), - Op->getName() + ".sink", &BBEnd->front()); + auto *PN = + PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink"); + PN->insertBefore(BBEnd->begin()); for (auto *I : Insts) PN->addIncoming(I->getOperand(O), I->getParent()); NewOperands.push_back(PN); @@ -1963,7 +2055,8 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { // and move it to the start of the successor block. for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) I0->getOperandUse(O).set(NewOperands[O]); - I0->moveBefore(&*BBEnd->getFirstInsertionPt()); + + I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt()); // Update metadata and IR flags, and merge debug locations. for (auto *I : Insts) @@ -2765,8 +2858,8 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, Value *OrigV = PN.getIncomingValueForBlock(BB); Value *ThenV = PN.getIncomingValueForBlock(ThenBB); - // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. - // Skip PHIs which are trivial. + // FIXME: Try to remove some of the duplication with + // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial. if (ThenV == OrigV) continue; @@ -3009,7 +3102,7 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, // store %merge, %x.dest, !DIAssignID !2 // dbg.assign %merge, "x", ..., !2 for (auto *DAI : at::getAssignmentMarkers(SpeculatedStore)) { - if (any_of(DAI->location_ops(), [&](Value *V) { return V == OrigV; })) + if (llvm::is_contained(DAI->location_ops(), OrigV)) DAI->replaceVariableLocationOp(OrigV, S); } } @@ -3036,6 +3129,11 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, } // Hoist the instructions. + // In "RemoveDIs" non-instr debug-info mode, drop DPValues attached to these + // instructions, in the same way that dbg.value intrinsics are dropped at the + // end of this block. + for (auto &It : make_range(ThenBB->begin(), ThenBB->end())) + It.dropDbgValues(); BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(), std::prev(ThenBB->end())); @@ -3207,6 +3305,10 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt(); DenseMap<Value *, Value *> TranslateMap; // Track translated values. TranslateMap[Cond] = CB; + + // RemoveDIs: track instructions that we optimise away while folding, so + // that we can copy DPValues from them later. + BasicBlock::iterator SrcDbgCursor = BB->begin(); for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { if (PHINode *PN = dyn_cast<PHINode>(BBI)) { TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB); @@ -3241,6 +3343,15 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, TranslateMap[&*BBI] = N; } if (N) { + // Copy all debug-info attached to instructions from the last we + // successfully clone, up to this instruction (they might have been + // folded away). + for (; SrcDbgCursor != BBI; ++SrcDbgCursor) + N->cloneDebugInfoFrom(&*SrcDbgCursor); + SrcDbgCursor = std::next(BBI); + // Clone debug-info on this instruction too. + N->cloneDebugInfoFrom(&*BBI); + // Register the new instruction with the assumption cache if necessary. if (auto *Assume = dyn_cast<AssumeInst>(N)) if (AC) @@ -3248,6 +3359,10 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, } } + for (; &*SrcDbgCursor != BI; ++SrcDbgCursor) + InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor); + InsertPt->cloneDebugInfoFrom(BI); + BB->removePredecessor(EdgeBB); BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator()); EdgeBI->setSuccessor(0, RealDest); @@ -3652,22 +3767,22 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, ValueToValueMapTy VMap; // maps original values to cloned values CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap); + Module *M = BB->getModule(); + + if (PredBlock->IsNewDbgInfoFormat) { + PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator()); + for (DPValue &DPV : PredBlock->getTerminator()->getDbgValueRange()) { + RemapDPValue(M, &DPV, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + } + } + // Now that the Cond was cloned into the predecessor basic block, // or/and the two conditions together. Value *BICond = VMap[BI->getCondition()]; PBI->setCondition( createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond")); - // Copy any debug value intrinsics into the end of PredBlock. - for (Instruction &I : *BB) { - if (isa<DbgInfoIntrinsic>(I)) { - Instruction *NewI = I.clone(); - RemapInstruction(NewI, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - NewI->insertBefore(PBI); - } - } - ++NumFoldBranchToCommonDest; return true; } @@ -3867,7 +3982,8 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB)) return V; - PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front()); + PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge"); + PHI->insertBefore(Succ->begin()); PHI->addIncoming(V, BB); for (BasicBlock *PredBB : predecessors(Succ)) if (PredBB != BB) @@ -3991,7 +4107,9 @@ static bool mergeConditionalStoreToAddress( Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(), QStore->getParent(), PPHI); - IRBuilder<> QB(&*PostBB->getFirstInsertionPt()); + BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt(); + IRBuilder<> QB(PostBB, PostBBFirst); + QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc()); Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond); Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond); @@ -4002,9 +4120,11 @@ static bool mergeConditionalStoreToAddress( QPred = QB.CreateNot(QPred); Value *CombinedPred = QB.CreateOr(PPred, QPred); - auto *T = SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), + BasicBlock::iterator InsertPt = QB.GetInsertPoint(); + auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt, /*Unreachable=*/false, /*BranchWeights=*/nullptr, DTU); + QB.SetInsertPoint(T); StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address)); SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata())); @@ -4140,10 +4260,10 @@ static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // 2) We can sink side effecting instructions into BI's fallthrough // successor provided they doesn't contribute to computation of // BI's condition. - Value *CondWB, *WC; - BasicBlock *IfTrueBB, *IfFalseBB; - if (!parseWidenableBranch(PBI, CondWB, WC, IfTrueBB, IfFalseBB) || - IfTrueBB != BI->getParent() || !BI->getParent()->getSinglePredecessor()) + BasicBlock *IfTrueBB = PBI->getSuccessor(0); + BasicBlock *IfFalseBB = PBI->getSuccessor(1); + if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() || + !BI->getParent()->getSinglePredecessor()) return false; if (!IfFalseBB->phis().empty()) return false; // TODO @@ -4256,6 +4376,21 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, if (PBI->getSuccessor(PBIOp) == BB) return false; + // If predecessor's branch probability to BB is too low don't merge branches. + SmallVector<uint32_t, 2> PredWeights; + if (!PBI->getMetadata(LLVMContext::MD_unpredictable) && + extractBranchWeights(*PBI, PredWeights) && + (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) { + + BranchProbability CommonDestProb = BranchProbability::getBranchProbability( + PredWeights[PBIOp], + static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]); + + BranchProbability Likely = TTI.getPredictableBranchThreshold(); + if (CommonDestProb >= Likely) + return false; + } + // Do not perform this transformation if it would require // insertion of a large number of select instructions. For targets // without predication/cmovs, this is a big pessimization. @@ -5088,6 +5223,15 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { bool Changed = false; + // Ensure that any debug-info records that used to occur after the Unreachable + // are moved to in front of it -- otherwise they'll "dangle" at the end of + // the block. + BB->flushTerminatorDbgValues(); + + // Debug-info records on the unreachable inst itself should be deleted, as + // below we delete everything past the final executable instruction. + UI->dropDbgValues(); + // If there are any instructions immediately before the unreachable that can // be removed, do so. while (UI->getIterator() != BB->begin()) { @@ -5104,6 +5248,10 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn, // and we can therefore guarantee this block will be erased. + // If we're deleting this, we're deleting any subsequent dbg.values, so + // delete DPValue records of variable information. + BBI->dropDbgValues(); + // Delete this instruction (any uses are guaranteed to be dead) BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType())); BBI->eraseFromParent(); @@ -5667,7 +5815,7 @@ getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) { if (I.isTerminator()) { // If the terminator is a simple branch, continue to the next block. - if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator()) + if (I.getNumSuccessors() != 1 || I.isSpecialTerminator()) return false; Pred = CaseDest; CaseDest = I.getSuccessor(0); @@ -5890,8 +6038,8 @@ static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, // Remove the switch. - while (PHI->getBasicBlockIndex(SelectBB) >= 0) - PHI->removeIncomingValue(SelectBB); + PHI->removeIncomingValueIf( + [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; }); PHI->addIncoming(SelectValue, SelectBB); SmallPtrSet<BasicBlock *, 4> RemovedSuccessors; @@ -6507,9 +6655,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // If the default destination is unreachable, or if the lookup table covers // all values of the conditional variable, branch directly to the lookup table // BB. Otherwise, check that the condition is within the case range. - const bool DefaultIsReachable = + bool DefaultIsReachable = !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); - const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize); // Create the BB that does the lookups. Module &Mod = *CommonDest->getParent()->getParent(); @@ -6540,6 +6687,28 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, BranchInst *RangeCheckBranch = nullptr; + // Grow the table to cover all possible index values to avoid the range check. + // It will use the default result to fill in the table hole later, so make + // sure it exist. + if (UseSwitchConditionAsTableIndex && HasDefaultResults) { + ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false); + // Grow the table shouldn't have any size impact by checking + // WouldFitInRegister. + // TODO: Consider growing the table also when it doesn't fit in a register + // if no optsize is specified. + const uint64_t UpperBound = CR.getUpper().getLimitedValue(); + if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) { + return SwitchLookupTable::WouldFitInRegister( + DL, UpperBound, KV.second /* ResultType */); + })) { + // The default branch is unreachable after we enlarge the lookup table. + // Adjust DefaultIsReachable to reuse code path. + TableSize = UpperBound; + DefaultIsReachable = false; + } + } + + const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize); if (!DefaultIsReachable || GeneratingCoveredLookupTable) { Builder.CreateBr(LookupBB); if (DTU) @@ -6701,9 +6870,6 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, // This transform can be done speculatively because it is so cheap - it // results in a single rotate operation being inserted. - // FIXME: It's possible that optimizing a switch on powers of two might also - // be beneficial - flag values are often powers of two and we could use a CLZ - // as the key function. // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than // one element and LLVM disallows duplicate cases, Shift is guaranteed to be @@ -6748,6 +6914,80 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, return true; } +/// Tries to transform switch of powers of two to reduce switch range. +/// For example, switch like: +/// switch (C) { case 1: case 2: case 64: case 128: } +/// will be transformed to: +/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: } +/// +/// This transformation allows better lowering and could allow transforming into +/// a lookup table. +static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, + const DataLayout &DL, + const TargetTransformInfo &TTI) { + Value *Condition = SI->getCondition(); + LLVMContext &Context = SI->getContext(); + auto *CondTy = cast<IntegerType>(Condition->getType()); + + if (CondTy->getIntegerBitWidth() > 64 || + !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth())) + return false; + + const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost( + IntrinsicCostAttributes(Intrinsic::cttz, CondTy, + {Condition, ConstantInt::getTrue(Context)}), + TTI::TCK_SizeAndLatency); + + if (CttzIntrinsicCost > TTI::TCC_Basic) + // Inserting intrinsic is too expensive. + return false; + + // Only bother with this optimization if there are more than 3 switch cases. + // SDAG will only bother creating jump tables for 4 or more cases. + if (SI->getNumCases() < 4) + return false; + + // We perform this optimization only for switches with + // unreachable default case. + // This assumtion will save us from checking if `Condition` is a power of two. + if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg())) + return false; + + // Check that switch cases are powers of two. + SmallVector<uint64_t, 4> Values; + for (const auto &Case : SI->cases()) { + uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue(); + if (llvm::has_single_bit(CaseValue)) + Values.push_back(CaseValue); + else + return false; + } + + // isSwichDense requires case values to be sorted. + llvm::sort(Values); + if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) - + llvm::countr_zero(Values.front()) + 1)) + // Transform is unable to generate dense switch. + return false; + + Builder.SetInsertPoint(SI); + + // Replace each case with its trailing zeros number. + for (auto &Case : SI->cases()) { + auto *OrigValue = Case.getCaseValue(); + Case.setValue(ConstantInt::get(OrigValue->getType(), + OrigValue->getValue().countr_zero())); + } + + // Replace condition with its trailing zeros number. + auto *ConditionTrailingZeros = Builder.CreateIntrinsic( + Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)}); + + SI->setCondition(ConditionTrailingZeros); + + return true; +} + bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { BasicBlock *BB = SI->getParent(); @@ -6795,9 +7035,16 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { SwitchToLookupTable(SI, Builder, DTU, DL, TTI)) return requestResimplify(); + if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI)) + return requestResimplify(); + if (ReduceSwitchRange(SI, Builder, DL, TTI)) return requestResimplify(); + if (HoistCommon && + hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts)) + return requestResimplify(); + return false; } @@ -6982,7 +7229,8 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. - if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, + if (Options.SpeculateBlocks && + FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, Options.BonusInstThreshold)) return requestResimplify(); return false; @@ -7052,7 +7300,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, + if (Options.SpeculateBlocks && + FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, Options.BonusInstThreshold)) return requestResimplify(); @@ -7062,7 +7311,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // can hoist it up to the branching block. if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { - if (HoistCommon && HoistThenElseCodeToIf(BI, !Options.HoistCommonInsts)) + if (HoistCommon && hoistCommonCodeFromSuccessors( + BI->getParent(), !Options.HoistCommonInsts)) return requestResimplify(); } else { // If Successor #1 has multiple preds, we may be able to conditionally |
