| author | Dimitry Andric <dim@FreeBSD.org> | 2023-09-02 21:17:18 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2023-12-08 17:34:50 +0000 |
| commit | 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e (patch) | |
| tree | 62f873df87c7c675557a179e0c4c83fe9f3087bc /contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp | |
| parent | cf037972ea8863e2bab7461d77345367d2c1e054 (diff) | |
| parent | 7fa27ce4a07f19b07799a767fc29416f3b625afb (diff) | |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp | 438 |
1 file changed, 261 insertions, 177 deletions
```diff
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
index dd431cc6f4f5..b00df0b6c6cb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -33,6 +33,7 @@
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -82,7 +83,6 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@@ -257,13 +257,17 @@ static cl::opt<bool>
         "CodeGenPrepare."));
 
 static cl::opt<bool>
-    OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(false),
+    OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
                      cl::desc("Enable converting phi types in CodeGenPrepare"));
 
 static cl::opt<unsigned>
     HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
                             cl::desc("Least BB number of huge function."));
 
+static cl::opt<unsigned>
+    MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
+                          cl::Hidden,
+                          cl::desc("Max number of address users to look at"));
 namespace {
 
 enum ExtType {
@@ -294,16 +298,16 @@ class TypePromotionTransaction;
 
 class CodeGenPrepare : public FunctionPass {
   const TargetMachine *TM = nullptr;
-  const TargetSubtargetInfo *SubtargetInfo;
+  const TargetSubtargetInfo *SubtargetInfo = nullptr;
   const TargetLowering *TLI = nullptr;
-  const TargetRegisterInfo *TRI;
+  const TargetRegisterInfo *TRI = nullptr;
   const TargetTransformInfo *TTI = nullptr;
   const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
-  const TargetLibraryInfo *TLInfo;
-  const LoopInfo *LI;
+  const TargetLibraryInfo *TLInfo = nullptr;
+  LoopInfo *LI = nullptr;
   std::unique_ptr<BlockFrequencyInfo> BFI;
   std::unique_ptr<BranchProbabilityInfo> BPI;
-  ProfileSummaryInfo *PSI;
+  ProfileSummaryInfo *PSI = nullptr;
 
   /// As we scan instructions optimizing them, this is the next instruction
   /// to optimize. Transforms that can invalidate this should update it.
@@ -373,6 +377,15 @@ public:
   bool runOnFunction(Function &F) override;
 
+  void releaseMemory() override {
+    // Clear per function information.
+    InsertedInsts.clear();
+    PromotedInsts.clear();
+    FreshBBs.clear();
+    BPI.reset();
+    BFI.reset();
+  }
+
   StringRef getPassName() const override { return "CodeGen Prepare"; }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -413,7 +426,7 @@ private:
   void removeAllAssertingVHReferences(Value *V);
   bool eliminateAssumptions(Function &F);
-  bool eliminateFallThrough(Function &F);
+  bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
   bool eliminateMostlyEmptyBlocks(Function &F);
   BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
   bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
@@ -494,10 +507,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   DL = &F.getParent()->getDataLayout();
 
   bool EverMadeChange = false;
-  // Clear per function information.
-  InsertedInsts.clear();
-  PromotedInsts.clear();
-  FreshBBs.clear();
 
   TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
   SubtargetInfo = TM->getSubtargetImpl(F);
@@ -574,11 +583,15 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   // Because the basic algorithm's complex is near O(N!).
   IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
 
+  // Transformations above may invalidate dominator tree and/or loop info.
+  DT.reset();
+  LI->releaseMemory();
+  LI->analyze(getDT(F));
+
   bool MadeChange = true;
   bool FuncIterated = false;
   while (MadeChange) {
     MadeChange = false;
-    DT.reset();
 
     for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
       if (FuncIterated && !FreshBBs.contains(&BB))
@@ -587,6 +600,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
       ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
       bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
 
+      if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
+        DT.reset();
+
       MadeChange |= Changed;
       if (IsHugeFunc) {
         // If the BB is updated, it may still has chance to be optimized.
@@ -602,9 +618,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
           FreshBBs.insert(&BB);
         else if (FuncIterated)
           FreshBBs.erase(&BB);
-
-        if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
-          DT.reset();
       } else {
         // For small/normal functions, we restart BB iteration if the dominator
         // tree of the Function was changed.
@@ -622,7 +635,12 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
     MadeChange |= optimizePhiTypes(F);
 
     if (MadeChange)
-      eliminateFallThrough(F);
+      eliminateFallThrough(F, DT.get());
+
+#ifndef NDEBUG
+    if (MadeChange && VerifyLoopInfo)
+      LI->verify(getDT(F));
+#endif
 
     // Really free removed instructions during promotion.
     for (Instruction *I : RemovedInsts)
@@ -755,7 +773,7 @@ void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
 /// Merge basic blocks which are connected by a single edge, where one of the
 /// basic blocks has a single successor pointing to the other basic block,
 /// which has a single predecessor.
-bool CodeGenPrepare::eliminateFallThrough(Function &F) {
+bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
   bool Changed = false;
   // Scan all of the blocks in the function, except for the entry block.
   // Use a temporary array to avoid iterator being invalidated when
@@ -777,13 +795,19 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
     if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
       continue;
 
+    // Make an effort to skip unreachable blocks.
+    if (DT && !DT->isReachableFromEntry(BB))
+      continue;
+
     BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
     if (Term && !Term->isConditional()) {
       Changed = true;
       LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
 
       // Merge BB into SinglePred and delete it.
-      MergeBlockIntoPredecessor(BB);
+      MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
+                                /* MemDep */ nullptr,
+                                /* PredecessorWithTwoSuccessors */ false, DT);
       Preds.insert(SinglePred);
 
       if (IsHugeFunc) {
@@ -1579,6 +1603,7 @@ static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
 /// intrinsic. Return true if any changes were made.
 bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
                                                ModifyDT &ModifiedDT) {
+  bool EdgeCase = false;
   Value *A, *B;
   BinaryOperator *Add;
   if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
@@ -1587,11 +1612,12 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
     // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
     A = Add->getOperand(0);
     B = Add->getOperand(1);
+    EdgeCase = true;
   }
 
   if (!TLI->shouldFormOverflowOp(ISD::UADDO,
                                  TLI->getValueType(*DL, Add->getType()),
-                                 Add->hasNUsesOrMore(2)))
+                                 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
     return false;
 
   // We don't want to move around uses of condition values this late, so we
@@ -1660,7 +1686,7 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
   if (!TLI->shouldFormOverflowOp(ISD::USUBO,
                                  TLI->getValueType(*DL, Sub->getType()),
-                                 Sub->hasNUsesOrMore(2)))
+                                 Sub->hasNUsesOrMore(1)))
     return false;
 
   if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
@@ -1825,6 +1851,37 @@ static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
   return true;
 }
 
+/// Many architectures use the same instruction for both subtract and cmp. Try
+/// to swap cmp operands to match subtract operations to allow for CSE.
+static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
+  Value *Op0 = Cmp->getOperand(0);
+  Value *Op1 = Cmp->getOperand(1);
+  if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
+      isa<Constant>(Op1) || Op0 == Op1)
+    return false;
+
+  // If a subtract already has the same operands as a compare, swapping would be
+  // bad. If a subtract has the same operands as a compare but in reverse order,
+  // then swapping is good.
+  int GoodToSwap = 0;
+  unsigned NumInspected = 0;
+  for (const User *U : Op0->users()) {
+    // Avoid walking many users.
+    if (++NumInspected > 128)
+      return false;
+    if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
+      GoodToSwap++;
+    else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
+      GoodToSwap--;
+  }
+
+  if (GoodToSwap > 0) {
+    Cmp->swapOperands();
+    return true;
+  }
+  return false;
+}
+
 bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
   if (sinkCmpExpression(Cmp, *TLI))
     return true;
@@ -1838,6 +1895,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
   if (foldICmpWithDominatingICmp(Cmp, *TLI))
     return true;
 
+  if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
+    return true;
+
   return false;
 }
 
@@ -2129,6 +2189,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
 ///
 /// If the transform is performed, return true and set ModifiedDT to true.
 static bool despeculateCountZeros(IntrinsicInst *CountZeros,
+                                  LoopInfo &LI,
                                   const TargetLowering *TLI,
                                   const DataLayout *DL, ModifyDT &ModifiedDT,
                                   SmallSet<BasicBlock *, 32> &FreshBBs,
@@ -2168,6 +2229,13 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
   if (IsHugeFunc)
     FreshBBs.insert(EndBlock);
 
+  // Update the LoopInfo. The new blocks are in the same loop as the start
+  // block.
+  if (Loop *L = LI.getLoopFor(StartBlock)) {
+    L->addBasicBlockToLoop(CallBlock, LI);
+    L->addBasicBlockToLoop(EndBlock, LI);
+  }
+
   // Set up a builder to create a compare, conditional branch, and PHI.
   IRBuilder<> Builder(CountZeros->getContext());
   Builder.SetInsertPoint(StartBlock->getTerminator());
@@ -2279,7 +2347,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
       if (!Arg->getType()->isPointerTy())
         continue;
       unsigned AS = Arg->getType()->getPointerAddressSpace();
-      return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
+      if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
+        return true;
     }
 
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
@@ -2341,7 +2410,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
     case Intrinsic::cttz:
     case Intrinsic::ctlz:
       // If counting zeros is expensive, try to avoid it.
-      return despeculateCountZeros(II, TLI, DL, ModifiedDT, FreshBBs,
+      return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
                                    IsHugeFunc);
     case Intrinsic::fshl:
     case Intrinsic::fshr:
@@ -2349,24 +2418,6 @@
     case Intrinsic::dbg_assign:
     case Intrinsic::dbg_value:
       return fixupDbgValue(II);
-    case Intrinsic::vscale: {
-      // If datalayout has no special restrictions on vector data layout,
-      // replace `llvm.vscale` by an equivalent constant expression
-      // to benefit from cheap constant propagation.
-      Type *ScalableVectorTy =
-          VectorType::get(Type::getInt8Ty(II->getContext()), 1, true);
-      if (DL->getTypeAllocSize(ScalableVectorTy).getKnownMinValue() == 8) {
-        auto *Null = Constant::getNullValue(ScalableVectorTy->getPointerTo());
-        auto *One = ConstantInt::getSigned(II->getType(), 1);
-        auto *CGep =
-            ConstantExpr::getGetElementPtr(ScalableVectorTy, Null, One);
-        replaceAllUsesWith(II, ConstantExpr::getPtrToInt(CGep, II->getType()),
-                           FreshBBs, IsHugeFunc);
-        II->eraseFromParent();
-        return true;
-      }
-      break;
-    }
     case Intrinsic::masked_gather:
       return optimizeGatherScatterInst(II, II->getArgOperand(0));
     case Intrinsic::masked_scatter:
@@ -2442,6 +2493,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
   if (!RetI)
     return false;
 
+  assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
+
   PHINode *PN = nullptr;
   ExtractValueInst *EVI = nullptr;
   BitCastInst *BCI = nullptr;
@@ -2687,7 +2740,7 @@ void ExtAddrMode::print(raw_ostream &OS) const {
   if (InBounds)
     OS << "inbounds ";
   if (BaseGV) {
-    OS << (NeedPlus ? " + " : "") << "GV:";
+    OS << "GV:";
     BaseGV->printAsOperand(OS, /*PrintType=*/false);
     NeedPlus = true;
   }
@@ -3073,6 +3126,9 @@ class TypePromotionTransaction {
     ~InstructionRemover() override { delete Replacer; }
 
+    InstructionRemover &operator=(const InstructionRemover &other) = delete;
+    InstructionRemover(const InstructionRemover &other) = delete;
+
     /// Resurrect the instruction and reassign it to the proper uses if
     /// new value was provided when build this action.
     void undo() override {
@@ -3258,7 +3314,7 @@ class AddressingModeMatcher {
   bool IgnoreProfitability;
 
   /// True if we are optimizing for size.
-  bool OptSize;
+  bool OptSize = false;
 
   ProfileSummaryInfo *PSI;
   BlockFrequencyInfo *BFI;
@@ -3574,10 +3630,15 @@ private:
   /// Original Address.
   Value *Original;
 
+  /// Common value among addresses
+  Value *CommonValue = nullptr;
+
 public:
   AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
       : SQ(_SQ), Original(OriginalValue) {}
 
+  ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
+
   /// Get the combined AddrMode
   const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
@@ -3662,13 +3723,21 @@ public:
     if (!initializeMap(Map))
       return false;
 
-    Value *CommonValue = findCommon(Map);
+    CommonValue = findCommon(Map);
     if (CommonValue)
       AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
     return CommonValue != nullptr;
   }
 
 private:
+  /// `CommonValue` may be a placeholder inserted by us.
+  /// If the placeholder is not used, we should remove this dead instruction.
+  void eraseCommonValueIfDead() {
+    if (CommonValue && CommonValue->getNumUses() == 0)
+      if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
+        CommonInst->eraseFromParent();
+  }
+
   /// Initialize Map with anchor values. For address seen
   /// we set the value of different field saw in this address.
   /// At the same time we find a common type for different field we will
@@ -3866,17 +3935,17 @@ private:
                          SimplificationTracker &ST) {
     while (!TraverseOrder.empty()) {
       Value *Current = TraverseOrder.pop_back_val();
-      assert(Map.find(Current) != Map.end() && "No node to fill!!!");
+      assert(Map.contains(Current) && "No node to fill!!!");
       Value *V = Map[Current];
 
       if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
         // CurrentValue also must be Select.
         auto *CurrentSelect = cast<SelectInst>(Current);
         auto *TrueValue = CurrentSelect->getTrueValue();
-        assert(Map.find(TrueValue) != Map.end() && "No True Value!");
+        assert(Map.contains(TrueValue) && "No True Value!");
         Select->setTrueValue(ST.Get(Map[TrueValue]));
         auto *FalseValue = CurrentSelect->getFalseValue();
-        assert(Map.find(FalseValue) != Map.end() && "No False Value!");
+        assert(Map.contains(FalseValue) && "No False Value!");
         Select->setFalseValue(ST.Get(Map[FalseValue]));
       } else {
         // Must be a Phi node then.
@@ -3884,7 +3953,7 @@ private:
         // Fill the Phi node with values from predecessors.
         for (auto *B : predecessors(PHI->getParent())) {
           Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
-          assert(Map.find(PV) != Map.end() && "No predecessor Value!");
+          assert(Map.contains(PV) && "No predecessor Value!");
           PHI->addIncoming(ST.Get(Map[PV]), B);
         }
       }
@@ -3908,7 +3977,7 @@ private:
     while (!Worklist.empty()) {
       Value *Current = Worklist.pop_back_val();
       // if it is already visited or it is an ending value then skip it.
-      if (Map.find(Current) != Map.end())
+      if (Map.contains(Current))
         continue;
       TraverseOrder.push_back(Current);
@@ -4627,7 +4696,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
     return false;
   }
   case Instruction::Add: {
-    // Check to see if we can merge in the RHS then the LHS. If so, we win.
+    // Check to see if we can merge in one operand, then the other. If so, we
+    // win.
     ExtAddrMode BackupAddrMode = AddrMode;
     unsigned OldSize = AddrModeInsts.size();
 
    // Start a transaction at this point.
@@ -4637,9 +4707,15 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
    TypePromotionTransaction::ConstRestorationPt LastKnownGood =
        TPT.getRestorationPoint();
 
+    // Try to match an integer constant second to increase its chance of ending
+    // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
+    int First = 0, Second = 1;
+    if (isa<ConstantInt>(AddrInst->getOperand(First))
+        && !isa<ConstantInt>(AddrInst->getOperand(Second)))
+      std::swap(First, Second);
     AddrMode.InBounds = false;
-    if (matchAddr(AddrInst->getOperand(1), Depth + 1) &&
-        matchAddr(AddrInst->getOperand(0), Depth + 1))
+    if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
+        matchAddr(AddrInst->getOperand(Second), Depth + 1))
       return true;
 
     // Restore the old addr mode info.
@@ -4647,9 +4723,10 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
     AddrModeInsts.resize(OldSize);
     TPT.rollback(LastKnownGood);
 
-    // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
-    if (matchAddr(AddrInst->getOperand(0), Depth + 1) &&
-        matchAddr(AddrInst->getOperand(1), Depth + 1))
+    // Otherwise this was over-aggressive. Try merging operands in the opposite
+    // order.
+    if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
+        matchAddr(AddrInst->getOperand(First), Depth + 1))
      return true;
 
     // Otherwise we definitely can't merge the ADD in.
@@ -4698,7 +4775,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
       if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
         const APInt &CVal = CI->getValue();
-        if (CVal.getMinSignedBits() <= 64) {
+        if (CVal.getSignificantBits() <= 64) {
           ConstantOffset += CVal.getSExtValue() * TypeSize;
           continue;
         }
@@ -4718,36 +4795,35 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
     // just add it to the disp field and check validity.
     if (VariableOperand == -1) {
       AddrMode.BaseOffs += ConstantOffset;
-      if (ConstantOffset == 0 ||
-          TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
-        // Check to see if we can fold the base pointer in too.
-        if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
+      if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
         if (!cast<GEPOperator>(AddrInst)->isInBounds())
           AddrMode.InBounds = false;
         return true;
-        }
-      } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
-                 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
-                 ConstantOffset > 0) {
-        // Record GEPs with non-zero offsets as candidates for splitting in the
-        // event that the offset cannot fit into the r+i addressing mode.
-        // Simple and common case that only one GEP is used in calculating the
-        // address for the memory access.
-        Value *Base = AddrInst->getOperand(0);
-        auto *BaseI = dyn_cast<Instruction>(Base);
-        auto *GEP = cast<GetElementPtrInst>(AddrInst);
-        if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
-            (BaseI && !isa<CastInst>(BaseI) &&
-             !isa<GetElementPtrInst>(BaseI))) {
-          // Make sure the parent block allows inserting non-PHI instructions
-          // before the terminator.
-          BasicBlock *Parent =
-              BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
-          if (!Parent->getTerminator()->isEHPad())
-            LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
-        }
       }
       AddrMode.BaseOffs -= ConstantOffset;
+
+      if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
+          TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
+          ConstantOffset > 0) {
+        // Record GEPs with non-zero offsets as candidates for splitting in
+        // the event that the offset cannot fit into the r+i addressing mode.
+        // Simple and common case that only one GEP is used in calculating the
+        // address for the memory access.
+        Value *Base = AddrInst->getOperand(0);
+        auto *BaseI = dyn_cast<Instruction>(Base);
+        auto *GEP = cast<GetElementPtrInst>(AddrInst);
+        if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
+            (BaseI && !isa<CastInst>(BaseI) &&
+             !isa<GetElementPtrInst>(BaseI))) {
+          // Make sure the parent block allows inserting non-PHI instructions
+          // before the terminator.
+          BasicBlock *Parent = BaseI ? BaseI->getParent()
+                                     : &GEP->getFunction()->getEntryBlock();
+          if (!Parent->getTerminator()->isEHPad())
+            LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
+        }
+      }
+
       return false;
     }
 
@@ -4963,18 +5039,14 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
   return true;
 }
 
-// Max number of memory uses to look at before aborting the search to conserve
-// compile time.
-static constexpr int MaxMemoryUsesToScan = 20;
-
 /// Recursively walk all the uses of I until we find a memory use.
 /// If we find an obviously non-foldable instruction, return true.
 /// Add accessed addresses and types to MemoryUses.
 static bool FindAllMemoryUses(
-    Instruction *I, SmallVectorImpl<std::pair<Value *, Type *>> &MemoryUses,
+    Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
     SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
     const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
-    BlockFrequencyInfo *BFI, int SeenInsts = 0) {
+    BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
   // If we already considered this instruction, we're done.
   if (!ConsideredInsts.insert(I).second)
     return false;
@@ -4987,33 +5059,33 @@ static bool FindAllMemoryUses(
   for (Use &U : I->uses()) {
     // Conservatively return true if we're seeing a large number or a deep chain
     // of users. This avoids excessive compilation times in pathological cases.
-    if (SeenInsts++ >= MaxMemoryUsesToScan)
+    if (SeenInsts++ >= MaxAddressUsersToScan)
      return true;
 
     Instruction *UserI = cast<Instruction>(U.getUser());
     if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
-      MemoryUses.push_back({U.get(), LI->getType()});
+      MemoryUses.push_back({&U, LI->getType()});
       continue;
     }
 
     if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
       if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
         return true; // Storing addr, not into addr.
-      MemoryUses.push_back({U.get(), SI->getValueOperand()->getType()});
+      MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
       continue;
     }
 
     if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
      if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
        return true; // Storing addr, not into addr.
-      MemoryUses.push_back({U.get(), RMW->getValOperand()->getType()});
+      MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
       continue;
     }
 
     if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
       if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
         return true; // Storing addr, not into addr.
-      MemoryUses.push_back({U.get(), CmpX->getCompareOperand()->getType()});
+      MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
       continue;
     }
 
@@ -5045,6 +5117,17 @@ static bool FindAllMemoryUses(
   return false;
 }
 
+static bool FindAllMemoryUses(
+    Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
+    const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
+    ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+  unsigned SeenInsts = 0;
+  SmallPtrSet<Instruction *, 16> ConsideredInsts;
+  return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
+                           PSI, BFI, SeenInsts);
+}
+
+
 /// Return true if Val is already known to be live at the use site that we're
 /// folding it into. If so, there is no cost to include it in the addressing
 /// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
@@ -5126,10 +5209,8 @@ bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
   // we can remove the addressing mode and effectively trade one live register
   // for another (at worst.) In this context, folding an addressing mode into
   // the use is just a particularly nice way of sinking it.
-  SmallVector<std::pair<Value *, Type *>, 16> MemoryUses;
-  SmallPtrSet<Instruction *, 16> ConsideredInsts;
-  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, PSI,
-                        BFI))
+  SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
+  if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
     return false; // Has a non-memory, non-foldable use!
   // Now that we know that all uses of this instruction are part of a chain of
@@ -5142,8 +5223,9 @@ bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
   // growth since most architectures have some reasonable small and fast way to
   // compute an effective address. (i.e LEA on x86)
   SmallVector<Instruction *, 32> MatchedAddrModeInsts;
-  for (const std::pair<Value *, Type *> &Pair : MemoryUses) {
-    Value *Address = Pair.first;
+  for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
+    Value *Address = Pair.first->get();
+    Instruction *UserI = cast<Instruction>(Pair.first->getUser());
     Type *AddressAccessTy = Pair.second;
     unsigned AS = Address->getType()->getPointerAddressSpace();
 
@@ -5156,7 +5238,7 @@ bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
         TPT.getRestorationPoint();
     AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
-                                  AddressAccessTy, AS, MemoryInst, Result,
+                                  AddressAccessTy, AS, UserI, Result,
                                   InsertedInsts, PromotedInsts, TPT,
                                   LargeOffsetGEP, OptSize, PSI, BFI);
     Matcher.IgnoreProfitability = true;
@@ -5693,7 +5775,8 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
     // Create a scalar GEP if there are more than 2 operands.
     if (Ops.size() != 2) {
       // Replace the last index with 0.
-      Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy);
+      Ops[FinalIndex] =
+          Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
       Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
       SourceTy = GetElementPtrInst::getIndexedType(
           SourceTy, ArrayRef(Ops).drop_front());
@@ -6027,6 +6110,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
       int64_t Offset = LargeOffsetGEP->second;
       if (Offset != BaseOffset) {
         TargetLowering::AddrMode AddrMode;
+        AddrMode.HasBaseReg = true;
        AddrMode.BaseOffs = Offset - BaseOffset;
        // The result type of the GEP might not be the type of the memory
        // access.
@@ -6044,7 +6128,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
     // Generate a new GEP to replace the current one.
     LLVMContext &Ctx = GEP->getContext();
-    Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+    Type *PtrIdxTy = DL->getIndexType(GEP->getType());
     Type *I8PtrTy =
         Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
     Type *I8Ty = Type::getInt8Ty(Ctx);
@@ -6062,7 +6146,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
         NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
       else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
         NewBaseInsertBB =
-            SplitEdge(NewBaseInsertBB, Invoke->getNormalDest());
+            SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
         NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
       } else
         NewBaseInsertPt = std::next(BaseI->getIterator());
@@ -6074,7 +6158,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
     }
     IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
     // Create a new base.
-    Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset);
+    Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
     NewBaseGEP = OldBase;
     if (NewBaseGEP->getType() != I8PtrTy)
       NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
@@ -6090,7 +6174,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
       NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
     } else {
       // Calculate the new offset for the new GEP.
-      Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset);
+      Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
       NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
 
       if (GEP->getType() != I8PtrTy)
@@ -6872,9 +6956,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
     return false;
 
   TargetLowering::SelectSupportKind SelectKind;
-  if (VectorCond)
-    SelectKind = TargetLowering::VectorMaskSelect;
-  else if (SI->getType()->isVectorTy())
+  if (SI->getType()->isVectorTy())
     SelectKind = TargetLowering::ScalarCondVectorVal;
   else
     SelectKind = TargetLowering::ScalarValSelect;
@@ -6915,88 +6997,88 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
   // first branch will point directly to select.end, and the corresponding PHI
   // predecessor block will be the start block.
 
-  // First, we split the block containing the select into 2 blocks.
+  // Collect values that go on the true side and the values that go on the false
+  // side.
+  SmallVector<Instruction *> TrueInstrs, FalseInstrs;
+  for (SelectInst *SI : ASI) {
+    if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
+      TrueInstrs.push_back(cast<Instruction>(V));
+    if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
+      FalseInstrs.push_back(cast<Instruction>(V));
+  }
+
+  // Split the select block, according to how many (if any) values go on each
+  // side.
   BasicBlock *StartBlock = SI->getParent();
   BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
-  BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
-  if (IsHugeFunc)
-    FreshBBs.insert(EndBlock);
-  BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
-  // Delete the unconditional branch that was just created by the split.
-  StartBlock->getTerminator()->eraseFromParent();
+  IRBuilder<> IB(SI);
+  auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
-  // These are the new basic blocks for the conditional branch.
-  // At least one will become an actual new basic block.
   BasicBlock *TrueBlock = nullptr;
   BasicBlock *FalseBlock = nullptr;
+  BasicBlock *EndBlock = nullptr;
   BranchInst *TrueBranch = nullptr;
   BranchInst *FalseBranch = nullptr;
-
-  // Sink expensive instructions into the conditional blocks to avoid executing
-  // them speculatively.
-  for (SelectInst *SI : ASI) {
-    if (sinkSelectOperand(TTI, SI->getTrueValue())) {
-      if (TrueBlock == nullptr) {
-        TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
-                                       EndBlock->getParent(), EndBlock);
-        TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
-        if (IsHugeFunc)
-          FreshBBs.insert(TrueBlock);
-        TrueBranch->setDebugLoc(SI->getDebugLoc());
-      }
-      auto *TrueInst = cast<Instruction>(SI->getTrueValue());
-      TrueInst->moveBefore(TrueBranch);
-    }
-    if (sinkSelectOperand(TTI, SI->getFalseValue())) {
-      if (FalseBlock == nullptr) {
-        FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
-                                        EndBlock->getParent(), EndBlock);
-        if (IsHugeFunc)
-          FreshBBs.insert(FalseBlock);
-        FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
-        FalseBranch->setDebugLoc(SI->getDebugLoc());
-      }
-      auto *FalseInst = cast<Instruction>(SI->getFalseValue());
-      FalseInst->moveBefore(FalseBranch);
-    }
+  if (TrueInstrs.size() == 0) {
+    FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
+        CondFr, &*SplitPt, false, nullptr, nullptr, LI));
+    FalseBlock = FalseBranch->getParent();
+    EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
+  } else if (FalseInstrs.size() == 0) {
+    TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
+        CondFr, &*SplitPt, false, nullptr, nullptr, LI));
+    TrueBlock = TrueBranch->getParent();
+    EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
+  } else {
+    Instruction *ThenTerm = nullptr;
+    Instruction *ElseTerm = nullptr;
+    SplitBlockAndInsertIfThenElse(CondFr, &*SplitPt, &ThenTerm, &ElseTerm,
+                                  nullptr, nullptr, LI);
+    TrueBranch = cast<BranchInst>(ThenTerm);
+    FalseBranch = cast<BranchInst>(ElseTerm);
+    TrueBlock = TrueBranch->getParent();
+    FalseBlock = FalseBranch->getParent();
+    EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
+  }
+
+  EndBlock->setName("select.end");
+  if (TrueBlock)
+    TrueBlock->setName("select.true.sink");
+  if (FalseBlock)
+    FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
+                                                : "select.false.sink");
+
+  if (IsHugeFunc) {
+    if (TrueBlock)
+      FreshBBs.insert(TrueBlock);
+    if (FalseBlock)
+      FreshBBs.insert(FalseBlock);
+    FreshBBs.insert(EndBlock);
   }
-  // If there was nothing to sink, then arbitrarily choose the 'false' side
-  // for a new input value to the PHI.
-  if (TrueBlock == FalseBlock) {
-    assert(TrueBlock == nullptr &&
-           "Unexpected basic block transform while optimizing select");
+  BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
-    FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
-                                    EndBlock->getParent(), EndBlock);
-    if (IsHugeFunc)
-      FreshBBs.insert(FalseBlock);
-    auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
-    FalseBranch->setDebugLoc(SI->getDebugLoc());
-  }
+  static const unsigned MD[] = {
+      LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
+      LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
+  StartBlock->getTerminator()->copyMetadata(*SI, MD);
+
+  // Sink expensive instructions into the conditional blocks to avoid executing
+  // them speculatively.
+  for (Instruction *I : TrueInstrs)
+    I->moveBefore(TrueBranch);
+  for (Instruction *I : FalseInstrs)
+    I->moveBefore(FalseBranch);
-  // Insert the real conditional branch based on the original condition.
   // If we did not create a new block for one of the 'true' or 'false' paths
   // of the condition, it means that side of the branch goes to the end block
   // directly and the path originates from the start block from the point of
   // view of the new PHI.
-  BasicBlock *TT, *FT;
-  if (TrueBlock == nullptr) {
-    TT = EndBlock;
-    FT = FalseBlock;
+  if (TrueBlock == nullptr)
     TrueBlock = StartBlock;
-  } else if (FalseBlock == nullptr) {
-    TT = TrueBlock;
-    FT = EndBlock;
+  else if (FalseBlock == nullptr)
     FalseBlock = StartBlock;
-  } else {
-    TT = TrueBlock;
-    FT = FalseBlock;
-  }
-  IRBuilder<> IB(SI);
-  auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
-  IB.CreateCondBr(CondFr, TT, FT, SI);
 
   SmallPtrSet<const Instruction *, 2> INS;
   INS.insert(ASI.begin(), ASI.end());
@@ -7105,7 +7187,7 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
   if (IsHugeFunc) {
     // Now we clone an instruction, its operands' defs may sink to this BB
-    // now. So we put the operands defs' BBs into FreshBBs to do optmization.
+    // now. So we put the operands defs' BBs into FreshBBs to do optimization.
     for (unsigned I = 0; I < NI->getNumOperands(); ++I) {
       auto *OpDef = dyn_cast<Instruction>(NI->getOperand(I));
       if (!OpDef)
@@ -7696,7 +7778,7 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
   // whereas scalable vectors would have to be shifted by
   // <2log(vscale) + number of bits> in order to store the
   // low/high parts. Bailing out for now.
-  if (isa<ScalableVectorType>(StoreType))
+  if (StoreType->isScalableTy())
     return false;
 
   if (!DL.typeSizeEqualsStoreSize(StoreType) ||
@@ -8051,8 +8133,8 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
     return true;
 
   if ((isa<UIToFPInst>(I) || isa<FPToUIInst>(I) || isa<TruncInst>(I)) &&
-      TLI->optimizeExtendOrTruncateConversion(I,
-                                              LI->getLoopFor(I->getParent())))
+      TLI->optimizeExtendOrTruncateConversion(
+          I, LI->getLoopFor(I->getParent()), *TTI))
     return true;
 
   if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
@@ -8064,7 +8146,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
       return SinkCast(CI);
     } else {
       if (TLI->optimizeExtendOrTruncateConversion(
-              I, LI->getLoopFor(I->getParent())))
+              I, LI->getLoopFor(I->getParent()), *TTI))
        return true;
 
       bool MadeChange = optimizeExt(I);
@@ -8128,7 +8210,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
                                    GEPI->getName(), GEPI);
       NC->setDebugLoc(GEPI->getDebugLoc());
       replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
-      GEPI->eraseFromParent();
+      RecursivelyDeleteTriviallyDeadInstructions(
+          GEPI, TLInfo, nullptr,
+          [&](Value *V) { removeAllAssertingVHReferences(V); });
      ++NumGEPsElim;
      optimizeInst(NC, ModifiedDT);
      return true;
```
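
The operand-swapping heuristic added above in `swapICmpOperandsToExposeCSEOpportunities` is easiest to see in isolation. Below is a small standalone C++ sketch of the same counting idea, using plain strings in place of LLVM's `Use` lists; the names (`shouldSwapCmpOperands`, `Sub`, `MaxUsersToScan`) are hypothetical and only mirror the logic shown in the hunk, they are not part of the patch or of the LLVM API.

```cpp
// Standalone sketch of the cmp/sub operand-swap heuristic (illustrative only).
// Each user of the compare's first operand is modelled as a "sub Lhs, Rhs";
// we count how many of them would match the compare if its operands were
// swapped, and how many already match as-is.
#include <iostream>
#include <string>
#include <vector>

struct Sub {
  std::string Lhs, Rhs; // operands of a hypothetical "sub Lhs, Rhs" user
};

// Returns true if swapping the compare "cmp Op0, Op1" would let more existing
// subtracts share its operand order (and therefore a single sub/cmp on targets
// where subtract also sets flags).
bool shouldSwapCmpOperands(const std::string &Op0, const std::string &Op1,
                           const std::vector<Sub> &UsersOfOp0,
                           unsigned MaxUsersToScan = 128) {
  int GoodToSwap = 0;
  unsigned NumInspected = 0;
  for (const Sub &S : UsersOfOp0) {
    if (++NumInspected > MaxUsersToScan)
      return false; // give up on pathological use lists, like the patch does
    if (S.Lhs == Op1 && S.Rhs == Op0)
      ++GoodToSwap; // subtract uses the reversed order; swapping helps
    else if (S.Lhs == Op0 && S.Rhs == Op1)
      --GoodToSwap; // subtract matches the current order; swapping would hurt
  }
  return GoodToSwap > 0;
}

int main() {
  // cmp a, b  with an existing  sub b, a  -> swapping the compare exposes CSE.
  std::vector<Sub> Users = {{"b", "a"}};
  std::cout << std::boolalpha << shouldSwapCmpOperands("a", "b", Users) << '\n';
}
```

With `cmp a, b` and an existing `sub b, a`, the count is positive and the compare's operands are swapped; this matches the rationale in the patch's comment that many architectures implement compare and subtract with the same instruction.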