Diffstat (limited to 'lib/Analysis')
 lib/Analysis/ConstantFolding.cpp               |   7
 lib/Analysis/IndirectCallPromotionAnalysis.cpp |   2
 lib/Analysis/InlineCost.cpp                    | 132
 lib/Analysis/LazyValueInfo.cpp                 |  69
 lib/Analysis/ModuleSummaryAnalysis.cpp         |  12
 lib/Analysis/OrderedBasicBlock.cpp             |   2
 lib/Analysis/RegionPass.cpp                    |  16
 7 files changed, 121 insertions(+), 119 deletions(-)
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 6a1af87450c9..a906770dbb34 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -1170,7 +1170,9 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
                                                 const DataLayout &DL,
                                                 const TargetLibraryInfo *TLI) {
   // fold: icmp (inttoptr x), null         -> icmp x, 0
+  // fold: icmp null, (inttoptr x)         -> icmp 0, x
   // fold: icmp (ptrtoint x), 0            -> icmp x, null
+  // fold: icmp 0, (ptrtoint x)            -> icmp null, x
   // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
   // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
   //
@@ -1240,6 +1242,11 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
           Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
       return ConstantFoldBinaryOpOperands(OpC, LHS, RHS, DL);
     }
+  } else if (isa<ConstantExpr>(Ops1)) {
+    // If RHS is a constant expression, but the left side isn't, swap the
+    // operands and try again.
+    Predicate = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)Predicate);
+    return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI);
   }

   return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
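The new else-if only fires when Ops0 is not itself a ConstantExpr, so the recursion terminates after a single swap. A minimal sketch of the symmetry it relies on (illustrative only, not part of the patch; assumes the usual LLVM headers are available):

#include "llvm/IR/Instructions.h"
#include <cassert>
using namespace llvm;

// Swapping the operands of an icmp is only sound when the predicate is
// mirrored as well, which is what getSwappedPredicate provides, e.g.
//   icmp slt 0, (ptrtoint @g)   becomes   icmp sgt (ptrtoint @g), 0
// and the existing inttoptr/ptrtoint folds then see the constant expression
// on the left-hand side.
void swappedPredicateDemo() {
  assert(ICmpInst::getSwappedPredicate(ICmpInst::ICMP_SLT) ==
         ICmpInst::ICMP_SGT);
  // EQ and NE are their own mirror images, so equality folds are unaffected.
  assert(ICmpInst::getSwappedPredicate(ICmpInst::ICMP_EQ) ==
         ICmpInst::ICMP_EQ);
}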
diff --git a/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index 3da33ac71421..ed233d201537 100644
--- a/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -43,7 +43,7 @@ static cl::opt<unsigned>
 // The percent threshold for the direct-call target (this call site vs the
 // total call count) for it to be considered as the promotion target.
 static cl::opt<unsigned>
-    ICPPercentThreshold("icp-percent-threshold", cl::init(33), cl::Hidden,
+    ICPPercentThreshold("icp-percent-threshold", cl::init(30), cl::Hidden,
                         cl::ZeroOrMore,
                         cl::desc("The percentage threshold for the promotion"));

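For scale, the flag is a percentage of the call site's total profiled count. A standalone restatement of what the 33 -> 30 change means in practice (a sketch only; the real profitability check lives in ICallPromotionAnalysis and applies further limits):

#include <cstdint>

bool meetsPercentThreshold(uint64_t TargetCount, uint64_t TotalCount,
                           unsigned PercentThreshold = 30) {
  // Example: a target receiving 320 of 1000 profiled calls (32%) now
  // qualifies at the 30% default but was rejected at the previous 33%.
  return TargetCount * 100 >= (uint64_t)PercentThreshold * TotalCount;
}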
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 4702569126c6..77c87928728a 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -54,11 +54,6 @@ static cl::opt<int>
                           cl::init(45),
                           cl::desc("Threshold for inlining cold callsites"));

-static cl::opt<bool>
-    EnableGenericSwitchCost("inline-generic-switch-cost", cl::Hidden,
-                            cl::init(false),
-                            cl::desc("Enable generic switch cost model"));
-
 // We introduce this threshold to help performance of instrumentation based
 // PGO before we actually hook up inliner with analysis passes such as BPI and
 // BFI.
@@ -1015,83 +1010,68 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
   if (isa<ConstantInt>(V))
     return true;

-  if (EnableGenericSwitchCost) {
-    // Assume the most general case where the swith is lowered into
-    // either a jump table, bit test, or a balanced binary tree consisting of
-    // case clusters without merging adjacent clusters with the same
-    // destination. We do not consider the switches that are lowered with a mix
-    // of jump table/bit test/binary search tree. The cost of the switch is
-    // proportional to the size of the tree or the size of jump table range.
-
-    // Exit early for a large switch, assuming one case needs at least one
-    // instruction.
-    // FIXME: This is not true for a bit test, but ignore such case for now to
-    // save compile-time.
-    int64_t CostLowerBound =
-        std::min((int64_t)INT_MAX,
-                 (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
-
-    if (CostLowerBound > Threshold) {
-      Cost = CostLowerBound;
-      return false;
-    }
+  // Assume the most general case where the swith is lowered into
+  // either a jump table, bit test, or a balanced binary tree consisting of
+  // case clusters without merging adjacent clusters with the same
+  // destination. We do not consider the switches that are lowered with a mix
+  // of jump table/bit test/binary search tree. The cost of the switch is
+  // proportional to the size of the tree or the size of jump table range.
+  //
+  // NB: We convert large switches which are just used to initialize large phi
+  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
+  // inlining those. It will prevent inlining in cases where the optimization
+  // does not (yet) fire.

-    unsigned JumpTableSize = 0;
-    unsigned NumCaseCluster =
-        TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
+  // Exit early for a large switch, assuming one case needs at least one
+  // instruction.
+  // FIXME: This is not true for a bit test, but ignore such case for now to
+  // save compile-time.
+  int64_t CostLowerBound =
+      std::min((int64_t)INT_MAX,
+               (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);

-    // If suitable for a jump table, consider the cost for the table size and
-    // branch to destination.
-    if (JumpTableSize) {
-      int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
-                       4 * InlineConstants::InstrCost;
-      Cost = std::min((int64_t)INT_MAX, JTCost + Cost);
-      return false;
-    }
+  if (CostLowerBound > Threshold) {
+    Cost = CostLowerBound;
+    return false;
+  }

-    // Considering forming a binary search, we should find the number of nodes
-    // which is same as the number of comparisons when lowered. For a given
-    // number of clusters, n, we can define a recursive function, f(n), to find
-    // the number of nodes in the tree. The recursion is :
-    // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
-    // and f(n) = n, when n <= 3.
-    // This will lead a binary tree where the leaf should be either f(2) or f(3)
-    // when n > 3. So, the number of comparisons from leaves should be n, while
-    // the number of non-leaf should be :
-    //   2^(log2(n) - 1) - 1
-    //   = 2^log2(n) * 2^-1 - 1
-    //   = n / 2 - 1.
-    // Considering comparisons from leaf and non-leaf nodes, we can estimate the
-    // number of comparisons in a simple closed form :
-    //   n + n / 2 - 1 = n * 3 / 2 - 1
-    if (NumCaseCluster <= 3) {
-      // Suppose a comparison includes one compare and one conditional branch.
-      Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
-      return false;
-    }
-    int64_t ExpectedNumberOfCompare = 3 * (uint64_t)NumCaseCluster / 2 - 1;
-    uint64_t SwitchCost =
-        ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
-    Cost = std::min((uint64_t)INT_MAX, SwitchCost + Cost);
+  unsigned JumpTableSize = 0;
+  unsigned NumCaseCluster =
+      TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
+
+  // If suitable for a jump table, consider the cost for the table size and
+  // branch to destination.
+  if (JumpTableSize) {
+    int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
+                     4 * InlineConstants::InstrCost;
+    Cost = std::min((int64_t)INT_MAX, JTCost + Cost);
     return false;
   }

-  // Use a simple switch cost model where we accumulate a cost proportional to
-  // the number of distinct successor blocks. This fan-out in the CFG cannot
-  // be represented for free even if we can represent the core switch as a
-  // jumptable that takes a single instruction.
-  ///
-  // NB: We convert large switches which are just used to initialize large phi
-  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
-  // inlining those. It will prevent inlining in cases where the optimization
-  // does not (yet) fire.
-  SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
-  SuccessorBlocks.insert(SI.getDefaultDest());
-  for (auto Case : SI.cases())
-    SuccessorBlocks.insert(Case.getCaseSuccessor());
-  // Add cost corresponding to the number of distinct destinations. The first
-  // we model as free because of fallthrough.
-  Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
+  // Considering forming a binary search, we should find the number of nodes
+  // which is same as the number of comparisons when lowered. For a given
+  // number of clusters, n, we can define a recursive function, f(n), to find
+  // the number of nodes in the tree. The recursion is :
+  // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
+  // and f(n) = n, when n <= 3.
+  // This will lead a binary tree where the leaf should be either f(2) or f(3)
+  // when n > 3. So, the number of comparisons from leaves should be n, while
+  // the number of non-leaf should be :
+  //   2^(log2(n) - 1) - 1
+  //   = 2^log2(n) * 2^-1 - 1
+  //   = n / 2 - 1.
+  // Considering comparisons from leaf and non-leaf nodes, we can estimate the
+  // number of comparisons in a simple closed form :
+  //   n + n / 2 - 1 = n * 3 / 2 - 1
+  if (NumCaseCluster <= 3) {
+    // Suppose a comparison includes one compare and one conditional branch.
+    Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
+    return false;
+  }
+  int64_t ExpectedNumberOfCompare = 3 * (uint64_t)NumCaseCluster / 2 - 1;
+  uint64_t SwitchCost =
+      ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
+  Cost = std::min((uint64_t)INT_MAX, SwitchCost + Cost);
   return false;
 }

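The cost model that is now unconditional can be restated outside CallAnalyzer. A sketch under the assumption that InlineConstants::InstrCost is 5; the early CostLowerBound exit and the running Threshold are deliberately omitted:

#include <algorithm>
#include <climits>
#include <cstdint>

// Mirrors the three cases above: jump table, tiny cluster count, and a
// balanced binary search tree of case clusters.
int64_t estimateSwitchCost(unsigned NumCaseCluster, unsigned JumpTableSize,
                           int64_t Cost, int64_t InstrCost = 5) {
  if (JumpTableSize) // one entry per table slot plus ~4 dispatch instructions
    return std::min<int64_t>(
        INT_MAX, (int64_t)JumpTableSize * InstrCost + 4 * InstrCost + Cost);
  if (NumCaseCluster <= 3) // one compare and one branch per cluster
    return Cost + NumCaseCluster * 2 * InstrCost;
  // n * 3 / 2 - 1 comparisons for n clusters, e.g. n = 10 gives 14 compares,
  // i.e. 14 * 2 * 5 = 140 of added cost.
  int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
  return std::min<int64_t>(INT_MAX,
                           ExpectedNumberOfCompare * 2 * InstrCost + Cost);
}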
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index a2b9015a8a1d..6a9ae6440ace 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -662,13 +662,13 @@ namespace {
     bool solveBlockValuePHINode(LVILatticeVal &BBLV, PHINode *PN,
                                 BasicBlock *BB);
     bool solveBlockValueSelect(LVILatticeVal &BBLV, SelectInst *S,
                                BasicBlock *BB);
-    bool solveBlockValueBinaryOp(LVILatticeVal &BBLV, Instruction *BBI,
+    bool solveBlockValueBinaryOp(LVILatticeVal &BBLV, BinaryOperator *BBI,
                                  BasicBlock *BB);
-    bool solveBlockValueCast(LVILatticeVal &BBLV, Instruction *BBI,
+    bool solveBlockValueCast(LVILatticeVal &BBLV, CastInst *CI,
                              BasicBlock *BB);
     void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
                                                        LVILatticeVal &BBLV,
-                                                       Instruction *BBI);
+                                                       Instruction *BBI);
     void solve();
@@ -849,12 +849,12 @@ bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res,
     return true;
   }
   if (BBI->getType()->isIntegerTy()) {
-    if (isa<CastInst>(BBI))
-      return solveBlockValueCast(Res, BBI, BB);
-
+    if (auto *CI = dyn_cast<CastInst>(BBI))
+      return solveBlockValueCast(Res, CI, BB);
+
     BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
     if (BO && isa<ConstantInt>(BO->getOperand(1)))
-      return solveBlockValueBinaryOp(Res, BBI, BB);
+      return solveBlockValueBinaryOp(Res, BO, BB);
   }

   DEBUG(dbgs() << " compute BB '" << BB->getName()
@@ -1168,9 +1168,9 @@ bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV,
 }

 bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
-                                            Instruction *BBI,
-                                            BasicBlock *BB) {
-  if (!BBI->getOperand(0)->getType()->isSized()) {
+                                            CastInst *CI,
+                                            BasicBlock *BB) {
+  if (!CI->getOperand(0)->getType()->isSized()) {
     // Without knowing how wide the input is, we can't analyze it in any useful
     // way.
     BBLV = LVILatticeVal::getOverdefined();
@@ -1180,7 +1180,7 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
   // Filter out casts we don't know how to reason about before attempting to
   // recurse on our operand. This can cut a long search short if we know we're
   // not going to be able to get any useful information anways.
-  switch (BBI->getOpcode()) {
+  switch (CI->getOpcode()) {
   case Instruction::Trunc:
   case Instruction::SExt:
   case Instruction::ZExt:
@@ -1197,44 +1197,43 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
   // Figure out the range of the LHS. If that fails, we still apply the
   // transfer rule on the full set since we may be able to locally infer
   // interesting facts.
-  if (!hasBlockValue(BBI->getOperand(0), BB))
-    if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0))))
+  if (!hasBlockValue(CI->getOperand(0), BB))
+    if (pushBlockValue(std::make_pair(BB, CI->getOperand(0))))
       // More work to do before applying this transfer rule.
       return false;

   const unsigned OperandBitWidth =
-    DL.getTypeSizeInBits(BBI->getOperand(0)->getType());
+    DL.getTypeSizeInBits(CI->getOperand(0)->getType());
   ConstantRange LHSRange = ConstantRange(OperandBitWidth);
-  if (hasBlockValue(BBI->getOperand(0), BB)) {
-    LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
-    intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal,
-                                                  BBI);
+  if (hasBlockValue(CI->getOperand(0), BB)) {
+    LVILatticeVal LHSVal = getBlockValue(CI->getOperand(0), BB);
+    intersectAssumeOrGuardBlockValueConstantRange(CI->getOperand(0), LHSVal,
+                                                  CI);
     if (LHSVal.isConstantRange())
       LHSRange = LHSVal.getConstantRange();
   }

-  const unsigned ResultBitWidth =
-    cast<IntegerType>(BBI->getType())->getBitWidth();
+  const unsigned ResultBitWidth = CI->getType()->getIntegerBitWidth();

   // NOTE: We're currently limited by the set of operations that ConstantRange
   // can evaluate symbolically. Enhancing that set will allows us to analyze
   // more definitions.
-  auto CastOp = (Instruction::CastOps) BBI->getOpcode();
-  BBLV = LVILatticeVal::getRange(LHSRange.castOp(CastOp, ResultBitWidth));
+  BBLV = LVILatticeVal::getRange(LHSRange.castOp(CI->getOpcode(),
+                                                 ResultBitWidth));
   return true;
 }

 bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
-                                                Instruction *BBI,
+                                                BinaryOperator *BO,
                                                 BasicBlock *BB) {

-  assert(BBI->getOperand(0)->getType()->isSized() &&
+  assert(BO->getOperand(0)->getType()->isSized() &&
          "all operands to binary operators are sized");

   // Filter out operators we don't know how to reason about before attempting to
   // recurse on our operand(s). This can cut a long search short if we know
-  // we're not going to be able to get any useful information anways.
-  switch (BBI->getOpcode()) {
+  // we're not going to be able to get any useful information anyways.
+  switch (BO->getOpcode()) {
   case Instruction::Add:
   case Instruction::Sub:
   case Instruction::Mul:
@@ -1256,29 +1255,29 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
   // Figure out the range of the LHS. If that fails, use a conservative range,
   // but apply the transfer rule anyways. This lets us pick up facts from
   // expressions like "and i32 (call i32 @foo()), 32"
-  if (!hasBlockValue(BBI->getOperand(0), BB))
-    if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0))))
+  if (!hasBlockValue(BO->getOperand(0), BB))
+    if (pushBlockValue(std::make_pair(BB, BO->getOperand(0))))
       // More work to do before applying this transfer rule.
       return false;

   const unsigned OperandBitWidth =
-    DL.getTypeSizeInBits(BBI->getOperand(0)->getType());
+    DL.getTypeSizeInBits(BO->getOperand(0)->getType());
   ConstantRange LHSRange = ConstantRange(OperandBitWidth);
-  if (hasBlockValue(BBI->getOperand(0), BB)) {
-    LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
-    intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal,
-                                                  BBI);
+  if (hasBlockValue(BO->getOperand(0), BB)) {
+    LVILatticeVal LHSVal = getBlockValue(BO->getOperand(0), BB);
+    intersectAssumeOrGuardBlockValueConstantRange(BO->getOperand(0), LHSVal,
+                                                  BO);
     if (LHSVal.isConstantRange())
       LHSRange = LHSVal.getConstantRange();
   }

-  ConstantInt *RHS = cast<ConstantInt>(BBI->getOperand(1));
+  ConstantInt *RHS = cast<ConstantInt>(BO->getOperand(1));
   ConstantRange RHSRange = ConstantRange(RHS->getValue());

   // NOTE: We're currently limited by the set of operations that ConstantRange
   // can evaluate symbolically. Enhancing that set will allows us to analyze
   // more definitions.
-  auto BinOp = (Instruction::BinaryOps) BBI->getOpcode();
+  Instruction::BinaryOps BinOp = BO->getOpcode();
   BBLV = LVILatticeVal::getRange(LHSRange.binaryOp(BinOp, RHSRange));
   return true;
 }
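The tightened signatures feed the same ConstantRange transfer functions as before; only the casts inside the helpers go away. A small, self-contained illustration of those transfer functions (not from the patch; header paths as of this LLVM revision):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

ConstantRange rangeTransferDemo() {
  ConstantRange Small(APInt(8, 0), APInt(8, 10));      // i8 values in [0, 10)
  // castOp is what solveBlockValueCast ends up calling.
  ConstantRange Wide = Small.castOp(Instruction::ZExt, 32);  // still [0, 10)
  // binaryOp is what solveBlockValueBinaryOp ends up calling.
  ConstantRange Shifted =
      Wide.binaryOp(Instruction::Add, ConstantRange(APInt(32, 5)));
  return Shifted;                                      // i32 values in [5, 15)
}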
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index 26706f5509ba..3253f27c010d 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -275,7 +275,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
       // FIXME: refactor this to use the same code that inliner is using.
       F.isVarArg();
   GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
-                                    /* LiveRoot = */ false);
+                                    /* Live = */ false);
   auto FuncSummary = llvm::make_unique<FunctionSummary>(
       Flags, NumInsts, RefEdges.takeVector(), CallGraphEdges.takeVector(),
       TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(),
@@ -295,7 +295,7 @@ computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V,
   findRefEdges(Index, &V, RefEdges, Visited);
   bool NonRenamableLocal = isNonRenamableLocal(V);
   GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal,
-                                    /* LiveRoot = */ false);
+                                    /* Live = */ false);
   auto GVarSummary =
       llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector());
   if (NonRenamableLocal)
@@ -308,7 +308,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
                     DenseSet<GlobalValue::GUID> &CantBePromoted) {
   bool NonRenamableLocal = isNonRenamableLocal(A);
   GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal,
-                                    /* LiveRoot = */ false);
+                                    /* Live = */ false);
   auto AS = llvm::make_unique<AliasSummary>(Flags, ArrayRef<ValueInfo>{});
   auto *Aliasee = A.getBaseObject();
   auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee);
@@ -323,7 +323,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
 static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) {
   if (ValueInfo VI = Index.getValueInfo(GlobalValue::getGUID(Name)))
     for (auto &Summary : VI.getSummaryList())
-      Summary->setLiveRoot();
+      Summary->setLive(true);
 }

 ModuleSummaryIndex llvm::buildModuleSummaryIndex(
@@ -423,8 +423,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
       return;
     assert(GV->isDeclaration() && "Def in module asm already has definition");
     GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage,
-                                        /* NotEligibleToImport */ true,
-                                        /* LiveRoot */ true);
+                                        /* NotEligibleToImport = */ true,
+                                        /* Live = */ true);
     CantBePromoted.insert(GlobalValue::getGUID(Name));
     // Create the appropriate summary type.
     if (isa<Function>(GV)) {
diff --git a/lib/Analysis/OrderedBasicBlock.cpp b/lib/Analysis/OrderedBasicBlock.cpp
index 0f0016f22cc0..a04c0aef04be 100644
--- a/lib/Analysis/OrderedBasicBlock.cpp
+++ b/lib/Analysis/OrderedBasicBlock.cpp
@@ -55,7 +55,7 @@ bool OrderedBasicBlock::comesBefore(const Instruction *A,
   assert(II != IE && "Instruction not found?");
   assert((Inst == A || Inst == B) && "Should find A or B");
   LastInstFound = II;
-  return Inst == A;
+  return Inst != B;
 }

 /// \brief Find out whether \p A dominates \p B, meaning whether \p A
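The one-line change makes comesBefore(A, A) report false instead of true: the scan stops at whichever of the two instructions appears first, so returning "Inst != B" turns the predicate into a strict ordering. A plain restatement of that scan (a sketch, not the class itself):

#include <vector>

// Walk the "block" once and stop at the first of A or B that is found.
bool comesBeforeSketch(const std::vector<int> &Block, int A, int B) {
  for (int Inst : Block)
    if (Inst == A || Inst == B)
      return Inst != B; // when A == B this is false; "Inst == A" was true
  return false;         // neither found (the real code asserts instead)
}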
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index 82107cb18025..b38e6225c840 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -15,6 +15,7 @@
 //===----------------------------------------------------------------------===//
 #include "llvm/Analysis/RegionPass.h"
 #include "llvm/Analysis/RegionIterator.h"
+#include "llvm/IR/OptBisect.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
@@ -280,3 +281,18 @@ Pass *RegionPass::createPrinterPass(raw_ostream &O,
                                     const std::string &Banner) const {
   return new PrintRegionPass(Banner, O);
 }
+
+bool RegionPass::skipRegion(Region &R) const {
+  Function &F = *R.getEntry()->getParent();
+  if (!F.getContext().getOptBisect().shouldRunPass(this, R))
+    return true;
+
+  if (F.hasFnAttribute(Attribute::OptimizeNone)) {
+    // Report this only once per function.
+    if (R.getEntry() == &F.getEntryBlock())
+      DEBUG(dbgs() << "Skipping pass '" << getPassName()
+            << "' on function " << F.getName() << "\n");
+    return true;
+  }
+  return false;
+}
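The new hook mirrors the existing skipFunction()/skipLoop() helpers of other pass kinds. A hedged sketch of how a region pass would consume it (the pass name here is made up for illustration):

#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionPass.h"
using namespace llvm;

namespace {
struct ExampleRegionPass : public RegionPass {
  static char ID;
  ExampleRegionPass() : RegionPass(ID) {}

  bool runOnRegion(Region *R, RGPassManager &RGM) override {
    // Honours -opt-bisect-limit and the optnone function attribute.
    if (skipRegion(*R))
      return false; // report "no change" without touching the region
    // ... actual transformation would go here ...
    return true;
  }
};
} // end anonymous namespace
char ExampleRegionPass::ID = 0;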