author     Dimitry Andric <dim@FreeBSD.org>    2018-08-02 17:32:43 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2018-08-02 17:32:43 +0000
commit     b7eb8e35e481a74962664b63dfb09483b200209a (patch)
tree       1937fb4a348458ce2d02ade03ac3bb0aa18d2fcd /lib/Transforms
parent     eb11fae6d08f479c0799db45860a98af528fa6e7 (diff)
Diffstat (limited to 'lib/Transforms')
58 files changed, 702 insertions, 429 deletions
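Besides trailing-whitespace cleanup, this import picks up several new InstCombine folds; for instance, the InstCombineAddSub.cpp hunks below introduce add (sub X, Y), -1 --> add (not Y), X. A minimal LLVM IR sketch of that rewrite (the function names and types here are illustrative, not taken from the commit):

    define i8 @before(i8 %x, i8 %y) {
      %s = sub i8 %x, %y
      %r = add i8 %s, -1        ; add (sub X, Y), -1
      ret i8 %r
    }

    ; ...is rewritten to the equivalent (since ~y == -y - 1):
    define i8 @after(i8 %x, i8 %y) {
      %n = xor i8 %y, -1        ; not Y
      %r = add i8 %n, %x        ; add (not Y), X
      ret i8 %r
    }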
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 31e771da3bd3..cd2bd734eb26 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -56,7 +56,7 @@ using namespace llvm;
 
 STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
 STATISTIC(NumRetValsEliminated  , "Number of unused return values removed");
-STATISTIC(NumArgumentsReplacedWithUndef, 
+STATISTIC(NumArgumentsReplacedWithUndef,
           "Number of unread args replaced with undef");
 
 namespace {
@@ -109,7 +109,7 @@ namespace {
 
 char DAH::ID = 0;
 
-INITIALIZE_PASS(DAH, "deadarghaX0r", 
+INITIALIZE_PASS(DAH, "deadarghaX0r",
                 "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
                 false, false)
 
@@ -256,7 +256,7 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
   return true;
 }
 
-/// RemoveDeadArgumentsFromCallers - Checks if the given function has any 
+/// RemoveDeadArgumentsFromCallers - Checks if the given function has any
 /// arguments that are unused, and changes the caller parameters to be undefined
 /// instead.
 bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
@@ -640,7 +640,7 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
       Result = Live;
     } else {
       // See what the effect of this use is (recording any uses that cause
-      // MaybeLive in MaybeLiveArgUses). 
+      // MaybeLive in MaybeLiveArgUses).
       Result = SurveyUses(&*AI, MaybeLiveArgUses);
     }
 
@@ -777,7 +777,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
   //    argument.
   // 2) Retain the 'returned' attribute and treat the return value (but not the
   //    entire function) as live so that it is not eliminated.
-  // 
+  //
   // It's not clear in the general case which option is more profitable because,
   // even in the absence of explicit uses of the return value, code generation
   // is free to use the 'returned' attribute to do things like eliding
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 2797da6c0abd..010b0a29807d 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -617,7 +617,7 @@ static bool addArgumentAttrsFromCallsites(Function &F) {
     if (!isGuaranteedToTransferExecutionToSuccessor(&I))
       break;
   }
-  
+
   return Changed;
 }
 
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 1af7e6894777..1761d7faff57 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -357,6 +357,41 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
   return Changed;
 }
 
+static bool isSafeSROAElementUse(Value *V);
+
+/// Return true if the specified GEP is a safe user of a derived
+/// expression from a global that we want to SROA.
+static bool isSafeSROAGEP(User *U) {
+  // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
+  // don't like < 3 operand CE's, and we don't like non-constant integer
+  // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
+  // value of C.
+  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
+      !cast<Constant>(U->getOperand(1))->isNullValue())
+    return false;
+
+  gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
+  ++GEPI; // Skip over the pointer index.
+
+  // For all other level we require that the indices are constant and inrange.
+  // In particular, consider: A[0][i].  We cannot know that the user isn't doing
+  // invalid things like allowing i to index an out-of-range subscript that
+  // accesses A[1].  This can also happen between different members of a struct
+  // in llvm IR.
+  for (; GEPI != E; ++GEPI) {
+    if (GEPI.isStruct())
+      continue;
+
+    ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
+    if (!IdxVal || (GEPI.isBoundedSequential() &&
+                    IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
+      return false;
+  }
+
+  return llvm::all_of(U->users(),
+                      [](User *UU) { return isSafeSROAElementUse(UU); });
+}
+
 /// Return true if the specified instruction is a safe user of a derived
 /// expression from a global that we want to SROA.
 static bool isSafeSROAElementUse(Value *V) {
@@ -374,84 +409,25 @@ static bool isSafeSROAElementUse(Value *V) {
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return SI->getOperand(0) != V;
 
-  // Otherwise, it must be a GEP.
-  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
-  if (!GEPI) return false;
-
-  if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
-      !cast<Constant>(GEPI->getOperand(1))->isNullValue())
-    return false;
-
-  for (User *U : GEPI->users())
-    if (!isSafeSROAElementUse(U))
-      return false;
-  return true;
-}
-
-/// U is a direct user of the specified global value.  Look at it and its uses
-/// and decide whether it is safe to SROA this global.
-static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
-  // The user of the global must be a GEP Inst or a ConstantExpr GEP.
-  if (!isa<GetElementPtrInst>(U) &&
-      (!isa<ConstantExpr>(U) ||
-       cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
-    return false;
-
-  // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
-  // don't like < 3 operand CE's, and we don't like non-constant integer
-  // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
-  // value of C.
-  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
-      !cast<Constant>(U->getOperand(1))->isNullValue() ||
-      !isa<ConstantInt>(U->getOperand(2)))
-    return false;
-
-  gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
-  ++GEPI;  // Skip over the pointer index.
-
-  // If this is a use of an array allocation, do a bit more checking for sanity.
-  if (GEPI.isSequential()) {
-    ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
-
-    // Check to make sure that index falls within the array.  If not,
-    // something funny is going on, so we won't do the optimization.
-    //
-    if (GEPI.isBoundedSequential() &&
-        Idx->getZExtValue() >= GEPI.getSequentialNumElements())
-      return false;
-
-    // We cannot scalar repl this level of the array unless any array
-    // sub-indices are in-range constants.  In particular, consider:
-    // A[0][i].  We cannot know that the user isn't doing invalid things like
-    // allowing i to index an out-of-range subscript that accesses A[1].
-    //
-    // Scalar replacing *just* the outer index of the array is probably not
-    // going to be a win anyway, so just give up.
-    for (++GEPI;  // Skip array index.
-         GEPI != E;
-         ++GEPI) {
-      if (GEPI.isStruct())
-        continue;
-
-      ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
-      if (!IdxVal ||
-          (GEPI.isBoundedSequential() &&
-           IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
-        return false;
-    }
-  }
-
-  return llvm::all_of(U->users(),
-                      [](User *UU) { return isSafeSROAElementUse(UU); });
+  // Otherwise, it must be a GEP. Check it and its users are safe to SRA.
+  return isa<GetElementPtrInst>(I) && isSafeSROAGEP(I);
 }
 
 /// Look at all uses of the global and decide whether it is safe for us to
 /// perform this transformation.
 static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
-  for (User *U : GV->users())
-    if (!IsUserOfGlobalSafeForSRA(U, GV))
+  for (User *U : GV->users()) {
+    // The user of the global must be a GEP Inst or a ConstantExpr GEP.
+    if (!isa<GetElementPtrInst>(U) &&
+        (!isa<ConstantExpr>(U) ||
+         cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
       return false;
+
+    // Check the gep and it's users are safe to SRA
+    if (!isSafeSROAGEP(U))
+      return false;
+  }
+
   return true;
 }
 
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index f79b61037f1d..7d55ebecbf92 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -61,12 +61,12 @@ static bool PropagateConstantsIntoArguments(Function &F) {
     User *UR = U.getUser();
     // Ignore blockaddress uses.
     if (isa<BlockAddress>(UR)) continue;
-    
+
     // Used by a non-instruction, or not the callee of a function, do not
     // transform.
     if (!isa<CallInst>(UR) && !isa<InvokeInst>(UR))
       return false;
-    
+
     CallSite CS(cast<Instruction>(UR));
     if (!CS.isCallee(&U))
       return false;
@@ -77,11 +77,11 @@ static bool PropagateConstantsIntoArguments(Function &F) {
     Function::arg_iterator Arg = F.arg_begin();
     for (unsigned i = 0, e = ArgumentConstants.size(); i != e;
          ++i, ++AI, ++Arg) {
-      
+
       // If this argument is known non-constant, ignore it.
       if (ArgumentConstants[i].second)
         continue;
-      
+
       Constant *C = dyn_cast<Constant>(*AI);
       if (C && ArgumentConstants[i].first == nullptr) {
         ArgumentConstants[i].first = C;   // First constant seen.
@@ -108,7 +108,7 @@ static bool PropagateConstantsIntoArguments(Function &F) {
     if (ArgumentConstants[i].second || AI->use_empty() ||
         AI->hasInAllocaAttr() || (AI->hasByValAttr() && !F.onlyReadsMemory()))
       continue;
-    
+
     Value *V = ArgumentConstants[i].first;
     if (!V) V = UndefValue::get(AI->getType());
     AI->replaceAllUsesWith(V);
@@ -147,7 +147,7 @@ static bool PropagateConstantReturn(Function &F) {
   SmallVector<Value *,4> RetVals;
   StructType *STy = dyn_cast<StructType>(F.getReturnType());
   if (STy)
-    for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i) 
+    for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i)
       RetVals.push_back(UndefValue::get(STy->getElementType(i)));
   else
     RetVals.push_back(UndefValue::get(F.getReturnType()));
@@ -172,7 +172,7 @@ static bool PropagateConstantReturn(Function &F) {
         // Ignore undefs, we can change them into anything
         if (isa<UndefValue>(V))
           continue;
-        
+
         // Try to see if all the rets return the same constant or argument.
         if (isa<Constant>(V) || isa<Argument>(V)) {
           if (isa<UndefValue>(RV)) {
@@ -206,7 +206,7 @@ static bool PropagateConstantReturn(Function &F) {
     // directly?
     if (!Call || !CS.isCallee(&U))
       continue;
-    
+
     // Call result not used?
     if (Call->use_empty()) continue;
 
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 139941127dee..3bebb96c6d35 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -27,7 +27,7 @@
 // -- We define Function* container class with custom "operator<" (FunctionPtr).
 // -- "FunctionPtr" instances are stored in std::set collection, so every
 //    std::set::insert operation will give you result in log(N) time.
-// 
+//
 // As an optimization, a hash of the function structure is calculated first, and
 // two functions are only compared if they have the same hash. This hash is
 // cheap to compute, and has the property that if function F == G according to
@@ -383,7 +383,7 @@ bool MergeFunctions::runOnModule(Module &M) {
   for (Function &Func : M) {
     if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) {
       HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
-    } 
+    }
   }
 
   std::stable_sort(
@@ -402,7 +402,7 @@ bool MergeFunctions::runOnModule(Module &M) {
         Deferred.push_back(WeakTrackingVH(I->second));
     }
   }
-  
+
   do {
     std::vector<WeakTrackingVH> Worklist;
     Deferred.swap(Worklist);
@@ -802,11 +802,11 @@ void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN,
   Function *F = FN.getFunc();
   assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 &&
          "The two functions must be equal");
-  
+
   auto I = FNodesInTree.find(F);
   assert(I != FNodesInTree.end() && "F should be in FNodesInTree");
   assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G");
-  
+
   FnTreeType::iterator IterToFNInFnTree = I->second;
   assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree.");
   // Remove F -> FN and insert G -> FN
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 27d791857314..2be654258aa8 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -77,13 +77,13 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) {
 
   // Next, check to see if any callees might throw or if there are any external
   // functions in this SCC: if so, we cannot prune any functions in this SCC.
-  // Definitions that are weak and not declared non-throwing might be 
+  // Definitions that are weak and not declared non-throwing might be
   // overridden at linktime with something that throws, so assume that.
   // If this SCC includes the unwind instruction, we KNOW it throws, so
   // obviously the SCC might throw.
   //
   bool SCCMightUnwind = false, SCCMightReturn = false;
-  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); 
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
        (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
     Function *F = (*I)->getFunction();
     if (!F) {
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index aa31e0d850dd..83054588a9aa 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -926,7 +926,13 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
   if (Instruction *NV = foldBinOpIntoSelectOrPhi(Add))
     return NV;
 
-  Value *X;
+  Value *X, *Y;
+
+  // add (sub X, Y), -1 --> add (not Y), X
+  if (match(Op0, m_OneUse(m_Sub(m_Value(X), m_Value(Y)))) &&
+      match(Op1, m_AllOnes()))
+    return BinaryOperator::CreateAdd(Builder.CreateNot(Y), X);
+
   // zext(bool) + C -> bool ? C + 1 : C
   if (match(Op0, m_ZExt(m_Value(X))) &&
       X->getType()->getScalarSizeInBits() == 1)
@@ -1608,6 +1614,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
   if (match(Op0, m_Not(m_Value(X))) && match(Op1, m_Not(m_Value(Y))))
     return BinaryOperator::CreateSub(Y, X);
 
+  // (X + -1) - Y --> ~Y + X
+  if (match(Op0, m_OneUse(m_Add(m_Value(X), m_AllOnes()))))
+    return BinaryOperator::CreateAdd(Builder.CreateNot(Op1), X);
+
+  // Y - (X + 1) --> ~X + Y
+  if (match(Op1, m_OneUse(m_Add(m_Value(X), m_One()))))
+    return BinaryOperator::CreateAdd(Builder.CreateNot(X), Op0);
+
   if (Constant *C = dyn_cast<Constant>(Op0)) {
     bool IsNegate = match(C, m_ZeroInt());
     Value *X;
@@ -1858,7 +1872,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
   Constant *C;
   if (match(Op1, m_Constant(C)) && !isa<ConstantExpr>(Op1))
     return BinaryOperator::CreateFAddFMF(Op0, ConstantExpr::getFNeg(C), &I);
-  
+
   // X - (-Y) --> X + Y
   if (match(Op1, m_FNeg(m_Value(Y))))
     return BinaryOperator::CreateFAddFMF(Op0, Y, &I);
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 372bc41f780e..3d758e2fe7c9 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1550,31 +1550,13 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
     return DeMorgan;
 
   {
-    Value *A = nullptr, *B = nullptr, *C = nullptr;
-    // A&(A^B) => A & ~B
-    {
-      Value *tmpOp0 = Op0;
-      Value *tmpOp1 = Op1;
-      if (match(Op0, m_OneUse(m_Xor(m_Value(A), m_Value(B))))) {
-        if (A == Op1 || B == Op1 ) {
-          tmpOp1 = Op0;
-          tmpOp0 = Op1;
-          // Simplify below
-        }
-      }
-
-      if (match(tmpOp1, m_OneUse(m_Xor(m_Value(A), m_Value(B))))) {
-        if (B == tmpOp0) {
-          std::swap(A, B);
-        }
-        // Notice that the pattern (A&(~B)) is actually (A&(-1^B)), so if
-        // A is originally -1 (or a vector of -1 and undefs), then we enter
-        // an endless loop. By checking that A is non-constant we ensure that
-        // we will never get to the loop.
-        if (A == tmpOp0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B
-          return BinaryOperator::CreateAnd(A, Builder.CreateNot(B));
-      }
-    }
+    Value *A, *B, *C;
+    // A & (A ^ B) --> A & ~B
+    if (match(Op1, m_OneUse(m_c_Xor(m_Specific(Op0), m_Value(B)))))
+      return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(B));
+    // (A ^ B) & A --> A & ~B
+    if (match(Op0, m_OneUse(m_c_Xor(m_Specific(Op1), m_Value(B)))))
+      return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(B));
 
     // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
     if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index e8ea7396a96a..fd59c3a7c0c3 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2243,6 +2243,12 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
     Type *DstElTy = DstPTy->getElementType();
     Type *SrcElTy = SrcPTy->getElementType();
 
+    // Casting pointers between the same type, but with different address spaces
+    // is an addrspace cast rather than a bitcast.
+    if ((DstElTy == SrcElTy) &&
+        (DstPTy->getAddressSpace() != SrcPTy->getAddressSpace()))
+      return new AddrSpaceCastInst(Src, DestTy);
+
     // If we are casting a alloca to a pointer to a type of the same
     // size, rewrite the allocation instruction to allocate the "right" type.
    // There is no need to modify malloc calls because it is their bitcast that
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 742caf649007..62769f077b47 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -518,7 +518,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
 static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI,
                                          Value *V) {
   assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
          "can't fold an atomic store of requested type");
-  
+
   Value *Ptr = SI.getPointerOperand();
   unsigned AS = SI.getPointerAddressSpace();
   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 4867808478a3..796b4021d273 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -54,6 +54,36 @@ static Value *createMinMax(InstCombiner::BuilderTy &Builder,
   return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
 }
 
+/// Fold
+///   %A = icmp eq/ne i8 %x, 0
+///   %B = op i8 %x, %z
+///   %C = select i1 %A, i8 %B, i8 %y
+/// To
+///   %C = select i1 %A, i8 %z, i8 %y
+/// OP: binop with an identity constant
+/// TODO: support for non-commutative and FP opcodes
+static Instruction *foldSelectBinOpIdentity(SelectInst &Sel) {
+
+  Value *Cond = Sel.getCondition();
+  Value *X, *Z;
+  Constant *C;
+  CmpInst::Predicate Pred;
+  if (!match(Cond, m_ICmp(Pred, m_Value(X), m_Constant(C))) ||
+      !ICmpInst::isEquality(Pred))
+    return nullptr;
+
+  bool IsEq = Pred == ICmpInst::ICMP_EQ;
+  auto *BO =
+      dyn_cast<BinaryOperator>(IsEq ? Sel.getTrueValue() : Sel.getFalseValue());
+  // TODO: support for undefs
+  if (BO && match(BO, m_c_BinOp(m_Specific(X), m_Value(Z))) &&
+      ConstantExpr::getBinOpIdentity(BO->getOpcode(), X->getType()) == C) {
+    Sel.setOperand(IsEq ? 1 : 2, Z);
+    return &Sel;
+  }
+  return nullptr;
+}
+
 /// This folds:
 ///   select (icmp eq (and X, C1)), TC, FC
 /// iff C1 is a power 2 and the difference between TC and FC is a power-of-2.
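A hedged IR illustration of the foldSelectBinOpIdentity transform added above: 'add' has identity constant 0, so when the equality guard proves %x == 0, the binop operand of the select collapses to %z (the function names and i8 type below are illustrative, not from the commit):

    define i8 @before(i8 %x, i8 %y, i8 %z) {
      %a = icmp eq i8 %x, 0
      %b = add i8 %x, %z          ; on the %a-true path, %x == 0, so %b == %z
      %c = select i1 %a, i8 %b, i8 %y
      ret i8 %c
    }

    ; ...simplifies to:
    define i8 @after(i8 %x, i8 %y, i8 %z) {
      %a = icmp eq i8 %x, 0
      %c = select i1 %a, i8 %z, i8 %y
      ret i8 %c
    }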
@@ -1961,5 +1991,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (Instruction *Select = foldSelectCmpXchg(SI)) return Select; + if (Instruction *Select = foldSelectBinOpIdentity(SI)) + return Select; + return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 34f8037e519f..1ca75f3989d4 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -570,7 +570,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, m_OneUse(m_BinOp(FBO))))) { const APInt *C; if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal && - match(FBO->getOperand(1), m_APInt(C)) && + match(FBO->getOperand(1), m_APInt(C)) && canShiftBinOpWithConstantRHS(I, FBO, *C)) { Constant *NewRHS = ConstantExpr::get(I.getOpcode(), cast<Constant>(FBO->getOperand(1)), Op1); diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 2560feb37d66..1c2de6352fa5 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -605,7 +605,7 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) { return nullptr; Value *SplatVal = InsElt.getOperand(1); - InsertElementInst *CurrIE = &InsElt; + InsertElementInst *CurrIE = &InsElt; SmallVector<bool, 16> ElementPresent(NumElements, false); InsertElementInst *FirstIE = nullptr; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 12fcc8752ea9..cff0d5447290 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1424,7 +1424,7 @@ Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) { bool ConstOp1 = isa<Constant>(Inst.getOperand(1)); if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1)) NewC = getSafeVectorConstantForBinop(Inst.getOpcode(), NewC, ConstOp1); - + // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask) // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask) Value *NewLHS = isa<Constant>(LHS) ? NewC : V1; diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index b3f659194558..6af44354225c 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -2464,10 +2464,10 @@ bool AddressSanitizer::runOnFunction(Function &F) { // If needed, insert __asan_init before checking for SanitizeAddress attr. // This function needs to be called even if the function body is not - // instrumented. + // instrumented. if (maybeInsertAsanInitAtFunctionEntry(F)) FunctionModified = true; - + // Leave if the function doesn't need instrumentation. 
if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return FunctionModified; diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index acd27c2e226f..132e8089fe3b 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -148,7 +148,7 @@ public: } StringRef getPassName() const override { return "GCOV Profiler"; } - bool runOnModule(Module &M) override { + bool runOnModule(Module &M) override { auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); return Profiler.runOnModule(M, TLI); } diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index 22076f04d6ad..4d5dfb0aa66b 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -898,7 +898,7 @@ void InstrProfiling::emitRegistration() { IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF)); for (Value *Data : UsedVars) - if (Data != NamesVar) + if (Data != NamesVar && !isa<Function>(Data)) IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy)); if (NamesVar) { diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index fa7bcec677f7..0830ff5dd042 100644 --- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -280,7 +280,7 @@ bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I, return false; // Sign extend the offset to 64 bits (so that it is like all of the other - // expressions). + // expressions). unsigned OffSCEVBits = OffSCEV->getType()->getPrimitiveSizeInBits(); if (OffSCEVBits < 64) OffSCEV = SE->getSignExtendExpr(OffSCEV, Int64Ty); diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index 3a675b979017..55759e8b1661 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -781,7 +781,7 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI, this->TTI = &TTI; this->DT = &DT; this->BFI = BFI; - this->Entry = &Entry; + this->Entry = &Entry; // Collect all constant candidates. collectConstantCandidates(Fn); diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index ea148b728a10..2f2d7f620a29 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -473,7 +473,7 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) { // relatively expensive analysis for constants which are obviously either // null or non-null to start with. if (Type && !CS.paramHasAttr(ArgNo, Attribute::NonNull) && - !isa<Constant>(V) && + !isa<Constant>(V) && LVI->getPredicateAt(ICmpInst::ICMP_EQ, V, ConstantPointerNull::get(Type), CS.getInstruction()) == LazyValueInfo::False) @@ -670,12 +670,12 @@ static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) { Value *Op0 = C->getOperand(0); Constant *Op1 = dyn_cast<Constant>(C->getOperand(1)); if (!Op1) return nullptr; - + LazyValueInfo::Tristate Result = LVI->getPredicateAt(C->getPredicate(), Op0, Op1, At); if (Result == LazyValueInfo::Unknown) return nullptr; - + return (Result == LazyValueInfo::True) ? 
ConstantInt::getTrue(C->getContext()) : ConstantInt::getFalse(C->getContext()); @@ -747,7 +747,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT, if (auto *C = getConstantAt(RetVal, RI, LVI)) { ++NumReturns; RI->replaceUsesOfWith(RetVal, C); - BBChanged = true; + BBChanged = true; } } } diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index dd1a2a6adb82..9a7405e98e7d 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -188,7 +188,7 @@ static bool hasAnalyzableMemoryWrite(Instruction *I, /// returns true, this function and getLocForRead completely describe the memory /// operations for this instruction. static MemoryLocation getLocForWrite(Instruction *Inst) { - + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) return MemoryLocation::get(SI); diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 565745d12e99..533d16e088c8 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -384,7 +384,7 @@ public: LoadMapAllocator>; LoadHTType AvailableLoads; - + // A scoped hash table mapping memory locations (represented as typed // addresses) to generation numbers at which that memory location became // (henceforth indefinitely) invariant. @@ -844,7 +844,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // start a scope in the current generaton which is true for all future // generations. Also, we dont need to consume the last store since the // semantics of invariant.start allow us to perform DSE of the last - // store, if there was a store following invariant.start. Consider: + // store, if there was a store following invariant.start. Consider: // // store 30, i8* p // invariant.start(p) @@ -852,7 +852,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // We can DSE the store to 30, since the store 40 to invariant location p // causes undefined behaviour. if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>())) { - // If there are any uses, the scope might end. + // If there are any uses, the scope might end. if (!Inst->use_empty()) continue; auto *CI = cast<CallInst>(Inst); diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp index 28c5940db1e0..8959038de596 100644 --- a/lib/Transforms/Scalar/GVNSink.cpp +++ b/lib/Transforms/Scalar/GVNSink.cpp @@ -568,7 +568,7 @@ public: ReversePostOrderTraversal<Function*> RPOT(&F); for (auto *N : RPOT) NumSunk += sinkBB(N); - + return NumSunk > 0; } diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp index ad1598d7b8bf..055fcbc8436f 100644 --- a/lib/Transforms/Scalar/GuardWidening.cpp +++ b/lib/Transforms/Scalar/GuardWidening.cpp @@ -43,6 +43,7 @@ #include <functional> #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/PostDominators.h" @@ -61,6 +62,8 @@ using namespace llvm; #define DEBUG_TYPE "guard-widening" +STATISTIC(GuardsEliminated, "Number of eliminated guards"); + namespace { class GuardWideningImpl { @@ -75,21 +78,33 @@ class GuardWideningImpl { /// The set of guards whose conditions have been widened into dominating /// guards. 
- SmallVector<IntrinsicInst *, 16> EliminatedGuards; + SmallVector<Instruction *, 16> EliminatedGuards; /// The set of guards which have been widened to include conditions to other /// guards. - DenseSet<IntrinsicInst *> WidenedGuards; + DenseSet<Instruction *> WidenedGuards; /// Try to eliminate guard \p Guard by widening it into an earlier dominating /// guard. \p DFSI is the DFS iterator on the dominator tree that is /// currently visiting the block containing \p Guard, and \p GuardsPerBlock /// maps BasicBlocks to the set of guards seen in that block. bool eliminateGuardViaWidening( - IntrinsicInst *Guard, const df_iterator<DomTreeNode *> &DFSI, - const DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> & + Instruction *Guard, const df_iterator<DomTreeNode *> &DFSI, + const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> & GuardsPerBlock); + // Get the condition from \p GuardInst. + Value *getGuardCondition(Instruction *GuardInst); + + // Set the condition for \p GuardInst. + void setGuardCondition(Instruction *GuardInst, Value *NewCond); + + // Whether or not the particular instruction is a guard. + bool isGuard(const Instruction *I); + + // Eliminates the guard instruction properly. + void eliminateGuard(Instruction *GuardInst); + /// Used to keep track of which widening potential is more effective. enum WideningScore { /// Don't widen. @@ -113,9 +128,9 @@ class GuardWideningImpl { /// Compute the score for widening the condition in \p DominatedGuard /// (contained in \p DominatedGuardLoop) into \p DominatingGuard (contained in /// \p DominatingGuardLoop). - WideningScore computeWideningScore(IntrinsicInst *DominatedGuard, + WideningScore computeWideningScore(Instruction *DominatedGuard, Loop *DominatedGuardLoop, - IntrinsicInst *DominatingGuard, + Instruction *DominatingGuard, Loop *DominatingGuardLoop); /// Helper to check if \p V can be hoisted to \p InsertPos. @@ -206,10 +221,10 @@ class GuardWideningImpl { /// Widen \p ToWiden to fail if \p NewCondition is false (in addition to /// whatever it is already checking). 
- void widenGuard(IntrinsicInst *ToWiden, Value *NewCondition) { + void widenGuard(Instruction *ToWiden, Value *NewCondition) { Value *Result; - widenCondCommon(ToWiden->getArgOperand(0), NewCondition, ToWiden, Result); - ToWiden->setArgOperand(0, Result); + widenCondCommon(ToWiden->getOperand(0), NewCondition, ToWiden, Result); + setGuardCondition(ToWiden, Result); } public: @@ -225,9 +240,7 @@ public: } bool GuardWideningImpl::run() { - using namespace llvm::PatternMatch; - - DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> GuardsInBlock; + DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock; bool Changed = false; for (auto DFI = df_begin(Root), DFE = df_end(Root); @@ -239,8 +252,8 @@ bool GuardWideningImpl::run() { auto &CurrentList = GuardsInBlock[BB]; for (auto &I : *BB) - if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>())) - CurrentList.push_back(cast<IntrinsicInst>(&I)); + if (isGuard(&I)) + CurrentList.push_back(cast<Instruction>(&I)); for (auto *II : CurrentList) Changed |= eliminateGuardViaWidening(II, DFI, GuardsInBlock); @@ -249,16 +262,16 @@ bool GuardWideningImpl::run() { assert(EliminatedGuards.empty() || Changed); for (auto *II : EliminatedGuards) if (!WidenedGuards.count(II)) - II->eraseFromParent(); + eliminateGuard(II); return Changed; } bool GuardWideningImpl::eliminateGuardViaWidening( - IntrinsicInst *GuardInst, const df_iterator<DomTreeNode *> &DFSI, - const DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> & + Instruction *GuardInst, const df_iterator<DomTreeNode *> &DFSI, + const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> & GuardsInBlock) { - IntrinsicInst *BestSoFar = nullptr; + Instruction *BestSoFar = nullptr; auto BestScoreSoFar = WS_IllegalOrNegative; auto *GuardInstLoop = LI.getLoopFor(GuardInst->getParent()); @@ -302,8 +315,8 @@ bool GuardWideningImpl::eliminateGuardViaWidening( for (auto *Candidate : make_range(I, E)) { auto Score = computeWideningScore(GuardInst, GuardInstLoop, Candidate, CurLoop); - LLVM_DEBUG(dbgs() << "Score between " << *GuardInst->getArgOperand(0) - << " and " << *Candidate->getArgOperand(0) << " is " + LLVM_DEBUG(dbgs() << "Score between " << *getGuardCondition(GuardInst) + << " and " << *getGuardCondition(Candidate) << " is " << scoreTypeToString(Score) << "\n"); if (Score > BestScoreSoFar) { BestScoreSoFar = Score; @@ -323,16 +336,41 @@ bool GuardWideningImpl::eliminateGuardViaWidening( LLVM_DEBUG(dbgs() << "Widening " << *GuardInst << " into " << *BestSoFar << " with score " << scoreTypeToString(BestScoreSoFar) << "\n"); - widenGuard(BestSoFar, GuardInst->getArgOperand(0)); - GuardInst->setArgOperand(0, ConstantInt::getTrue(GuardInst->getContext())); + widenGuard(BestSoFar, getGuardCondition(GuardInst)); + setGuardCondition(GuardInst, ConstantInt::getTrue(GuardInst->getContext())); EliminatedGuards.push_back(GuardInst); WidenedGuards.insert(BestSoFar); return true; } +Value *GuardWideningImpl::getGuardCondition(Instruction *GuardInst) { + IntrinsicInst *GI = cast<IntrinsicInst>(GuardInst); + assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && + "Bad guard intrinsic?"); + return GI->getArgOperand(0); +} + +void GuardWideningImpl::setGuardCondition(Instruction *GuardInst, + Value *NewCond) { + IntrinsicInst *GI = cast<IntrinsicInst>(GuardInst); + assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && + "Bad guard intrinsic?"); + GI->setArgOperand(0, NewCond); +} + +bool GuardWideningImpl::isGuard(const Instruction* I) { + using namespace llvm::PatternMatch; + 
return match(I, m_Intrinsic<Intrinsic::experimental_guard>()); +} + +void GuardWideningImpl::eliminateGuard(Instruction *GuardInst) { + GuardInst->eraseFromParent(); + ++GuardsEliminated; +} + GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore( - IntrinsicInst *DominatedGuard, Loop *DominatedGuardLoop, - IntrinsicInst *DominatingGuard, Loop *DominatingGuardLoop) { + Instruction *DominatedGuard, Loop *DominatedGuardLoop, + Instruction *DominatingGuard, Loop *DominatingGuardLoop) { bool HoistingOutOfLoop = false; if (DominatingGuardLoop != DominatedGuardLoop) { @@ -345,7 +383,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore( HoistingOutOfLoop = true; } - if (!isAvailableAt(DominatedGuard->getArgOperand(0), DominatingGuard)) + if (!isAvailableAt(getGuardCondition(DominatedGuard), DominatingGuard)) return WS_IllegalOrNegative; // If the guard was conditional executed, it may never be reached @@ -355,9 +393,9 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore( // case. At the moment, we really only consider the second in our heuristic // here. TODO: evaluate cost model for spurious deopt // NOTE: As written, this also lets us hoist right over another guard which - // is essentially just another spelling for control flow. - if (isWideningCondProfitable(DominatedGuard->getArgOperand(0), - DominatingGuard->getArgOperand(0))) + // is essentially just another spelling for control flow. + if (isWideningCondProfitable(getGuardCondition(DominatedGuard), + getGuardCondition(DominatingGuard))) return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive; if (HoistingOutOfLoop) @@ -369,7 +407,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore( auto MaybeHoistingOutOfIf = [&]() { auto *DominatingBlock = DominatingGuard->getParent(); auto *DominatedBlock = DominatedGuard->getParent(); - + // Same Block? if (DominatedBlock == DominatingBlock) return false; diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index e2f29705f2dd..c5ed6d5c1b87 100644 --- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -735,7 +735,7 @@ static bool isSafeDecreasingBound(const SCEV *Start, assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be either 0 or 1"); - + const SCEV *StepPlusOne = SE.getAddExpr(Step, SE.getOne(Step->getType())); unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth(); APInt Min = IsSigned ? APInt::getSignedMinValue(BitWidth) : @@ -786,7 +786,7 @@ static bool isSafeIncreasingBound(const SCEV *Start, const SCEV *StepMinusOne = SE.getMinusSCEV(Step, SE.getOne(Step->getType())); unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth(); - APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) : + APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) : APInt::getMaxValue(BitWidth); const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Max), StepMinusOne); @@ -798,7 +798,7 @@ static bool isSafeIncreasingBound(const SCEV *Start, static bool CannotBeMinInLoop(const SCEV *BoundSCEV, Loop *L, ScalarEvolution &SE, bool Signed) { unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth(); - APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) : + APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) : APInt::getMinValue(BitWidth); auto Predicate = Signed ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; return SE.isAvailableAtLoopEntry(BoundSCEV, L) && diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index ff66632f0391..c4ea43a43249 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -455,7 +455,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // Keep track of whether the prefix of instructions visited so far are such // that the next instruction visited is guaranteed to execute if the loop - // is entered. + // is entered. bool IsMustExecute = CurLoop->getHeader() == BB; for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { @@ -1186,9 +1186,9 @@ bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) { if (isa<AllocaInst>(Object)) // Since the alloca goes out of scope, we know the caller can't retain a // reference to it and be well defined. Thus, we don't need to check for - // capture. + // capture. return true; - + // For all other objects we need to know that the caller can't possibly // have gotten a reference to the object. There are two components of // that: @@ -1282,7 +1282,7 @@ bool llvm::promoteLoopAccessesToScalars( // That said, we can't actually make the unwind edge explicit. Therefore, // we have to prove that the store is dead along the unwind edge. We do // this by proving that the caller can't have a reference to the object - // after return and thus can't possibly load from the object. + // after return and thus can't possibly load from the object. Value *Object = GetUnderlyingObject(SomePtr, MDL); if (!isKnownNonEscaping(Object, TLI)) return false; diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index d8692198f7a3..653948717fb9 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1573,7 +1573,7 @@ void LoopIdiomRecognize::transformLoopToCountable( InitXNext = Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1)); else - llvm_unreachable("Unexpected opcode!"); + llvm_unreachable("Unexpected opcode!"); } else InitXNext = InitX; CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck); diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp index 561ceea1d880..cbb6594cf8f4 100644 --- a/lib/Transforms/Scalar/LoopPredication.cpp +++ b/lib/Transforms/Scalar/LoopPredication.cpp @@ -74,7 +74,7 @@ // } // // One solution for M is M = forall X . (G(X) && B(X)) => G(X + Step) -// +// // Informal proof that the transformation above is correct: // // By the definition of guards we can rewrite the guard condition to: @@ -83,7 +83,7 @@ // Let's prove that for each iteration of the loop: // G(0) && M => G(I) // And the condition above can be simplified to G(Start) && M. -// +// // Induction base. 
// G(0) && M => G(0) // @@ -379,7 +379,7 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, Instruction *InsertAt) { // TODO: we can check isLoopEntryGuardedByCond before emitting the check - + Type *Ty = LHS->getType(); assert(Ty == RHS->getType() && "expandCheck operands have different types?"); diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 634215c9770f..e955821effa0 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -888,7 +888,7 @@ bool llvm::computeUnrollCount( UP.Count = 0; return false; } - + // Check if the runtime trip count is too small when profile is available. if (L->getHeader()->getParent()->hasProfileData()) { if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) { @@ -897,7 +897,7 @@ bool llvm::computeUnrollCount( else UP.AllowExpensiveTripCount = true; } - } + } // Reduce count based on the type of unrolling and the threshold values. UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount; diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index b12586758925..6aad077ff19e 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -708,7 +708,7 @@ bool LoopUnswitch::processCurrentLoop() { // Unswitch only those branches that are reachable. if (isUnreachableDueToPreviousUnswitching(*I)) continue; - + // If this isn't branching on an invariant condition, we can't unswitch // it. if (BI->isConditional()) { @@ -754,7 +754,7 @@ bool LoopUnswitch::processCurrentLoop() { // We are unswitching ~0 out. UnswitchVal = AllOne; } else { - assert(OpChain == OC_OpChainNone && + assert(OpChain == OC_OpChainNone && "Expect to unswitch on trivial chain"); // Do not process same value again and again. // At this point we have some cases already unswitched and @@ -1440,11 +1440,11 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // This in-loop instruction has been simplified w.r.t. its context, // i.e. LIC != Val, make sure we propagate its replacement value to // all its users. - // + // // We can not yet delete UI, the LIC user, yet, because that would invalidate // the LIC->users() iterator !. However, we can make this instruction // dead by replacing all its users and push it onto the worklist so that - // it can be properly deleted and its operands simplified. + // it can be properly deleted and its operands simplified. 
UI->replaceAllUsesWith(Replacement); } } @@ -1609,7 +1609,7 @@ Value *LoopUnswitch::SimplifyInstructionWithNotEqual(Instruction *Inst, LLVMContext &Ctx = Inst->getContext(); if (CI->getPredicate() == CmpInst::ICMP_EQ) return ConstantInt::getFalse(Ctx); - else + else return ConstantInt::getTrue(Ctx); } } diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 2eb887c986be..3e47e9441d15 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -2007,7 +2007,7 @@ NewGVN::performSymbolicEvaluation(Value *V, case Instruction::Load: E = performSymbolicLoadEvaluation(I); break; - case Instruction::BitCast: + case Instruction::BitCast: E = createExpression(I); break; case Instruction::ICmp: diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index c81ac70d99e6..1df0a9c49fb1 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -1179,7 +1179,7 @@ static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd, // and both "Res" and "ConstOpnd" remain unchanged. bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt &ConstOpnd, Value *&Res) { - // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2 + // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2 // = ((x | c1) ^ c1) ^ (c1 ^ c2) // = (x & ~c1) ^ (c1 ^ c2) // It is useful only when c1 == c2. @@ -1202,12 +1202,12 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, RedoInsts.insert(T); return true; } - + // Helper function of OptimizeXor(). It tries to simplify // "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a -// symbolic value. -// -// If it was successful, true is returned, and the "R" and "C" is returned +// symbolic value. +// +// If it was successful, true is returned, and the "R" and "C" is returned // via "Res" and "ConstOpnd", respectively (If the entire expression is // evaluated to a constant, the Res is set to NULL); otherwise, false is // returned, and both "Res" and "ConstOpnd" remain unchanged. @@ -1254,7 +1254,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, const APInt &C1 = Opnd1->getConstPart(); const APInt &C2 = Opnd2->getConstPart(); APInt C3 = C1 ^ C2; - + // Do not increase code size if (!C3.isNullValue() && !C3.isAllOnesValue()) { int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2; @@ -1290,7 +1290,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I, SmallVectorImpl<ValueEntry> &Ops) { if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops)) return V; - + if (Ops.size() == 1) return nullptr; @@ -1365,7 +1365,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I, } // step 3.2: When previous and current operands share the same symbolic - // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd" + // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd" if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) { // Remove previous operand PrevOpnd->Invalidate(); diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 391e43f79121..0de2bc72b522 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -401,7 +401,7 @@ namespace { /// defining value. The 'base defining value' for 'Def' is the transitive /// closure of this relation stopping at the first instruction which has no /// immediate base defining value. The b.d.v. 
might itself be a base pointer, -/// but it can also be an arbitrary derived pointer. +/// but it can also be an arbitrary derived pointer. struct BaseDefiningValueResult { /// Contains the value which is the base defining value. Value * const BDV; @@ -427,13 +427,13 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I); /// Return a base defining value for the 'Index' element of the given vector /// instruction 'I'. If Index is null, returns a BDV for the entire vector -/// 'I'. As an optimization, this method will try to determine when the +/// 'I'. As an optimization, this method will try to determine when the /// element is known to already be a base pointer. If this can be established, /// the second value in the returned pair will be true. Note that either a /// vector or a pointer typed value can be returned. For the former, the /// vector returned is a BDV (and possibly a base) of the entire vector 'I'. /// If the later, the return pointer is a BDV (or possibly a base) for the -/// particular element in 'I'. +/// particular element in 'I'. static BaseDefiningValueResult findBaseDefiningValueOfVector(Value *I) { // Each case parallels findBaseDefiningValue below, see that code for @@ -444,7 +444,7 @@ findBaseDefiningValueOfVector(Value *I) { return BaseDefiningValueResult(I, true); if (isa<Constant>(I)) - // Base of constant vector consists only of constant null pointers. + // Base of constant vector consists only of constant null pointers. // For reasoning see similar case inside 'findBaseDefiningValue' function. return BaseDefiningValueResult(ConstantAggregateZero::get(I->getType()), true); @@ -508,11 +508,11 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) { if (isa<Constant>(I)) { // We assume that objects with a constant base (e.g. a global) can't move // and don't need to be reported to the collector because they are always - // live. Besides global references, all kinds of constants (e.g. undef, + // live. Besides global references, all kinds of constants (e.g. undef, // constant expressions, null pointers) can be introduced by the inliner or // the optimizer, especially on dynamically dead paths. // Here we treat all of them as having single null base. By doing this we - // trying to avoid problems reporting various conflicts in a form of + // trying to avoid problems reporting various conflicts in a form of // "phi (const1, const2)" or "phi (const, regular gc ptr)". // See constant.ll file for relevant test cases. @@ -1285,14 +1285,14 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables, return Index; }; Module *M = StatepointToken->getModule(); - + // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose // element type is i8 addrspace(1)*). We originally generated unique // declarations for each pointer type, but this proved problematic because // the intrinsic mangling code is incomplete and fragile. Since we're moving // towards a single unified pointer type anyways, we can just cast everything // to an i8* of the right address space. A bitcast is added later to convert - // gc_relocate to the actual value's type. + // gc_relocate to the actual value's type. 
auto getGCRelocateDecl = [&] (Type *Ty) { assert(isHandledGCPointerType(Ty)); auto AS = Ty->getScalarType()->getPointerAddressSpace(); @@ -1413,7 +1413,7 @@ static StringRef getDeoptLowering(CallSite CS) { } return "live-through"; } - + static void makeStatepointExplicitImpl(const CallSite CS, /* to replace */ const SmallVectorImpl<Value *> &BasePtrs, @@ -2570,7 +2570,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT, } // Before we start introducing relocations, we want to tweak the IR a bit to - // avoid unfortunate code generation effects. The main example is that we + // avoid unfortunate code generation effects. The main example is that we // want to try to make sure the comparison feeding a branch is after any // safepoints. Otherwise, we end up with a comparison of pre-relocation // values feeding a branch after relocation. This is semantically correct, @@ -2593,7 +2593,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT, TerminatorInst *TI = BB.getTerminator(); if (auto *Cond = getConditionInst(TI)) // TODO: Handle more than just ICmps here. We should be able to move - // most instructions without side effects or memory access. + // most instructions without side effects or memory access. if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) { MadeChange = true; Cond->moveBefore(TI); diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 6c3f012c6280..de16b608f752 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -3730,7 +3730,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { PartPtrTy, BasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); - PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); + PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); // Append this load onto the list of split loads so we can find it later // to rewrite the stores. diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 34510cb40732..5834b619046b 100644 --- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -459,9 +459,11 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, *ParentBB, *OldPH, FullUnswitch); // Now we need to update the dominator tree. - DT.insertEdge(OldPH, UnswitchedBB); + SmallVector<DominatorTree::UpdateType, 2> DTUpdates; + DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB}); if (FullUnswitch) - DT.deleteEdge(ParentBB, UnswitchedBB); + DTUpdates.push_back({DT.Delete, ParentBB, LoopExitBB}); + DT.applyUpdates(DTUpdates); // The constant we can replace all of our invariants with inside the loop // body. 
If any of the invariants have a value other than this the loop won't diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 5f5c4150d3bb..d0396e6ce47d 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -911,7 +911,7 @@ static void appendTypeSuffix(Value *Op, StringRef &Name, NameBuffer += 'l'; Name = NameBuffer; - } + } } Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, diff --git a/lib/Transforms/Utils/CallPromotionUtils.cpp b/lib/Transforms/Utils/CallPromotionUtils.cpp index 4d9c22e57a68..6d18d0614611 100644 --- a/lib/Transforms/Utils/CallPromotionUtils.cpp +++ b/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -392,7 +392,7 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee, auto CalleeType = Callee->getFunctionType(); auto CalleeParamNum = CalleeType->getNumParams(); for (unsigned ArgNo = 0; ArgNo < CalleeParamNum; ++ArgNo) { - auto *Arg = CS.getArgument(ArgNo); + auto *Arg = CS.getArgument(ArgNo); Type *FormalTy = CalleeType->getParamType(ArgNo); Type *ActualTy = Arg->getType(); if (FormalTy != ActualTy) { diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 61448e9acb57..807360340055 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -290,7 +290,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // Have we already cloned this block? if (BBEntry) return; - + // Nope, clone it now. BasicBlock *NewBB; BBEntry = NewBB = BasicBlock::Create(BB->getContext()); @@ -363,7 +363,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, hasDynamicAllocas = true; } } - + // Finally, clone over the terminator. const TerminatorInst *OldTI = BB->getTerminator(); bool TerminatorDone = false; @@ -400,7 +400,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, TerminatorDone = true; } } - + if (!TerminatorDone) { Instruction *NewInst = OldTI->clone(); if (OldTI->hasName()) @@ -418,11 +418,11 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, for (const BasicBlock *Succ : TI->successors()) ToClone.push_back(Succ); } - + if (CodeInfo) { CodeInfo->ContainsCalls |= hasCalls; CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; - CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && + CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && BB != &BB->getParent()->front(); } } @@ -468,7 +468,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, CloneWorklist.pop_back(); PFC.CloneBlock(BB, BB->begin(), CloneWorklist); } - + // Loop over all of the basic blocks in the old function. If the block was // reachable, we have cloned it and the old block is now in the value map: // insert it into the new function in the right order. If not, ignore it. @@ -500,7 +500,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, TypeMapper, Materializer); } - + // Defer PHI resolution until rest of function is resolved, PHI resolution // requires the CFG to be up-to-date. for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { @@ -519,7 +519,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, Value *V = VMap.lookup(PN->getIncomingBlock(pred)); if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) { Value *InVal = MapValue(PN->getIncomingValue(pred), - VMap, + VMap, ModuleLevelChanges ? 
RF_None : RF_NoModuleLevelChanges); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); @@ -529,9 +529,9 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, --pred; // Revisit the next entry. --e; } - } + } } - + // The loop above has removed PHI entries for those blocks that are dead // and has updated others. However, if a block is live (i.e. copied over) // but its terminator has been changed to not go to this block, then our @@ -546,11 +546,11 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); PI != E; ++PI) --PredCount[*PI]; - + // Figure out how many entries to remove from each PHI. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) ++PredCount[PN->getIncomingBlock(i)]; - + // At this point, the excess predecessor entries are positive in the // map. Loop over all of the PHIs and remove excess predecessor // entries. @@ -563,7 +563,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, } } } - + // If the loops above have made these phi nodes have 0 or 1 operand, // replace them with undef or the input value. We must do this for // correctness, because 0-operand phis are not valid. @@ -655,7 +655,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } - + BasicBlock *Dest = BI->getSuccessor(0); if (!Dest->getSinglePredecessor()) { ++I; continue; @@ -668,16 +668,16 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // We know all single-entry PHI nodes in the inlined function have been // removed, so we just need to splice the blocks. BI->eraseFromParent(); - + // Make all PHI nodes that referred to Dest now refer to I as their source. Dest->replaceAllUsesWith(&*I); // Move all the instructions in the succ to the pred. I->getInstList().splice(I->end(), Dest->getInstList()); - + // Remove the dest block. Dest->eraseFromParent(); - + // Do not increment I, iteratively merge all things this block branches to. 
} @@ -703,7 +703,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl<ReturnInst*> &Returns, - const char *NameSuffix, + const char *NameSuffix, ClonedCodeInfo *CodeInfo, Instruction *TheCall) { CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap, @@ -730,7 +730,7 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, const Twine &NameSuffix, LoopInfo *LI, DominatorTree *DT, SmallVectorImpl<BasicBlock *> &Blocks) { - assert(OrigLoop->getSubLoops().empty() && + assert(OrigLoop->getSubLoops().empty() && "Loop to be cloned cannot have inner loop"); Function *F = OrigLoop->getHeader()->getParent(); Loop *ParentLoop = OrigLoop->getParentLoop(); diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 35c7511a24b9..c7d68bab8170 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -61,7 +61,7 @@ std::unique_ptr<Module> llvm::CloneModule( // for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { - GlobalVariable *GV = new GlobalVariable(*New, + GlobalVariable *GV = new GlobalVariable(*New, I->getValueType(), I->isConstant(), I->getLinkage(), (Constant*) nullptr, I->getName(), @@ -110,7 +110,7 @@ std::unique_ptr<Module> llvm::CloneModule( GA->copyAttributesFrom(&*I); VMap[&*I] = GA; } - + // Now that all of the things that global variable initializer can refer to // have been created, loop through and copy the global variable referrers // over... We also set the attributes on the global now. diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index f31dab9f96af..cb349e34606c 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -1020,7 +1020,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, } else { // Otherwise we must have code extracted an unwind or something, just // return whatever we want. - ReturnInst::Create(Context, + ReturnInst::Create(Context, Constant::getNullValue(OldFnRetTy), TheSwitch); } @@ -1158,13 +1158,13 @@ Function *CodeExtractor::extractCodeRegion() { splitReturnBlocks(); // This takes place of the original loop - BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), + BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, header); // The new function needs a root node because other nodes can branch to the // head of the region, but the entry node of a function cannot have preds. - BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), + BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), "newFuncRoot"); auto *BranchI = BranchInst::Create(header); // If the original function has debug info, we have to add a debug location diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 0315aac1cf84..ddc6e07e2f59 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -1199,7 +1199,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS, // Only copy the edge if the call was inlined! if (VMI == VMap.end() || VMI->second == nullptr) continue; - + // If the call was inlined, but then constant folded, there is no edge to // add. Check for this case. 
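
For readers following the CloneModule() changes above, here is a hypothetical caller-side sketch (not part of the patch) showing how the value map relates original globals to their clones:

#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;

// Clone M and return the clone; ClonedF receives the copy of F.
static std::unique_ptr<Module> cloneAndFind(const Module &M, const Function *F,
                                            Function *&ClonedF) {
  ValueToValueMapTy VMap;
  std::unique_ptr<Module> New = CloneModule(M, VMap);
  ClonedF = cast<Function>(VMap[F]); // VMap maps old globals to new ones
  return New;
}
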
Instruction *NewCall = dyn_cast<Instruction>(VMI->second); @@ -1211,7 +1211,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS, CallSite CS = CallSite(NewCall); if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic()) continue; - + // Remember that this call site got inlined for the client of // InlineFunction. IFI.InlinedCalls.push_back(NewCall); @@ -1231,7 +1231,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS, CallerNode->addCalledFunction(CallSite(NewCall), I->second); } - + // Update the call graph by deleting the edge from Callee to Caller. We must // do this after the loop above in case Caller and Callee are the same. CallerNode->removeCallEdgeFor(CS); @@ -1380,7 +1380,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, if (CalleeHasDebugInfo) continue; - + // If the inlined instruction has no line number, make it look as if it // originates from the call location. This is important for // ((__always_inline__, __nodebug__)) functions which must use caller @@ -1777,7 +1777,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, E = FirstNewBlock->end(); I != E; ) { AllocaInst *AI = dyn_cast<AllocaInst>(I++); if (!AI) continue; - + // If the alloca is now dead, remove it. This often occurs due to code // specialization. if (AI->use_empty()) { @@ -1787,10 +1787,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, if (!allocaWouldBeStaticInEntry(AI)) continue; - + // Keep track of the static allocas that we inline into the caller. IFI.StaticAllocas.push_back(AI); - + // Scan for the block of allocas that we can move over, and move them // all at once. while (isa<AllocaInst>(I) && diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp index 3fbb3487884b..4a359b99bebd 100644 --- a/lib/Transforms/Utils/IntegerDivision.cpp +++ b/lib/Transforms/Utils/IntegerDivision.cpp @@ -476,10 +476,10 @@ bool llvm::expandDivision(BinaryOperator *Div) { return true; } -/// Generate code to compute the remainder of two integers of bitwidth up to +/// Generate code to compute the remainder of two integers of bitwidth up to /// 32 bits. Uses the above routines and extends the inputs/truncates the /// outputs to operate in 32 bits; that is, these routines are good for targets -/// that have no or very little suppport for smaller than 32 bit integer +/// that have no or very little suppport for smaller than 32 bit integer /// arithmetic. /// /// Replace Rem with emulation code. @@ -527,7 +527,7 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) { return expandRemainder(cast<BinaryOperator>(ExtRem)); } -/// Generate code to compute the remainder of two integers of bitwidth up to +/// Generate code to compute the remainder of two integers of bitwidth up to /// 64 bits. Uses the above routines and extends the inputs/truncates the /// outputs to operate in 64 bits. 
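
As a usage sketch for these expansion helpers, with hypothetical pass boilerplate elided and expandRemainderUpTo32Bits() being the entry point declared in llvm/Transforms/Utils/IntegerDivision.h:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/IntegerDivision.h"
using namespace llvm;

// Rewrite every narrow scalar urem/srem through the 32-bit emulation path.
static bool expandNarrowRemainders(Function &F) {
  SmallVector<BinaryOperator *, 8> Worklist;
  for (Instruction &I : instructions(F))
    if (auto *BO = dyn_cast<BinaryOperator>(&I))
      if ((BO->getOpcode() == Instruction::URem ||
           BO->getOpcode() == Instruction::SRem) &&
          BO->getType()->isIntegerTy() &&
          BO->getType()->getIntegerBitWidth() <= 32)
        Worklist.push_back(BO); // collect first; expansion mutates the IR
  bool Changed = false;
  for (BinaryOperator *Rem : Worklist)
    Changed |= expandRemainderUpTo32Bits(Rem);
  return Changed;
}
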
/// @@ -613,7 +613,7 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) { } else { ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty); ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty); - ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); + ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); } Trunc = Builder.CreateTrunc(ExtDiv, DivTy); @@ -662,7 +662,7 @@ bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) { } else { ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int64Ty); ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int64Ty); - ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); + ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); } Trunc = Builder.CreateTrunc(ExtDiv, DivTy); diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index 956d0387c7a8..a1f8e7484bcf 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -10,7 +10,7 @@ // This pass transforms loops by placing phi nodes at the end of the loops for // all values that are live across the loop boundary. For example, it turns // the left into the right code: -// +// // for (...) for (...) // if (c) if (c) // X1 = ... X1 = ... @@ -21,8 +21,8 @@ // ... = X4 + 4 // // This is still valid LLVM; the extra phi nodes are purely redundant, and will -// be trivially eliminated by InstCombine. The major benefit of this -// transformation is that it makes many other loop optimizations, such as +// be trivially eliminated by InstCombine. The major benefit of this +// transformation is that it makes many other loop optimizations, such as // LoopUnswitching, simpler. // //===----------------------------------------------------------------------===// @@ -144,7 +144,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB), I->getName() + ".lcssa", &ExitBB->front()); - + // Get the debug location from the original instruction. + PN->setDebugLoc(I->getDebugLoc()); // Add inputs from inside the loop for this PHI. for (BasicBlock *Pred : PredCache.get(ExitBB)) { PN->addIncoming(I, Pred); diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index 13794c53f24b..78afe748e596 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -344,7 +344,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, /// Update the branch weights of the latch of a peeled-off loop /// iteration. /// This sets the branch weights for the latch of the recently peeled off loop -/// iteration correctly. +/// iteration correctly. /// Our goal is to make sure that: /// a) The total weight of all the copies of the loop body is preserved. /// b) The total weight of the loop exit is preserved. @@ -544,7 +544,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // // Each following iteration will split the current bottom anchor in two, // and put the new copy of the loop body between these two blocks. 
That is, - // after peeling another iteration from the example above, we'll split + // after peeling another iteration from the example above, we'll split // InsertBot, and get: // // InsertTop: diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp index 323f2552ca80..88d595ee02ab 100644 --- a/lib/Transforms/Utils/MetaRenamer.cpp +++ b/lib/Transforms/Utils/MetaRenamer.cpp @@ -68,7 +68,7 @@ namespace { PRNG prng; }; - + struct MetaRenamer : public ModulePass { // Pass identification, replacement for typeid static char ID; diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index ca184ed7c4e3..4a1fd8d571aa 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -201,13 +201,13 @@ void SSAUpdater::RewriteUse(Use &U) { void SSAUpdater::RewriteUseAfterInsertions(Use &U) { Instruction *User = cast<Instruction>(U.getUser()); - + Value *V; if (PHINode *UserPN = dyn_cast<PHINode>(User)) V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); else V = GetValueAtEndOfBlock(User->getParent()); - + U.set(V); } @@ -235,7 +235,7 @@ public: PHI_iterator(PHINode *P, bool) // end iterator : PHI(P), idx(PHI->getNumIncomingValues()) {} - PHI_iterator &operator++() { ++idx; return *this; } + PHI_iterator &operator++() { ++idx; return *this; } bool operator==(const PHI_iterator& x) const { return idx == x.idx; } bool operator!=(const PHI_iterator& x) const { return !operator==(x); } @@ -333,7 +333,7 @@ LoadAndStorePromoter:: LoadAndStorePromoter(ArrayRef<const Instruction *> Insts, SSAUpdater &S, StringRef BaseName) : SSA(S) { if (Insts.empty()) return; - + const Value *SomeVal; if (const LoadInst *LI = dyn_cast<LoadInst>(Insts[0])) SomeVal = LI; @@ -354,7 +354,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { for (Instruction *User : Insts) UsesByBlock[User->getParent()].push_back(User); - + // Okay, now we can iterate over all the blocks in the function with uses, // processing them. Keep track of which loads are loading a live-in value. // Walk the uses in the use-list order to be determinstic. @@ -364,10 +364,10 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { for (Instruction *User : Insts) { BasicBlock *BB = User->getParent(); TinyPtrVector<Instruction *> &BlockUses = UsesByBlock[BB]; - + // If this block has already been processed, ignore this repeat use. if (BlockUses.empty()) continue; - + // Okay, this is the first use in the block. If this block just has a // single user in it, we can rewrite it trivially. if (BlockUses.size() == 1) { @@ -375,13 +375,13 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { if (StoreInst *SI = dyn_cast<StoreInst>(User)) { updateDebugInfo(SI); SSA.AddAvailableValue(BB, SI->getOperand(0)); - } else + } else // Otherwise it is a load, queue it to rewrite as a live-in load. LiveInLoads.push_back(cast<LoadInst>(User)); BlockUses.clear(); continue; } - + // Otherwise, check to see if this block is all loads. bool HasStore = false; for (Instruction *I : BlockUses) { @@ -390,7 +390,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { break; } } - + // If so, we can queue them all as live in loads. We don't have an // efficient way to tell which on is first in the block and don't want to // scan large blocks, so just add all loads as live ins. @@ -400,7 +400,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { BlockUses.clear(); continue; } - + // Otherwise, we have mixed loads and stores (or just a bunch of stores). 
// Since SSAUpdater is purely for cross-block values, we need to determine // the order of these instructions in the block. If the first use in the @@ -411,7 +411,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { if (LoadInst *L = dyn_cast<LoadInst>(&I)) { // If this is a load from an unrelated pointer, ignore it. if (!isInstInList(L, Insts)) continue; - + // If we haven't seen a store yet, this is a live in use, otherwise // use the stored value. if (StoredValue) { @@ -433,13 +433,13 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { StoredValue = SI->getOperand(0); } } - + // The last stored value that happened is the live-out for the block. assert(StoredValue && "Already checked that there is a store in block"); SSA.AddAvailableValue(BB, StoredValue); BlockUses.clear(); } - + // Okay, now we rewrite all loads that use live-in values in the loop, // inserting PHI nodes as necessary. for (LoadInst *ALoad : LiveInLoads) { @@ -451,10 +451,10 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { ALoad->replaceAllUsesWith(NewVal); ReplacedLoads[ALoad] = NewVal; } - + // Allow the client to do stuff before we start nuking things. doExtraRewritesBeforeFinalDeletion(); - + // Now that everything is rewritten, delete the old instructions from the // function. They should all be dead now. for (Instruction *User : Insts) { @@ -465,7 +465,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { if (!User->use_empty()) { Value *NewVal = ReplacedLoads[User]; assert(NewVal && "not a replaced load?"); - + // Propagate down to the ultimate replacee. The intermediately loads // could theoretically already have been deleted, so we don't want to // dereference the Value*'s. @@ -474,11 +474,11 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { NewVal = RLI->second; RLI = ReplacedLoads.find(NewVal); } - + replaceLoadWithValue(cast<LoadInst>(User), NewVal); User->replaceAllUsesWith(NewVal); } - + instructionDeleted(User); User->eraseFromParent(); } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index e381fbc34ab4..65b23f4d94a1 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -196,7 +196,7 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp, SmallDenseMap<const SCEV*, Value*> CheapExpansions; CheapExpansions[S] = ICmp->getOperand(IVOperIdx); CheapExpansions[X] = ICmp->getOperand(1 - IVOperIdx); - + // TODO: Support multiple entry loops? (We currently bail out of these in // the IndVarSimplify pass) if (auto *BB = L->getLoopPredecessor()) { diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 8c48597fc2e4..15e035874002 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -890,7 +890,7 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B, return nullptr; // Replace the malloc with a calloc. We need the data layout to know what the - // actual size of a 'size_t' parameter is. + // actual size of a 'size_t' parameter is. 
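
In source-level terms, the malloc+memset fold described here turns the first pattern below into the second; a plain C++ illustration rather than the IR-level code of the patch:

#include <cstdlib>
#include <cstring>

// Pattern the fold recognizes: the memset must zero-fill exactly the
// allocated size for the rewrite to be valid.
void *allocZeroedBefore(size_t N) {
  void *P = std::malloc(N);
  if (P)
    std::memset(P, 0, N);
  return P;
}

// Equivalent form after the fold: calloc(1, N) returns zeroed memory.
void *allocZeroedAfter(size_t N) {
  return std::calloc(1, N);
}
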
B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator()); const DataLayout &DL = Malloc->getModule()->getDataLayout(); IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext()); @@ -970,7 +970,7 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, Value *V = valueHasFloatPrecision(CI->getArgOperand(0)); if (V == nullptr) return nullptr; - + // If call isn't an intrinsic, check that it isn't within a function with the // same name as the float version of this call. // @@ -1126,165 +1126,164 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) { if (!Pow->isFast()) return nullptr; - const APFloat *Arg1C; - if (!match(Pow->getArgOperand(1), m_APFloat(Arg1C))) - return nullptr; - if (!Arg1C->isExactlyValue(0.5) && !Arg1C->isExactlyValue(-0.5)) + Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); + Type *Ty = Pow->getType(); + + const APFloat *ExpoF; + if (!match(Expo, m_APFloat(ExpoF)) || + (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5))) return nullptr; - // Fast-math flags from the pow() are propagated to all replacement ops. - IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(Pow->getFastMathFlags()); - Type *Ty = Pow->getType(); - Value *Sqrt; + // If errno is never set, then use the intrinsic for sqrt(). if (Pow->hasFnAttr(Attribute::ReadNone)) { - // We know that errno is never set, so replace with an intrinsic: - // pow(x, 0.5) --> llvm.sqrt(x) - // llvm.pow(x, 0.5) --> llvm.sqrt(x) - auto *F = Intrinsic::getDeclaration(Pow->getModule(), Intrinsic::sqrt, Ty); - Sqrt = B.CreateCall(F, Pow->getArgOperand(0)); - } else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, - LibFunc_sqrtl)) { - // Errno could be set, so we must use a sqrt libcall. - // TODO: We also should check that the target can in fact lower the sqrt - // libcall. We currently have no way to ask this question, so we ask - // whether the target has a sqrt libcall which is not exactly the same. - Sqrt = emitUnaryFloatFnCall(Pow->getArgOperand(0), - TLI->getName(LibFunc_sqrt), B, + Function *SqrtFn = Intrinsic::getDeclaration(Pow->getModule(), + Intrinsic::sqrt, Ty); + Sqrt = B.CreateCall(SqrtFn, Base); + } + // Otherwise, use the libcall for sqrt(). + else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) + // TODO: We also should check that the target can in fact lower the sqrt() + // libcall. We currently have no way to ask this question, so we ask if + // the target has a sqrt() libcall, which is not exactly the same. + Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt), B, Pow->getCalledFunction()->getAttributes()); - } else { - // We can't replace with an intrinsic or a libcall. + else return nullptr; - } - // If this is pow(x, -0.5), get the reciprocal. - if (Arg1C->isExactlyValue(-0.5)) - Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt); + // If the exponent is negative, then get the reciprocal. 
+ if (ExpoF->isNegative()) + Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal"); return Sqrt; } -Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - Value *Ret = nullptr; +Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { + Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); + Function *Callee = Pow->getCalledFunction(); + AttributeList Attrs = Callee->getAttributes(); StringRef Name = Callee->getName(); - if (UnsafeFPShrink && Name == "pow" && hasFloatVersion(Name)) - Ret = optimizeUnaryDoubleFP(CI, B, true); + Module *Module = Pow->getModule(); + Type *Ty = Pow->getType(); + Value *Shrunk = nullptr; + bool Ignored; + + if (UnsafeFPShrink && + Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name)) + Shrunk = optimizeUnaryDoubleFP(Pow, B, true); + + // Propagate the math semantics from the call to any created instructions. + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(Pow->getFastMathFlags()); - Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); + // Evaluate special cases related to the base. // pow(1.0, x) -> 1.0 - if (match(Op1, m_SpecificFP(1.0))) - return Op1; - // pow(2.0, x) -> llvm.exp2(x) - if (match(Op1, m_SpecificFP(2.0))) { - Value *Exp2 = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::exp2, - CI->getType()); - return B.CreateCall(Exp2, Op2, "exp2"); - } - - // There's no llvm.exp10 intrinsic yet, but, maybe, some day there will - // be one. - if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { - // pow(10.0, x) -> exp10(x) - if (Op1C->isExactlyValue(10.0) && - hasUnaryFloatFn(TLI, Op1->getType(), LibFunc_exp10, LibFunc_exp10f, - LibFunc_exp10l)) - return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc_exp10), B, - Callee->getAttributes()); + if (match(Base, m_SpecificFP(1.0))) + return Base; + + // pow(2.0, x) -> exp2(x) + if (match(Base, m_SpecificFP(2.0))) { + Value *Exp2 = Intrinsic::getDeclaration(Module, Intrinsic::exp2, Ty); + return B.CreateCall(Exp2, Expo, "exp2"); } + // pow(10.0, x) -> exp10(x) + if (ConstantFP *BaseC = dyn_cast<ConstantFP>(Base)) + // There's no exp10 intrinsic yet, but, maybe, some day there shall be one. + if (BaseC->isExactlyValue(10.0) && + hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) + return emitUnaryFloatFnCall(Expo, TLI->getName(LibFunc_exp10), B, Attrs); + // pow(exp(x), y) -> exp(x * y) // pow(exp2(x), y) -> exp2(x * y) // We enable these only with fast-math. Besides rounding differences, the // transformation changes overflow and underflow behavior quite dramatically. // Example: x = 1000, y = 0.001. // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1). 
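
The overflow caveat spelled out in that comment is easy to verify numerically; a standalone illustration:

#include <cmath>
#include <cstdio>

int main() {
  double x = 1000.0, y = 0.001;
  // exp(1000) overflows a double to +inf, and pow(inf, 0.001) stays inf,
  // while the "equivalent" exp(x * y) is just exp(1).
  std::printf("pow(exp(x), y) = %g\n", std::pow(std::exp(x), y)); // inf
  std::printf("exp(x * y)     = %g\n", std::exp(x * y)); // 2.71828
  return 0;
}
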
- auto *OpC = dyn_cast<CallInst>(Op1); - if (OpC && OpC->isFast() && CI->isFast()) { - LibFunc Func; - Function *OpCCallee = OpC->getCalledFunction(); - if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && - TLI->has(Func) && (Func == LibFunc_exp || Func == LibFunc_exp2)) { + auto *BaseFn = dyn_cast<CallInst>(Base); + if (BaseFn && BaseFn->isFast() && Pow->isFast()) { + LibFunc LibFn; + Function *CalleeFn = BaseFn->getCalledFunction(); + if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) && + (LibFn == LibFunc_exp || LibFn == LibFunc_exp2) && TLI->has(LibFn)) { IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(CI->getFastMathFlags()); - Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"); - return emitUnaryFloatFnCall(FMul, OpCCallee->getName(), B, - OpCCallee->getAttributes()); + B.setFastMathFlags(Pow->getFastMathFlags()); + + Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul"); + return emitUnaryFloatFnCall(FMul, CalleeFn->getName(), B, + CalleeFn->getAttributes()); } } - if (Value *Sqrt = replacePowWithSqrt(CI, B)) + // Evaluate special cases related to the exponent. + + if (Value *Sqrt = replacePowWithSqrt(Pow, B)) return Sqrt; - ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); - if (!Op2C) - return Ret; + ConstantFP *ExpoC = dyn_cast<ConstantFP>(Expo); + if (!ExpoC) + return Shrunk; - if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 - return ConstantFP::get(CI->getType(), 1.0); + // pow(x, -1.0) -> 1.0 / x + if (ExpoC->isExactlyValue(-1.0)) + return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal"); - // FIXME: Correct the transforms and pull this into replacePowWithSqrt(). - if (Op2C->isExactlyValue(0.5) && - hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf, - LibFunc_sqrtl)) { - // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). - // This is faster than calling pow, and still handles negative zero - // and negative infinity correctly. - // TODO: In finite-only mode, this could be just fabs(sqrt(x)). - Value *Inf = ConstantFP::getInfinity(CI->getType()); - Value *NegInf = ConstantFP::getInfinity(CI->getType(), true); + // pow(x, 0.0) -> 1.0 + if (ExpoC->getValueAPF().isZero()) + return ConstantFP::get(Ty, 1.0); - // TODO: As above, we should lower to the sqrt intrinsic if the pow is an - // intrinsic, to match errno semantics. - Value *Sqrt = emitUnaryFloatFnCall(Op1, "sqrt", B, Callee->getAttributes()); + // pow(x, 1.0) -> x + if (ExpoC->isExactlyValue(1.0)) + return Base; - Module *M = Callee->getParent(); - Function *FabsF = Intrinsic::getDeclaration(M, Intrinsic::fabs, - CI->getType()); - Value *FAbs = B.CreateCall(FabsF, Sqrt); + // pow(x, 2.0) -> x * x + if (ExpoC->isExactlyValue(2.0)) + return B.CreateFMul(Base, Base, "square"); - Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf); - Value *Sel = B.CreateSelect(FCmp, Inf, FAbs); - return Sel; + // FIXME: Correct the transforms and pull this into replacePowWithSqrt(). + if (ExpoC->isExactlyValue(0.5) && + hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) { + // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). + // This is faster than calling pow(), and still handles -0.0 and + // negative infinity correctly. + // TODO: In finite-only mode, this could be just fabs(sqrt(x)). + Value *PosInf = ConstantFP::getInfinity(Ty); + Value *NegInf = ConstantFP::getInfinity(Ty, true); + + // TODO: As above, we should lower to the sqrt() intrinsic if the pow() is + // an intrinsic, to match errno semantics. 
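
To see why the expansion needs both fabs() and the select on -infinity, compare the special values on which the two functions disagree; a standalone check:

#include <cmath>
#include <cstdio>

int main() {
  double NegZero = -0.0, NegInf = -INFINITY;
  // IEEE-754: pow(-0.0, 0.5) = +0.0 but sqrt(-0.0) = -0.0, hence the fabs();
  // pow(-inf, 0.5) = +inf but sqrt(-inf) = NaN, hence the select.
  std::printf("pow(-0.0, 0.5) = %g, sqrt(-0.0) = %g\n",
              std::pow(NegZero, 0.5), std::sqrt(NegZero));
  std::printf("pow(-inf, 0.5) = %g, sqrt(-inf) = %g\n",
              std::pow(NegInf, 0.5), std::sqrt(NegInf));
  return 0;
}
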
+ Value *Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt), + B, Attrs); + Function *FAbsFn = Intrinsic::getDeclaration(Module, Intrinsic::fabs, Ty); + Value *FAbs = B.CreateCall(FAbsFn, Sqrt, "abs"); + Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf"); + Sqrt = B.CreateSelect(FCmp, PosInf, FAbs); + return Sqrt; } - // Propagate fast-math-flags from the call to any created instructions. - IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(CI->getFastMathFlags()); - // pow(x, 1.0) --> x - if (Op2C->isExactlyValue(1.0)) - return Op1; - // pow(x, 2.0) --> x * x - if (Op2C->isExactlyValue(2.0)) - return B.CreateFMul(Op1, Op1, "pow2"); - // pow(x, -1.0) --> 1.0 / x - if (Op2C->isExactlyValue(-1.0)) - return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); - - // In -ffast-math, generate repeated fmul instead of generating pow(x, n). - if (CI->isFast()) { - APFloat V = abs(Op2C->getValueAPF()); - // We limit to a max of 7 fmul(s). Thus max exponent is 32. + // pow(x, n) -> x * x * x * .... + if (Pow->isFast()) { + APFloat ExpoA = abs(ExpoC->getValueAPF()); + // We limit to a max of 7 fmul(s). Thus the maximum exponent is 32. // This transformation applies to integer exponents only. - if (V.compare(APFloat(V.getSemantics(), 32.0)) == APFloat::cmpGreaterThan || - !V.isInteger()) + if (!ExpoA.isInteger() || + ExpoA.compare + (APFloat(ExpoA.getSemantics(), 32.0)) == APFloat::cmpGreaterThan) return nullptr; // We will memoize intermediate products of the Addition Chain. Value *InnerChain[33] = {nullptr}; - InnerChain[1] = Op1; - InnerChain[2] = B.CreateFMul(Op1, Op1); + InnerChain[1] = Base; + InnerChain[2] = B.CreateFMul(Base, Base, "square"); // We cannot readily convert a non-double type (like float) to a double. - // So we first convert V to something which could be converted to double. - bool Ignored; - V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored); - - Value *FMul = getPow(InnerChain, V.convertToDouble(), B); - // For negative exponents simply compute the reciprocal. - if (Op2C->isNegative()) - FMul = B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), FMul); + // So we first convert it to something which could be converted to double. + ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored); + Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B); + + // If the exponent is negative, then get the reciprocal. 
+ if (ExpoC->isNegative()) + FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal"); return FMul; } diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp index 3640541e63cc..fd0da79487f1 100644 --- a/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/lib/Transforms/Utils/SymbolRewriter.cpp @@ -536,7 +536,7 @@ private: char RewriteSymbolsLegacyPass::ID = 0; RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID) { - initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry()); + initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry()); } RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass( diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index e633ac0c874d..d49b26472548 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -61,7 +61,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { } else if (UnreachableBlocks.size() == 1) { UnreachableBlock = UnreachableBlocks.front(); } else { - UnreachableBlock = BasicBlock::Create(F.getContext(), + UnreachableBlock = BasicBlock::Create(F.getContext(), "UnifiedUnreachableBlock", &F); new UnreachableInst(F.getContext(), UnreachableBlock); diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 3c693f5d5ee0..859d0c92ca5a 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -535,13 +535,13 @@ protected: /// Returns true if we should generate a scalar version of \p IV. bool needsScalarInduction(Instruction *IV) const; - /// If there is a cast involved in the induction variable \p ID, which should - /// be ignored in the vectorized loop body, this function records the - /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the - /// cast. We had already proved that the casted Phi is equal to the uncasted - /// Phi in the vectorized loop (under a runtime guard), and therefore - /// there is no need to vectorize the cast - the same value can be used in the - /// vector loop for both the Phi and the cast. + /// If there is a cast involved in the induction variable \p ID, which should + /// be ignored in the vectorized loop body, this function records the + /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the + /// cast. We had already proved that the casted Phi is equal to the uncasted + /// Phi in the vectorized loop (under a runtime guard), and therefore + /// there is no need to vectorize the cast - the same value can be used in the + /// vector loop for both the Phi and the cast. /// If \p VectorLoopValue is a scalarized value, \p Lane is also specified, /// Otherwise, \p VectorLoopValue is a widened/vectorized value. /// @@ -5443,7 +5443,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I){ // high enough value to practically disable vectorization with such // operations, except where previously deployed legality hack allowed // using very low cost values. This is to avoid regressions coming simply - // from moving "masked load/store" check from legality to cost model. + // from moving "masked load/store" check from legality to cost model. // Masked Load/Gather emulation was previously never allowed. // Limited number of Masked Store/Scatter emulation was allowed. 
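
Returning to the repeated-fmul expansion of pow(x, n) earlier in this hunk: the sketch below models the memoized addition chain in scalar C++, using the simple split n = n/2 + (n - n/2). The pass itself draws its chains from a precomputed table via getPow(), but the memoization idea is the same:

#include <cstdio>

static double Chain[33]; // Chain[k] caches x^k; 0.0 doubles as "not yet set"

static double powChain(double X, unsigned N) {
  if (Chain[N] != 0.0)
    return Chain[N];
  // Split the exponent and reuse both halves; only a handful of multiplies
  // are ever emitted for N <= 32.
  return Chain[N] = powChain(X, N / 2) * powChain(X, N - N / 2);
}

int main() {
  double X = 1.5;
  Chain[1] = X; // seed the chain (demo assumes X != 0)
  std::printf("x^7 = %g\n", powChain(X, 7)); // 1.5^7 = 17.0859375
  return 0;
}
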
assert(isScalarWithPredication(I) && @@ -6412,12 +6412,12 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions( })) DeadInstructions.insert(IndUpdate); - // We record as "Dead" also the type-casting instructions we had identified + // We record as "Dead" also the type-casting instructions we had identified // during induction analysis. We don't need any handling for them in the - // vectorized loop because we have proven that, under a proper runtime - // test guarding the vectorized loop, the value of the phi, and the casted + // vectorized loop because we have proven that, under a proper runtime + // test guarding the vectorized loop, the value of the phi, and the casted // value of the phi, are the same. The last instruction in this casting chain - // will get its scalar/vector/widened def from the scalar/vector/widened def + // will get its scalar/vector/widened def from the scalar/vector/widened def // of the respective phi node. Any other casts in the induction def-use chain // have no other uses outside the phi update chain, and will be ignored. InductionDescriptor &IndDes = Induction.second; @@ -7060,8 +7060,8 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range) { auto Plan = llvm::make_unique<VPlan>(); // Build hierarchical CFG - VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI); - HCFGBuilder.buildHierarchicalCFG(*Plan.get()); + VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); + HCFGBuilder.buildHierarchicalCFG(); return Plan; } diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index ac8c4f046c6f..5c2efe885e22 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -345,7 +345,7 @@ static Value *isOneOf(const InstructionsState &S, Value *Op) { } /// \returns analysis of the Instructions in \p VL described in -/// InstructionsState, the Opcode that we suppose the whole list +/// InstructionsState, the Opcode that we suppose the whole list /// could be vectorized even if its structure is diverse. static InstructionsState getSameOpcode(ArrayRef<Value *> VL, unsigned BaseIndex = 0) { @@ -3111,6 +3111,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { // TODO: Merge this shuffle with the ReorderShuffleMask. 
if (!E->ReorderIndices.empty()) Builder.SetInsertPoint(VL0); + else if (auto *I = dyn_cast<Instruction>(V)) + Builder.SetInsertPoint(I->getParent(), + std::next(I->getIterator())); + else + Builder.SetInsertPoint(&F->getEntryBlock(), + F->getEntryBlock().getFirstInsertionPt()); V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), E->ReuseShuffleIndices, "shuffle"); } diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp index f7b07b722bb1..0780e70809d0 100644 --- a/lib/Transforms/Vectorize/VPlan.cpp +++ b/lib/Transforms/Vectorize/VPlan.cpp @@ -18,6 +18,7 @@ //===----------------------------------------------------------------------===// #include "VPlan.h" +#include "VPlanDominatorTree.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallVector.h" @@ -25,7 +26,6 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -34,6 +34,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GenericDomTreeConstruction.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -576,3 +577,5 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, } O << "\\l\""; } + +template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT); diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h index 866951cb79a4..883e6f52369a 100644 --- a/lib/Transforms/Vectorize/VPlan.h +++ b/lib/Transforms/Vectorize/VPlan.h @@ -26,8 +26,10 @@ #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H +#include "VPlanLoopInfo.h" #include "VPlanValue.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" @@ -51,7 +53,6 @@ class BasicBlock; class DominatorTree; class InnerLoopVectorizer; class InterleaveGroup; -class LoopInfo; class raw_ostream; class Value; class VPBasicBlock; @@ -516,6 +517,23 @@ public: /// Delete all blocks reachable from a given VPBlockBase, inclusive. static void deleteCFG(VPBlockBase *Entry); + + void printAsOperand(raw_ostream &OS, bool PrintType) const { + OS << getName(); + } + + void print(raw_ostream &OS) const { + // TODO: Only printing VPBB name for now since we only have dot printing + // support for VPInstructions/Recipes. + printAsOperand(OS, false); + } + + /// Return true if it is legal to hoist instructions into this block. + bool isLegalToHoistInto() { + // There are currently no constraints that prevent an instruction to be + // hoisted into a VPBlockBase. + return true; + } }; /// VPRecipeBase is a base class modeling a sequence of one or more output IR @@ -1037,6 +1055,12 @@ public: EntryBlock->setParent(this); } + // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a + // specific interface of llvm::Function, instead of using + // GraphTraints::getEntryNode. We should add a new template parameter to + // DominatorTreeBase representing the Graph type. + VPBlockBase &front() const { return *Entry; } + const VPBlockBase *getExit() const { return Exit; } VPBlockBase *getExit() { return Exit; } @@ -1087,6 +1111,9 @@ private: /// VPlan. 
Value2VPValueTy Value2VPValue; + /// Holds the VPLoopInfo analysis for this VPlan. + VPLoopInfo VPLInfo; + public: VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {} @@ -1133,6 +1160,10 @@ public: return Value2VPValue[V]; } + /// Return the VPLoopInfo analysis for this VPlan. + VPLoopInfo &getVPLoopInfo() { return VPLInfo; } + const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; } + private: /// Add to the given dominator tree the header block and every new basic block /// that was created between it and the latch block, inclusive. @@ -1210,12 +1241,15 @@ inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan) { return OS; } -//===--------------------------------------------------------------------===// -// GraphTraits specializations for VPlan/VPRegionBlock Control-Flow Graphs // -//===--------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs // +//===----------------------------------------------------------------------===// -// Provide specializations of GraphTraits to be able to treat a VPBlockBase as a -// graph of VPBlockBase nodes... +// The following set of template specializations implement GraphTraits to treat +// any VPBlockBase as a node in a graph of VPBlockBases. It's important to note +// that VPBlockBase traits don't recurse into VPRegioBlocks, i.e., if the +// VPBlockBase is a VPRegionBlock, this specialization provides access to its +// successors/predecessors but not to the blocks inside the region. template <> struct GraphTraits<VPBlockBase *> { using NodeRef = VPBlockBase *; @@ -1247,17 +1281,13 @@ template <> struct GraphTraits<const VPBlockBase *> { } }; -// Provide specializations of GraphTraits to be able to treat a VPBlockBase as a -// graph of VPBlockBase nodes... and to walk it in inverse order. Inverse order -// for a VPBlockBase is considered to be when traversing the predecessors of a -// VPBlockBase instead of its successors. +// Inverse order specialization for VPBasicBlocks. Predecessors are used instead +// of successors for the inverse traversal. template <> struct GraphTraits<Inverse<VPBlockBase *>> { using NodeRef = VPBlockBase *; using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator; - static Inverse<VPBlockBase *> getEntryNode(Inverse<VPBlockBase *> B) { - return B; - } + static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; } static inline ChildIteratorType child_begin(NodeRef N) { return N->getPredecessors().begin(); @@ -1268,6 +1298,71 @@ template <> struct GraphTraits<Inverse<VPBlockBase *>> { } }; +// The following set of template specializations implement GraphTraits to +// treat VPRegionBlock as a graph and recurse inside its nodes. It's important +// to note that the blocks inside the VPRegionBlock are treated as VPBlockBases +// (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so +// there won't be automatic recursion into other VPBlockBases that turn to be +// VPRegionBlocks. 
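
A sketch of what these shallow VPBlockBase traits enable, written as hypothetical client code (assumed to live in a VPlan-aware source file): generic graph algorithms such as depth_first() walk a region's nodes without descending into nested regions:

#include "VPlan.h"
#include "llvm/ADT/DepthFirstIterator.h"
using namespace llvm;

static unsigned countShallowBlocks(VPRegionBlock *Region) {
  unsigned N = 0;
  for (VPBlockBase *Block : depth_first(Region->getEntry())) {
    (void)Block; // the traits follow successors only, not subregion contents
    ++N;
  }
  return N;
}
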
+ +template <> +struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> { + using GraphRef = VPRegionBlock *; + using nodes_iterator = df_iterator<NodeRef>; + + static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } + + static nodes_iterator nodes_begin(GraphRef N) { + return nodes_iterator::begin(N->getEntry()); + } + + static nodes_iterator nodes_end(GraphRef N) { + // df_iterator::end() returns an empty iterator so the node used doesn't + // matter. + return nodes_iterator::end(N); + } +}; + +template <> +struct GraphTraits<const VPRegionBlock *> + : public GraphTraits<const VPBlockBase *> { + using GraphRef = const VPRegionBlock *; + using nodes_iterator = df_iterator<NodeRef>; + + static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } + + static nodes_iterator nodes_begin(GraphRef N) { + return nodes_iterator::begin(N->getEntry()); + } + + static nodes_iterator nodes_end(GraphRef N) { + // df_iterator::end() returns an empty iterator so the node used doesn't + // matter. + return nodes_iterator::end(N); + } +}; + +template <> +struct GraphTraits<Inverse<VPRegionBlock *>> + : public GraphTraits<Inverse<VPBlockBase *>> { + using GraphRef = VPRegionBlock *; + using nodes_iterator = df_iterator<NodeRef>; + + static NodeRef getEntryNode(Inverse<GraphRef> N) { + return N.Graph->getExit(); + } + + static nodes_iterator nodes_begin(GraphRef N) { + return nodes_iterator::begin(N->getExit()); + } + + static nodes_iterator nodes_end(GraphRef N) { + // df_iterator::end() returns an empty iterator so the node used doesn't + // matter. + return nodes_iterator::end(N); + } +}; + //===----------------------------------------------------------------------===// // VPlan Utilities //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/Vectorize/VPlanDominatorTree.h b/lib/Transforms/Vectorize/VPlanDominatorTree.h new file mode 100644 index 000000000000..1b81097b6d31 --- /dev/null +++ b/lib/Transforms/Vectorize/VPlanDominatorTree.h @@ -0,0 +1,41 @@ +//===-- VPlanDominatorTree.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements dominator tree analysis for a single level of a VPlan's +/// H-CFG. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H +#define LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H + +#include "VPlan.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/IR/Dominators.h" + +namespace llvm { + +/// Template specialization of the standard LLVM dominator tree utility for +/// VPBlockBases. +using VPDominatorTree = DomTreeBase<VPBlockBase>; + +using VPDomTreeNode = DomTreeNodeBase<VPBlockBase>; + +/// Template specializations of GraphTraits for VPDomTreeNode. 
+template <> +struct GraphTraits<VPDomTreeNode *> + : public DomTreeGraphTraitsBase<VPDomTreeNode, VPDomTreeNode::iterator> {}; + +template <> +struct GraphTraits<const VPDomTreeNode *> + : public DomTreeGraphTraitsBase<const VPDomTreeNode, + VPDomTreeNode::const_iterator> {}; +} // namespace llvm +#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index 08129b74cddf..b6307acb9474 100644 --- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -324,13 +324,28 @@ VPRegionBlock *PlainCFGBuilder::buildPlainCFG() { return TopRegion; } +VPRegionBlock *VPlanHCFGBuilder::buildPlainCFG() { + PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); + return PCFGBuilder.buildPlainCFG(); +} + // Public interface to build a H-CFG. -void VPlanHCFGBuilder::buildHierarchicalCFG(VPlan &Plan) { +void VPlanHCFGBuilder::buildHierarchicalCFG() { // Build Top Region enclosing the plain CFG and set it as VPlan entry. - PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); - VPRegionBlock *TopRegion = PCFGBuilder.buildPlainCFG(); + VPRegionBlock *TopRegion = buildPlainCFG(); Plan.setEntry(TopRegion); LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan); Verifier.verifyHierarchicalCFG(TopRegion); + + // Compute plain CFG dom tree for VPLInfo. + VPDomTree.recalculate(*TopRegion); + LLVM_DEBUG(dbgs() << "Dominator Tree after building the plain CFG.\n"; + VPDomTree.print(dbgs())); + + // Compute VPLInfo and keep it in Plan. + VPLoopInfo &VPLInfo = Plan.getVPLoopInfo(); + VPLInfo.analyze(VPDomTree); + LLVM_DEBUG(dbgs() << "VPLoop Info After buildPlainCFG:\n"; + VPLInfo.print(dbgs())); } diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h index c4e69843615a..3f11dcb5164d 100644 --- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h +++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h @@ -26,14 +26,18 @@ #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H #include "VPlan.h" +#include "VPlanDominatorTree.h" #include "VPlanVerifier.h" namespace llvm { class Loop; +class VPlanTestBase; /// Main class to build the VPlan H-CFG for an incoming IR. class VPlanHCFGBuilder { + friend VPlanTestBase; + private: // The outermost loop of the input loop nest considered for vectorization. Loop *TheLoop; @@ -41,14 +45,27 @@ private: // Loop Info analysis. LoopInfo *LI; + // The VPlan that will contain the H-CFG we are building. + VPlan &Plan; + // VPlan verifier utility. VPlanVerifier Verifier; + // Dominator analysis for VPlan plain CFG to be used in the + // construction of the H-CFG. This analysis is no longer valid once regions + // are introduced. + VPDominatorTree VPDomTree; + + /// Build plain CFG for TheLoop. Return a new VPRegionBlock (TopRegion) + /// enclosing the plain CFG. + VPRegionBlock *buildPlainCFG(); + public: - VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI) : TheLoop(Lp), LI(LI) {} + VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P) + : TheLoop(Lp), LI(LI), Plan(P) {} - /// Build H-CFG for TheLoop and update \p Plan accordingly. - void buildHierarchicalCFG(VPlan &Plan); + /// Build H-CFG for TheLoop and update Plan accordingly. 
+  void buildHierarchicalCFG();
 };
 } // namespace llvm
diff --git a/lib/Transforms/Vectorize/VPlanLoopInfo.h b/lib/Transforms/Vectorize/VPlanLoopInfo.h
new file mode 100644
index 000000000000..5c2485fc2145
--- /dev/null
+++ b/lib/Transforms/Vectorize/VPlanLoopInfo.h
@@ -0,0 +1,45 @@
+//===-- VPLoopInfo.h --------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines VPLoopInfo analysis and VPLoop class. VPLoopInfo is a
+/// specialization of LoopInfoBase for VPBlockBase. VPLoops is a specialization
+/// of LoopBase that is used to hold loop metadata from VPLoopInfo. Further
+/// information can be found in VectorizationPlanner.rst.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
+
+#include "llvm/Analysis/LoopInfoImpl.h"
+
+namespace llvm {
+class VPBlockBase;
+
+/// Hold analysis information for every loop detected by VPLoopInfo. It is an
+/// instantiation of LoopBase.
+class VPLoop : public LoopBase<VPBlockBase, VPLoop> {
+private:
+  friend class LoopInfoBase<VPBlockBase, VPLoop>;
+  explicit VPLoop(VPBlockBase *VPB) : LoopBase<VPBlockBase, VPLoop>(VPB) {}
+};
+
+/// VPLoopInfo provides analysis of natural loop for VPBlockBase-based
+/// Hierarchical CFG. It is a specialization of LoopInfoBase class.
+// TODO: VPLoopInfo is initially computed on top of the VPlan plain CFG, which
+// is the same as the incoming IR CFG. If it's more efficient than running the
+// whole loop detection algorithm, we may want to create a mechanism to
+// translate LoopInfo into VPLoopInfo. However, that would require significant
+// changes in LoopInfoBase class.
+typedef LoopInfoBase<VPBlockBase, VPLoop> VPLoopInfo;
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
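
Taken together, the new pieces compose as in the following sketch, which mirrors buildHierarchicalCFG() above (TopRegion assumed to come from buildPlainCFG()):

#include "VPlanDominatorTree.h"
#include "VPlanLoopInfo.h"
using namespace llvm;

static void analyzeTopRegion(VPRegionBlock *TopRegion, VPlan &Plan) {
  // Dominator tree over the plain CFG; valid only until regions are formed.
  VPDominatorTree VPDomTree;
  VPDomTree.recalculate(*TopRegion);

  // Natural-loop detection on VPBlockBases, stored in the plan.
  VPLoopInfo &VPLInfo = Plan.getVPLoopInfo();
  VPLInfo.analyze(VPDomTree);
}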