| author | Dimitry Andric <dim@FreeBSD.org> | 2018-08-02 17:32:43 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2018-08-02 17:32:43 +0000 |
| commit | b7eb8e35e481a74962664b63dfb09483b200209a (patch) | |
| tree | 1937fb4a348458ce2d02ade03ac3bb0aa18d2fcd /lib/Transforms | |
| parent | eb11fae6d08f479c0799db45860a98af528fa6e7 (diff) | |
Diffstat (limited to 'lib/Transforms')
58 files changed, 702 insertions, 429 deletions
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 31e771da3bd3..cd2bd734eb26 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -56,7 +56,7 @@ using namespace llvm;
 
 STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
 STATISTIC(NumRetValsEliminated  , "Number of unused return values removed");
-STATISTIC(NumArgumentsReplacedWithUndef, 
+STATISTIC(NumArgumentsReplacedWithUndef,
           "Number of unread args replaced with undef");
 
 namespace {
@@ -109,7 +109,7 @@ namespace {
 
 char DAH::ID = 0;
 
-INITIALIZE_PASS(DAH, "deadarghaX0r", 
+INITIALIZE_PASS(DAH, "deadarghaX0r",
                 "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
                 false, false)
@@ -256,7 +256,7 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
   return true;
 }
 
-/// RemoveDeadArgumentsFromCallers - Checks if the given function has any 
+/// RemoveDeadArgumentsFromCallers - Checks if the given function has any
 /// arguments that are unused, and changes the caller parameters to be undefined
 /// instead.
 bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
@@ -640,7 +640,7 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
       Result = Live;
     } else {
       // See what the effect of this use is (recording any uses that cause
-      // MaybeLive in MaybeLiveArgUses). 
+      // MaybeLive in MaybeLiveArgUses).
       Result = SurveyUses(&*AI, MaybeLiveArgUses);
     }
 
@@ -777,7 +777,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
   //    argument.
   // 2) Retain the 'returned' attribute and treat the return value (but not the
   //    entire function) as live so that it is not eliminated.
-  // 
+  //
   // It's not clear in the general case which option is more profitable because,
   // even in the absence of explicit uses of the return value, code generation
   // is free to use the 'returned' attribute to do things like eliding
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 2797da6c0abd..010b0a29807d 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -617,7 +617,7 @@ static bool addArgumentAttrsFromCallsites(Function &F) {
     if (!isGuaranteedToTransferExecutionToSuccessor(&I))
       break;
   }
-  
+
   return Changed;
 }
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 1af7e6894777..1761d7faff57 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -357,6 +357,41 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
   return Changed;
 }
 
+static bool isSafeSROAElementUse(Value *V);
+
+/// Return true if the specified GEP is a safe user of a derived
+/// expression from a global that we want to SROA.
+static bool isSafeSROAGEP(User *U) {
+  // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
+  // don't like < 3 operand CE's, and we don't like non-constant integer
+  // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
+  // value of C.
+  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
+      !cast<Constant>(U->getOperand(1))->isNullValue())
+    return false;
+
+  gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
+  ++GEPI; // Skip over the pointer index.
+
+  // For all other level we require that the indices are constant and inrange.
+  // In particular, consider: A[0][i].  We cannot know that the user isn't doing
+  // invalid things like allowing i to index an out-of-range subscript that
+  // accesses A[1]. This can also happen between different members of a struct
+  // in llvm IR.
+  for (; GEPI != E; ++GEPI) {
+    if (GEPI.isStruct())
+      continue;
+
+    ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
+    if (!IdxVal || (GEPI.isBoundedSequential() &&
+                    IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
+      return false;
+  }
+
+  return llvm::all_of(U->users(),
+                      [](User *UU) { return isSafeSROAElementUse(UU); });
+}
+
 /// Return true if the specified instruction is a safe user of a derived
 /// expression from a global that we want to SROA.
 static bool isSafeSROAElementUse(Value *V) {
@@ -374,84 +409,25 @@ static bool isSafeSROAElementUse(Value *V) {
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return SI->getOperand(0) != V;
 
-  // Otherwise, it must be a GEP.
-  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
-  if (!GEPI) return false;
-
-  if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
-      !cast<Constant>(GEPI->getOperand(1))->isNullValue())
-    return false;
-
-  for (User *U : GEPI->users())
-    if (!isSafeSROAElementUse(U))
-      return false;
-  return true;
-}
-
-/// U is a direct user of the specified global value.  Look at it and its uses
-/// and decide whether it is safe to SROA this global.
-static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
-  // The user of the global must be a GEP Inst or a ConstantExpr GEP.
-  if (!isa<GetElementPtrInst>(U) &&
-      (!isa<ConstantExpr>(U) ||
-       cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
-    return false;
-
-  // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
-  // don't like < 3 operand CE's, and we don't like non-constant integer
-  // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
-  // value of C.
-  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
-      !cast<Constant>(U->getOperand(1))->isNullValue() ||
-      !isa<ConstantInt>(U->getOperand(2)))
-    return false;
-
-  gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
-  ++GEPI;  // Skip over the pointer index.
-
-  // If this is a use of an array allocation, do a bit more checking for sanity.
-  if (GEPI.isSequential()) {
-    ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
-
-    // Check to make sure that index falls within the array.  If not,
-    // something funny is going on, so we won't do the optimization.
-    //
-    if (GEPI.isBoundedSequential() &&
-        Idx->getZExtValue() >= GEPI.getSequentialNumElements())
-      return false;
-
-    // We cannot scalar repl this level of the array unless any array
-    // sub-indices are in-range constants.  In particular, consider:
-    // A[0][i].  We cannot know that the user isn't doing invalid things like
-    // allowing i to index an out-of-range subscript that accesses A[1].
-    //
-    // Scalar replacing *just* the outer index of the array is probably not
-    // going to be a win anyway, so just give up.
-    for (++GEPI; // Skip array index.
-         GEPI != E;
-         ++GEPI) {
-      if (GEPI.isStruct())
-        continue;
-
-      ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
-      if (!IdxVal ||
-          (GEPI.isBoundedSequential() &&
-           IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
-        return false;
-    }
-  }
-
-  return llvm::all_of(U->users(),
-                      [](User *UU) { return isSafeSROAElementUse(UU); });
+  // Otherwise, it must be a GEP. Check it and its users are safe to SRA.
+  return isa<GetElementPtrInst>(I) && isSafeSROAGEP(I);
 }
 
 /// Look at all uses of the global and decide whether it is safe for us to
 /// perform this transformation.
 static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
-  for (User *U : GV->users())
-    if (!IsUserOfGlobalSafeForSRA(U, GV))
+  for (User *U : GV->users()) {
+    // The user of the global must be a GEP Inst or a ConstantExpr GEP.
+    if (!isa<GetElementPtrInst>(U) &&
+        (!isa<ConstantExpr>(U) ||
+        cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
       return false;
+    // Check the gep and it's users are safe to SRA
+    if (!isSafeSROAGEP(U))
+      return false;
+  }
+
   return true;
 }
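The in-range requirement that `isSafeSROAGEP` enforces is what keeps splitting a global (SROA) sound: in the flat memory layout, an out-of-range inner index can alias a neighbouring element. A standalone C++ illustration of that aliasing (names hypothetical, the out-of-range access is deliberate; not part of the commit):

```cpp
#include <cstdio>

// Hypothetical analogue of a global 'A' that GlobalOpt might want to split
// into independent per-row globals.
static int A[2][3] = {{1, 2, 3}, {4, 5, 6}};

int main() {
  int i = 3; // not a compile-time constant in general, out of range for A[0]
  // In the flat layout &A[0][3] == &A[1][0] (undefined behavior in strict
  // C++, shown only to illustrate the aliasing): "A[0][i]" can land inside
  // A[1], so replacing A with separate per-row globals would change what
  // this access reads. Hence non-constant inner indices are rejected.
  std::printf("%d\n", A[0][i]); // prints 4 on typical flat-layout targets
  return 0;
}
```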
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index f79b61037f1d..7d55ebecbf92 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -61,12 +61,12 @@ static bool PropagateConstantsIntoArguments(Function &F) {
     User *UR = U.getUser();
     // Ignore blockaddress uses.
     if (isa<BlockAddress>(UR)) continue;
-    
+
     // Used by a non-instruction, or not the callee of a function, do not
     // transform.
     if (!isa<CallInst>(UR) && !isa<InvokeInst>(UR))
       return false;
-    
+
     CallSite CS(cast<Instruction>(UR));
     if (!CS.isCallee(&U))
       return false;
@@ -77,11 +77,11 @@ static bool PropagateConstantsIntoArguments(Function &F) {
     Function::arg_iterator Arg = F.arg_begin();
     for (unsigned i = 0, e = ArgumentConstants.size(); i != e;
          ++i, ++AI, ++Arg) {
-      
+
       // If this argument is known non-constant, ignore it.
       if (ArgumentConstants[i].second)
         continue;
-      
+
       Constant *C = dyn_cast<Constant>(*AI);
       if (C && ArgumentConstants[i].first == nullptr) {
         ArgumentConstants[i].first = C;   // First constant seen.
@@ -108,7 +108,7 @@ static bool PropagateConstantsIntoArguments(Function &F) {
     if (ArgumentConstants[i].second || AI->use_empty() ||
         AI->hasInAllocaAttr() || (AI->hasByValAttr() && !F.onlyReadsMemory()))
       continue;
-  
+
     Value *V = ArgumentConstants[i].first;
     if (!V) V = UndefValue::get(AI->getType());
     AI->replaceAllUsesWith(V);
@@ -147,7 +147,7 @@ static bool PropagateConstantReturn(Function &F) {
   SmallVector<Value *,4> RetVals;
   StructType *STy = dyn_cast<StructType>(F.getReturnType());
   if (STy)
-    for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i) 
+    for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i)
       RetVals.push_back(UndefValue::get(STy->getElementType(i)));
   else
     RetVals.push_back(UndefValue::get(F.getReturnType()));
@@ -172,7 +172,7 @@ static bool PropagateConstantReturn(Function &F) {
           // Ignore undefs, we can change them into anything
           if (isa<UndefValue>(V))
             continue;
-          
+
           // Try to see if all the rets return the same constant or argument.
           if (isa<Constant>(V) || isa<Argument>(V)) {
             if (isa<UndefValue>(RV)) {
@@ -206,7 +206,7 @@
     // directly?
     if (!Call || !CS.isCallee(&U))
       continue;
-    
+
     // Call result not used?
     if (Call->use_empty())
       continue;
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 139941127dee..3bebb96c6d35 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -27,7 +27,7 @@
 // -- We define Function* container class with custom "operator<" (FunctionPtr).
 // -- "FunctionPtr" instances are stored in std::set collection, so every
 //    std::set::insert operation will give you result in log(N) time.
-// 
+//
 // As an optimization, a hash of the function structure is calculated first, and
 // two functions are only compared if they have the same hash. This hash is
 // cheap to compute, and has the property that if function F == G according to
@@ -383,7 +383,7 @@ bool MergeFunctions::runOnModule(Module &M) {
   for (Function &Func : M) {
     if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) {
       HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
-    } 
+    }
   }
 
   std::stable_sort(
@@ -402,7 +402,7 @@ bool MergeFunctions::runOnModule(Module &M) {
       Deferred.push_back(WeakTrackingVH(I->second));
     }
   }
-  
+
   do {
     std::vector<WeakTrackingVH> Worklist;
     Deferred.swap(Worklist);
@@ -802,11 +802,11 @@ void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN,
   Function *F = FN.getFunc();
   assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 &&
          "The two functions must be equal");
-  
+
   auto I = FNodesInTree.find(F);
   assert(I != FNodesInTree.end() && "F should be in FNodesInTree");
   assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G");
-  
+
   FnTreeType::iterator IterToFNInFnTree = I->second;
   assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree.");
   // Remove F -> FN and insert G -> FN
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 27d791857314..2be654258aa8 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -77,13 +77,13 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) {
 
   // Next, check to see if any callees might throw or if there are any external
   // functions in this SCC: if so, we cannot prune any functions in this SCC.
-  // Definitions that are weak and not declared non-throwing might be 
+  // Definitions that are weak and not declared non-throwing might be
   // overridden at linktime with something that throws, so assume that.
   // If this SCC includes the unwind instruction, we KNOW it throws, so
   // obviously the SCC might throw.
   //
   bool SCCMightUnwind = false, SCCMightReturn = false;
-  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); 
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
        (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
     Function *F = (*I)->getFunction();
     if (!F) {
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index aa31e0d850dd..83054588a9aa 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -926,7 +926,13 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
   if (Instruction *NV = foldBinOpIntoSelectOrPhi(Add))
     return NV;
 
-  Value *X;
+  Value *X, *Y;
+
+  // add (sub X, Y), -1 --> add (not Y), X
+  if (match(Op0, m_OneUse(m_Sub(m_Value(X), m_Value(Y)))) &&
+      match(Op1, m_AllOnes()))
+    return BinaryOperator::CreateAdd(Builder.CreateNot(Y), X);
+
   // zext(bool) + C -> bool ? C + 1 : C
   if (match(Op0, m_ZExt(m_Value(X))) &&
       X->getType()->getScalarSizeInBits() == 1)
@@ -1608,6 +1614,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
   if (match(Op0, m_Not(m_Value(X))) && match(Op1, m_Not(m_Value(Y))))
     return BinaryOperator::CreateSub(Y, X);
 
+  // (X + -1) - Y --> ~Y + X
+  if (match(Op0, m_OneUse(m_Add(m_Value(X), m_AllOnes()))))
+    return BinaryOperator::CreateAdd(Builder.CreateNot(Op1), X);
+
+  // Y - (X + 1) --> ~X + Y
+  if (match(Op1, m_OneUse(m_Add(m_Value(X), m_One()))))
+    return BinaryOperator::CreateAdd(Builder.CreateNot(X), Op0);
+
   if (Constant *C = dyn_cast<Constant>(Op0)) {
     bool IsNegate = match(C, m_ZeroInt());
     Value *X;
@@ -1858,7 +1872,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
   Constant *C;
   if (match(Op1, m_Constant(C)) && !isa<ConstantExpr>(Op1))
     return BinaryOperator::CreateFAddFMF(Op0, ConstantExpr::getFNeg(C), &I);
-  
+
   // X - (-Y) --> X + Y
   if (match(Op1, m_FNeg(m_Value(Y))))
     return BinaryOperator::CreateFAddFMF(Op0, Y, &I);
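All three new InstCombineAddSub folds rest on the two's-complement identity `~v == -v - 1`, so for example `~y + x == x - y - 1`. A quick standalone sanity check of the rewrites (plain C++ using unsigned arithmetic so wraparound is well defined; illustrative only, not part of the commit):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t vals[] = {0u, 1u, 7u, 0x80000000u, 0xFFFFFFFFu};
  for (uint32_t x : vals)
    for (uint32_t y : vals) {
      // add (sub X, Y), -1 --> add (not Y), X
      assert((x - y) + 0xFFFFFFFFu == ~y + x);
      // (X + -1) - Y --> ~Y + X
      assert((x + 0xFFFFFFFFu) - y == ~y + x);
      // Y - (X + 1) --> ~X + Y
      assert(y - (x + 1u) == ~x + y);
    }
  return 0;
}
```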
-        if (A == tmpOp0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B
-          return BinaryOperator::CreateAnd(A, Builder.CreateNot(B));
-      }
-    }
+    Value *A, *B, *C;
+    // A & (A ^ B) --> A & ~B
+    if (match(Op1, m_OneUse(m_c_Xor(m_Specific(Op0), m_Value(B)))))
+      return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(B));
+    // (A ^ B) & A --> A & ~B
+    if (match(Op0, m_OneUse(m_c_Xor(m_Specific(Op1), m_Value(B)))))
+      return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(B));
 
     // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
     if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
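The rewritten fold depends only on the bitwise identity `A & (A ^ B) == A & ~B`: where a bit of A is 1, `A ^ B` equals `~B` on that bit; where it is 0, both sides are 0. An exhaustive standalone check over 8-bit values (plain C++, not part of the commit):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b) {
      const uint8_t na = uint8_t(a), nb = uint8_t(b);
      assert((na & (na ^ nb)) == (na & uint8_t(~nb))); // A & (A ^ B)
      assert(((na ^ nb) & na) == (na & uint8_t(~nb))); // (A ^ B) & A
    }
  return 0;
}
```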
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index e8ea7396a96a..fd59c3a7c0c3 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2243,6 +2243,12 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
     Type *DstElTy = DstPTy->getElementType();
     Type *SrcElTy = SrcPTy->getElementType();
 
+    // Casting pointers between the same type, but with different address spaces
+    // is an addrspace cast rather than a bitcast.
+    if ((DstElTy == SrcElTy) &&
+        (DstPTy->getAddressSpace() != SrcPTy->getAddressSpace()))
+      return new AddrSpaceCastInst(Src, DestTy);
+
     // If we are casting a alloca to a pointer to a type of the same
     // size, rewrite the allocation instruction to allocate the "right" type.
     // There is no need to modify malloc calls because it is their bitcast that
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 742caf649007..62769f077b47 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -518,7 +518,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
 static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
   assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
          "can't fold an atomic store of requested type");
-  
+
   Value *Ptr = SI.getPointerOperand();
   unsigned AS = SI.getPointerAddressSpace();
   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 4867808478a3..796b4021d273 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -54,6 +54,36 @@ static Value *createMinMax(InstCombiner::BuilderTy &Builder,
   return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
 }
 
+/// Fold
+///   %A = icmp eq/ne i8 %x, 0
+///   %B = op i8 %x, %z
+///   %C = select i1 %A, i8 %B, i8 %y
+/// To
+///   %C = select i1 %A, i8 %z, i8 %y
+/// OP: binop with an identity constant
+/// TODO: support for non-commutative and FP opcodes
+static Instruction *foldSelectBinOpIdentity(SelectInst &Sel) {
+
+  Value *Cond = Sel.getCondition();
+  Value *X, *Z;
+  Constant *C;
+  CmpInst::Predicate Pred;
+  if (!match(Cond, m_ICmp(Pred, m_Value(X), m_Constant(C))) ||
+      !ICmpInst::isEquality(Pred))
+    return nullptr;
+
+  bool IsEq = Pred == ICmpInst::ICMP_EQ;
+  auto *BO =
+      dyn_cast<BinaryOperator>(IsEq ? Sel.getTrueValue() : Sel.getFalseValue());
+  // TODO: support for undefs
+  if (BO && match(BO, m_c_BinOp(m_Specific(X), m_Value(Z))) &&
+      ConstantExpr::getBinOpIdentity(BO->getOpcode(), X->getType()) == C) {
+    Sel.setOperand(IsEq ? 1 : 2, Z);
+    return &Sel;
+  }
+  return nullptr;
+}
+
 /// This folds:
 ///  select (icmp eq (and X, C1)), TC, FC
 ///    iff C1 is a power 2 and the difference between TC and FC is a power-of-2.
@@ -1961,5 +1991,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   if (Instruction *Select = foldSelectCmpXchg(SI))
     return Select;
 
+  if (Instruction *Select = foldSelectBinOpIdentity(SI))
+    return Select;
+
   return nullptr;
 }
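In source-level terms, the new fold exploits that in the select arm chosen when the equality holds, `%x` is known to equal the identity constant of the bound operation, so the operation is a no-op there. A C-level analogue (not from the commit; the helper names are hypothetical), exhaustively checked for the `eq` case with `add`:

```cpp
#include <cassert>
#include <cstdint>

// Before: the true arm computes x + z, but it is only selected when x == 0,
// the identity constant of 'add'.
static uint8_t before(uint8_t x, uint8_t z, uint8_t y) {
  return x == 0 ? uint8_t(x + z) : y;
}

// After: with x pinned to 0 in that arm, x + z == z.
static uint8_t after(uint8_t x, uint8_t z, uint8_t y) {
  return x == 0 ? z : y;
}

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned z = 0; z < 256; ++z)
      assert(before(uint8_t(x), uint8_t(z), 42) ==
             after(uint8_t(x), uint8_t(z), 42));
  return 0;
}
```

For the `ne` predicate the same reasoning applies to the false arm, which is why the code indexes the operand with `IsEq ? 1 : 2`.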
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 34f8037e519f..1ca75f3989d4 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -570,7 +570,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
                             m_OneUse(m_BinOp(FBO))))) {
       const APInt *C;
       if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal &&
-          match(FBO->getOperand(1), m_APInt(C)) && 
+          match(FBO->getOperand(1), m_APInt(C)) &&
           canShiftBinOpWithConstantRHS(I, FBO, *C)) {
         Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
                                        cast<Constant>(FBO->getOperand(1)), Op1);
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 2560feb37d66..1c2de6352fa5 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -605,7 +605,7 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
     return nullptr;
 
   Value *SplatVal = InsElt.getOperand(1);
-  InsertElementInst *CurrIE = &InsElt;  
+  InsertElementInst *CurrIE = &InsElt;
   SmallVector<bool, 16> ElementPresent(NumElements, false);
   InsertElementInst *FirstIE = nullptr;
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 12fcc8752ea9..cff0d5447290 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1424,7 +1424,7 @@ Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) {
       bool ConstOp1 = isa<Constant>(Inst.getOperand(1));
       if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
         NewC = getSafeVectorConstantForBinop(Inst.getOpcode(), NewC, ConstOp1);
-      
+
       // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
       // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
       Value *NewLHS = isa<Constant>(LHS) ? NewC : V1;
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index b3f659194558..6af44354225c 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -2464,10 +2464,10 @@ bool AddressSanitizer::runOnFunction(Function &F) {
 
   // If needed, insert __asan_init before checking for SanitizeAddress attr.
   // This function needs to be called even if the function body is not
-  // instrumented.  
+  // instrumented.
   if (maybeInsertAsanInitAtFunctionEntry(F))
     FunctionModified = true;
-  
+
   // Leave if the function doesn't need instrumentation.
   if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return FunctionModified;
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index acd27c2e226f..132e8089fe3b 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -148,7 +148,7 @@ public:
   }
 
   StringRef getPassName() const override { return "GCOV Profiler"; }
-  bool runOnModule(Module &M) override { 
+  bool runOnModule(Module &M) override {
     auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
     return Profiler.runOnModule(M, TLI);
   }
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 22076f04d6ad..4d5dfb0aa66b 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -898,7 +898,7 @@ void InstrProfiling::emitRegistration() {
   IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
   for (Value *Data : UsedVars)
-    if (Data != NamesVar)
+    if (Data != NamesVar && !isa<Function>(Data))
       IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
 
   if (NamesVar) {
diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index fa7bcec677f7..0830ff5dd042 100644
--- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -280,7 +280,7 @@ bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I,
     return false;
 
   // Sign extend the offset to 64 bits (so that it is like all of the other
-  // expressions). 
+  // expressions).
   unsigned OffSCEVBits = OffSCEV->getType()->getPrimitiveSizeInBits();
   if (OffSCEVBits < 64)
     OffSCEV = SE->getSignExtendExpr(OffSCEV, Int64Ty);
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index 3a675b979017..55759e8b1661 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -781,7 +781,7 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
   this->TTI = &TTI;
   this->DT = &DT;
   this->BFI = BFI;
-  this->Entry = &Entry;  
+  this->Entry = &Entry;
 
   // Collect all constant candidates.
   collectConstantCandidates(Fn);
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index ea148b728a10..2f2d7f620a29 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -473,7 +473,7 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
     // relatively expensive analysis for constants which are obviously either
    // null or non-null to start with.
     if (Type && !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
-        !isa<Constant>(V) && 
+        !isa<Constant>(V) &&
         LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
                             ConstantPointerNull::get(Type),
                             CS.getInstruction()) == LazyValueInfo::False)
@@ -670,12 +670,12 @@ static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) {
   Value *Op0 = C->getOperand(0);
   Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
   if (!Op1) return nullptr;
-  
+
   LazyValueInfo::Tristate Result =
     LVI->getPredicateAt(C->getPredicate(), Op0, Op1, At);
   if (Result == LazyValueInfo::Unknown)
     return nullptr;
-  
+
   return (Result == LazyValueInfo::True) ?
     ConstantInt::getTrue(C->getContext()) :
     ConstantInt::getFalse(C->getContext());
@@ -747,7 +747,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
       if (auto *C = getConstantAt(RetVal, RI, LVI)) {
         ++NumReturns;
         RI->replaceUsesOfWith(RetVal, C);
-        BBChanged = true;        
+        BBChanged = true;
       }
     }
     }
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index dd1a2a6adb82..9a7405e98e7d 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -188,7 +188,7 @@ static bool hasAnalyzableMemoryWrite(Instruction *I,
 /// returns true, this function and getLocForRead completely describe the memory
 /// operations for this instruction.
 static MemoryLocation getLocForWrite(Instruction *Inst) {
-  
+
   if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
     return MemoryLocation::get(SI);
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 565745d12e99..533d16e088c8 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -384,7 +384,7 @@ public:
                       LoadMapAllocator>;
 
   LoadHTType AvailableLoads;
-  
+
   // A scoped hash table mapping memory locations (represented as typed
   // addresses) to generation numbers at which that memory location became
   // (henceforth indefinitely) invariant.
@@ -844,7 +844,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
     // start a scope in the current generaton which is true for all future
     // generations.  Also, we dont need to consume the last store since the
     // semantics of invariant.start allow us to perform   DSE of the last
-    // store, if there was a store following invariant.start. Consider: 
+    // store, if there was a store following invariant.start. Consider:
     //
     // store 30, i8* p
     // invariant.start(p)
@@ -852,7 +852,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
     // We can DSE the store to 30, since the store 40 to invariant location p
     // causes undefined behaviour.
     if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
-      // If there are any uses, the scope might end.  
+      // If there are any uses, the scope might end.
       if (!Inst->use_empty())
         continue;
       auto *CI = cast<CallInst>(Inst);
diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp
index 28c5940db1e0..8959038de596 100644
--- a/lib/Transforms/Scalar/GVNSink.cpp
+++ b/lib/Transforms/Scalar/GVNSink.cpp
@@ -568,7 +568,7 @@ public:
     ReversePostOrderTraversal<Function*> RPOT(&F);
     for (auto *N : RPOT)
       NumSunk += sinkBB(N);
-    
+
    return NumSunk > 0;
   }
diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp
index ad1598d7b8bf..055fcbc8436f 100644
--- a/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/lib/Transforms/Scalar/GuardWidening.cpp
@@ -43,6 +43,7 @@
 #include <functional>
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/PostDominators.h"
@@ -61,6 +62,8 @@ using namespace llvm;
 
 #define DEBUG_TYPE "guard-widening"
 
+STATISTIC(GuardsEliminated, "Number of eliminated guards");
+
 namespace {
 
 class GuardWideningImpl {
@@ -75,21 +78,33 @@ class GuardWideningImpl {
 
   /// The set of guards whose conditions have been widened into dominating
   /// guards.
-  SmallVector<IntrinsicInst *, 16> EliminatedGuards;
+  SmallVector<Instruction *, 16> EliminatedGuards;
 
   /// The set of guards which have been widened to include conditions to other
   /// guards.
-  DenseSet<IntrinsicInst *> WidenedGuards;
+  DenseSet<Instruction *> WidenedGuards;
 
   /// Try to eliminate guard \p Guard by widening it into an earlier dominating
   /// guard.  \p DFSI is the DFS iterator on the dominator tree that is
   /// currently visiting the block containing \p Guard, and \p GuardsPerBlock
   /// maps BasicBlocks to the set of guards seen in that block.
   bool eliminateGuardViaWidening(
-      IntrinsicInst *Guard, const df_iterator<DomTreeNode *> &DFSI,
-      const DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> &
+      Instruction *Guard, const df_iterator<DomTreeNode *> &DFSI,
+      const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
           GuardsPerBlock);
 
+  // Get the condition from \p GuardInst.
+  Value *getGuardCondition(Instruction *GuardInst);
+
+  // Set the condition for \p GuardInst.
+  void setGuardCondition(Instruction *GuardInst, Value *NewCond);
+
+  // Whether or not the particular instruction is a guard.
+  bool isGuard(const Instruction *I);
+
+  // Eliminates the guard instruction properly.
+  void eliminateGuard(Instruction *GuardInst);
+
   /// Used to keep track of which widening potential is more effective.
   enum WideningScore {
     /// Don't widen.
@@ -113,9 +128,9 @@ class GuardWideningImpl {
   /// Compute the score for widening the condition in \p DominatedGuard
   /// (contained in \p DominatedGuardLoop) into \p DominatingGuard (contained in
   /// \p DominatingGuardLoop).
-  WideningScore computeWideningScore(IntrinsicInst *DominatedGuard,
+  WideningScore computeWideningScore(Instruction *DominatedGuard,
                                      Loop *DominatedGuardLoop,
-                                     IntrinsicInst *DominatingGuard,
+                                     Instruction *DominatingGuard,
                                      Loop *DominatingGuardLoop);
 
   /// Helper to check if \p V can be hoisted to \p InsertPos.
@@ -206,10 +221,10 @@
 
   /// Widen \p ToWiden to fail if \p NewCondition is false (in addition to
   /// whatever it is already checking).
-  void widenGuard(IntrinsicInst *ToWiden, Value *NewCondition) {
+  void widenGuard(Instruction *ToWiden, Value *NewCondition) {
     Value *Result;
-    widenCondCommon(ToWiden->getArgOperand(0), NewCondition, ToWiden, Result);
-    ToWiden->setArgOperand(0, Result);
+    widenCondCommon(ToWiden->getOperand(0), NewCondition, ToWiden, Result);
+    setGuardCondition(ToWiden, Result);
   }
 
 public:
@@ -225,9 +240,7 @@ public:
 }
 
 bool GuardWideningImpl::run() {
-  using namespace llvm::PatternMatch;
-
-  DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> GuardsInBlock;
+  DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
   bool Changed = false;
 
   for (auto DFI = df_begin(Root), DFE = df_end(Root);
@@ -239,8 +252,8 @@ bool GuardWideningImpl::run() {
     auto &CurrentList = GuardsInBlock[BB];
 
     for (auto &I : *BB)
-      if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()))
-        CurrentList.push_back(cast<IntrinsicInst>(&I));
+      if (isGuard(&I))
+        CurrentList.push_back(cast<Instruction>(&I));
 
     for (auto *II : CurrentList)
       Changed |= eliminateGuardViaWidening(II, DFI, GuardsInBlock);
@@ -249,16 +262,16 @@ bool GuardWideningImpl::run() {
   assert(EliminatedGuards.empty() || Changed);
   for (auto *II : EliminatedGuards)
     if (!WidenedGuards.count(II))
-      II->eraseFromParent();
+      eliminateGuard(II);
 
   return Changed;
 }
 
 bool GuardWideningImpl::eliminateGuardViaWidening(
-    IntrinsicInst *GuardInst, const df_iterator<DomTreeNode *> &DFSI,
-    const DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> &
+    Instruction *GuardInst, const df_iterator<DomTreeNode *> &DFSI,
+    const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
         GuardsInBlock) {
-  IntrinsicInst *BestSoFar = nullptr;
+  Instruction *BestSoFar = nullptr;
   auto BestScoreSoFar = WS_IllegalOrNegative;
   auto *GuardInstLoop = LI.getLoopFor(GuardInst->getParent());
 
@@ -302,8 +315,8 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
     for (auto *Candidate : make_range(I, E)) {
       auto Score =
           computeWideningScore(GuardInst, GuardInstLoop, Candidate, CurLoop);
-      LLVM_DEBUG(dbgs() << "Score between " << *GuardInst->getArgOperand(0)
-                        << " and " << *Candidate->getArgOperand(0) << " is "
+      LLVM_DEBUG(dbgs() << "Score between " << *getGuardCondition(GuardInst)
+                        << " and " << *getGuardCondition(Candidate) << " is "
                         << scoreTypeToString(Score) << "\n");
       if (Score > BestScoreSoFar) {
         BestScoreSoFar = Score;
@@ -323,16 +336,41 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
   LLVM_DEBUG(dbgs() << "Widening " << *GuardInst << " into " << *BestSoFar
                     << " with score " << scoreTypeToString(BestScoreSoFar)
                     << "\n");
-  widenGuard(BestSoFar, GuardInst->getArgOperand(0));
-  GuardInst->setArgOperand(0, ConstantInt::getTrue(GuardInst->getContext()));
+  widenGuard(BestSoFar, getGuardCondition(GuardInst));
+  setGuardCondition(GuardInst, ConstantInt::getTrue(GuardInst->getContext()));
   EliminatedGuards.push_back(GuardInst);
   WidenedGuards.insert(BestSoFar);
   return true;
 }
 
+Value *GuardWideningImpl::getGuardCondition(Instruction *GuardInst) {
+  IntrinsicInst *GI = cast<IntrinsicInst>(GuardInst);
+  assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
+         "Bad guard intrinsic?");
+  return GI->getArgOperand(0);
+}
+
+void GuardWideningImpl::setGuardCondition(Instruction *GuardInst,
+                                          Value *NewCond) {
+  IntrinsicInst *GI = cast<IntrinsicInst>(GuardInst);
+  assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
+         "Bad guard intrinsic?");
+  GI->setArgOperand(0, NewCond);
+}
+
+bool GuardWideningImpl::isGuard(const Instruction* I) {
+  using namespace llvm::PatternMatch;
+  return match(I, m_Intrinsic<Intrinsic::experimental_guard>());
+}
+
+void GuardWideningImpl::eliminateGuard(Instruction *GuardInst) {
+  GuardInst->eraseFromParent();
+  ++GuardsEliminated;
+}
+
 GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
-    IntrinsicInst *DominatedGuard, Loop *DominatedGuardLoop,
-    IntrinsicInst *DominatingGuard, Loop *DominatingGuardLoop) {
+    Instruction *DominatedGuard, Loop *DominatedGuardLoop,
+    Instruction *DominatingGuard, Loop *DominatingGuardLoop) {
   bool HoistingOutOfLoop = false;
 
   if (DominatingGuardLoop != DominatedGuardLoop) {
@@ -345,7 +383,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
     HoistingOutOfLoop = true;
   }
 
-  if (!isAvailableAt(DominatedGuard->getArgOperand(0), DominatingGuard))
+  if (!isAvailableAt(getGuardCondition(DominatedGuard), DominatingGuard))
     return WS_IllegalOrNegative;
 
   // If the guard was conditional executed, it may never be reached
@@ -355,9 +393,9 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
   // case.  At the moment, we really only consider the second in our heuristic
   // here.  TODO: evaluate cost model for spurious deopt
   // NOTE: As written, this also lets us hoist right over another guard which
-  // is essentially just another spelling for control flow.  
-  if (isWideningCondProfitable(DominatedGuard->getArgOperand(0),
-                               DominatingGuard->getArgOperand(0)))
+  // is essentially just another spelling for control flow.
+  if (isWideningCondProfitable(getGuardCondition(DominatedGuard),
+                               getGuardCondition(DominatingGuard)))
     return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;
 
   if (HoistingOutOfLoop)
@@ -369,7 +407,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
   auto MaybeHoistingOutOfIf = [&]() {
     auto *DominatingBlock = DominatingGuard->getParent();
     auto *DominatedBlock = DominatedGuard->getParent();
-    
+
     // Same Block?
     if (DominatedBlock == DominatingBlock)
       return false;
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index e2f29705f2dd..c5ed6d5c1b87 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -735,7 +735,7 @@ static bool isSafeDecreasingBound(const SCEV *Start,
   assert(LatchBrExitIdx == 0 &&
          "LatchBrExitIdx should be either 0 or 1");
-            
+
   const SCEV *StepPlusOne = SE.getAddExpr(Step, SE.getOne(Step->getType()));
   unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
   APInt Min = IsSigned ? APInt::getSignedMinValue(BitWidth) :
@@ -786,7 +786,7 @@ static bool isSafeIncreasingBound(const SCEV *Start,
   const SCEV *StepMinusOne =
     SE.getMinusSCEV(Step, SE.getOne(Step->getType()));
   unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
-  APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) : 
+  APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) :
     APInt::getMaxValue(BitWidth);
   const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Max), StepMinusOne);
 
@@ -798,7 +798,7 @@ static bool isSafeIncreasingBound(const SCEV *Start,
 static bool CannotBeMinInLoop(const SCEV *BoundSCEV, Loop *L,
                               ScalarEvolution &SE, bool Signed) {
   unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
-  APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) : 
+  APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) :
     APInt::getMinValue(BitWidth);
   auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
   return SE.isAvailableAtLoopEntry(BoundSCEV, L) &&
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index ff66632f0391..c4ea43a43249 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -455,7 +455,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
 
     // Keep track of whether the prefix of instructions visited so far are such
     // that the next instruction visited is guaranteed to execute if the loop
-    // is entered.  
+    // is entered.
     bool IsMustExecute = CurLoop->getHeader() == BB;
 
     for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
@@ -1186,9 +1186,9 @@ bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) {
   if (isa<AllocaInst>(Object))
     // Since the alloca goes out of scope, we know the caller can't retain a
     // reference to it and be well defined.  Thus, we don't need to check for
-    // capture. 
+    // capture.
     return true;
-  
+
   // For all other objects we need to know that the caller can't possibly
   // have gotten a reference to the object.  There are two components of
   // that:
@@ -1282,7 +1282,7 @@ bool llvm::promoteLoopAccessesToScalars(
     // That said, we can't actually make the unwind edge explicit. Therefore,
     // we have to prove that the store is dead along the unwind edge.  We do
     // this by proving that the caller can't have a reference to the object
-    // after return and thus can't possibly load from the object.  
+    // after return and thus can't possibly load from the object.
     Value *Object = GetUnderlyingObject(SomePtr, MDL);
     if (!isKnownNonEscaping(Object, TLI))
       return false;
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index d8692198f7a3..653948717fb9 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1573,7 +1573,7 @@ void LoopIdiomRecognize::transformLoopToCountable(
       InitXNext =
           Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1));
     else
-      llvm_unreachable("Unexpected opcode!");      
+      llvm_unreachable("Unexpected opcode!");
   } else
     InitXNext = InitX;
   CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck);
diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp
index 561ceea1d880..cbb6594cf8f4 100644
--- a/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/lib/Transforms/Scalar/LoopPredication.cpp
@@ -74,7 +74,7 @@
 //   }
 //
 // One solution for M is M = forall X . (G(X) && B(X)) => G(X + Step)
-// 
+//
 // Informal proof that the transformation above is correct:
 //
 //   By the definition of guards we can rewrite the guard condition to:
@@ -83,7 +83,7 @@
 //   Let's prove that for each iteration of the loop:
 //     G(0) && M => G(I)
 //   And the condition above can be simplified to G(Start) && M.
-// 
+//
 //   Induction base.
 //     G(0) && M => G(0)
 //
@@ -379,7 +379,7 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander,
                                     ICmpInst::Predicate Pred, const SCEV *LHS,
                                     const SCEV *RHS, Instruction *InsertAt) {
   // TODO: we can check isLoopEntryGuardedByCond before emitting the check
- 
+
   Type *Ty = LHS->getType();
   assert(Ty == RHS->getType() && "expandCheck operands have different types?");
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 634215c9770f..e955821effa0 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -888,7 +888,7 @@ bool llvm::computeUnrollCount(
     UP.Count = 0;
     return false;
   }
-  
+
   // Check if the runtime trip count is too small when profile is available.
   if (L->getHeader()->getParent()->hasProfileData()) {
     if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
@@ -897,7 +897,7 @@ bool llvm::computeUnrollCount(
       else
         UP.AllowExpensiveTripCount = true;
     }
-  }  
+  }
 
   // Reduce count based on the type of unrolling and the threshold values.
   UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index b12586758925..6aad077ff19e 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -708,7 +708,7 @@ bool LoopUnswitch::processCurrentLoop() {
       // Unswitch only those branches that are reachable.
      if (isUnreachableDueToPreviousUnswitching(*I))
         continue;
- 
+
       // If this isn't branching on an invariant condition, we can't unswitch
       // it.
       if (BI->isConditional()) {
@@ -754,7 +754,7 @@ bool LoopUnswitch::processCurrentLoop() {
           // We are unswitching ~0 out.
           UnswitchVal = AllOne;
         } else {
-          assert(OpChain == OC_OpChainNone && 
+          assert(OpChain == OC_OpChainNone &&
                  "Expect to unswitch on trivial chain");
           // Do not process same value again and again.
           // At this point we have some cases already unswitched and
@@ -1440,11 +1440,11 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
         // This in-loop instruction has been simplified w.r.t. its context,
         // i.e. LIC != Val, make sure we propagate its replacement value to
         // all its users.
-        //  
+        //
         // We can not yet delete UI, the LIC user, yet, because that would invalidate
         // the LIC->users() iterator !. However, we can make this instruction
         // dead by replacing all its users and push it onto the worklist so that
-        // it can be properly deleted and its operands simplified. 
+        // it can be properly deleted and its operands simplified.
         UI->replaceAllUsesWith(Replacement);
       }
     }
@@ -1609,7 +1609,7 @@ Value *LoopUnswitch::SimplifyInstructionWithNotEqual(Instruction *Inst,
       LLVMContext &Ctx = Inst->getContext();
       if (CI->getPredicate() == CmpInst::ICMP_EQ)
         return ConstantInt::getFalse(Ctx);
-      else 
+      else
         return ConstantInt::getTrue(Ctx); 
     }
   }
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 2eb887c986be..3e47e9441d15 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -2007,7 +2007,7 @@ NewGVN::performSymbolicEvaluation(Value *V,
     case Instruction::Load:
       E = performSymbolicLoadEvaluation(I);
       break;
-    case Instruction::BitCast: 
+    case Instruction::BitCast:
       E = createExpression(I);
       break;
     case Instruction::ICmp:
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index c81ac70d99e6..1df0a9c49fb1 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -1179,7 +1179,7 @@ static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
 // and both "Res" and "ConstOpnd" remain unchanged.
 bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
                                      APInt &ConstOpnd, Value *&Res) {
-  // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2 
+  // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
   //                       = ((x | c1) ^ c1) ^ (c1 ^ c2)
   //                       = (x & ~c1) ^ (c1 ^ c2)
   // It is useful only when c1 == c2.
@@ -1202,12 +1202,12 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
     RedoInsts.insert(T);
   return true;
 }
-                           
+
 // Helper function of OptimizeXor(). It tries to simplify
 // "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
-// symbolic value. 
-// 
-// If it was successful, true is returned, and the "R" and "C" is returned 
+// symbolic value.
+//
+// If it was successful, true is returned, and the "R" and "C" is returned
 // via "Res" and "ConstOpnd", respectively (If the entire expression is
 // evaluated to a constant, the Res is set to NULL); otherwise, false is
 // returned, and both "Res" and "ConstOpnd" remain unchanged.
@@ -1254,7 +1254,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
     const APInt &C1 = Opnd1->getConstPart();
     const APInt &C2 = Opnd2->getConstPart();
     APInt C3 = C1 ^ C2;
-    
+
     // Do not increase code size
     if (!C3.isNullValue() && !C3.isAllOnesValue()) {
       int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
@@ -1290,7 +1290,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
                                     SmallVectorImpl<ValueEntry> &Ops) {
   if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
     return V;
-      
+
   if (Ops.size() == 1)
     return nullptr;
 
@@ -1365,7 +1365,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
     }
 
     // step 3.2: When previous and current operands share the same symbolic
-    //  value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd" 
+    //  value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
     if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
       // Remove previous operand
       PrevOpnd->Invalidate();
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 391e43f79121..0de2bc72b522 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -401,7 +401,7 @@ namespace {
 /// defining value.  The 'base defining value' for 'Def' is the transitive
 /// closure of this relation stopping at the first instruction which has no
 /// immediate base defining value.  The b.d.v. might itself be a base pointer,
-/// but it can also be an arbitrary derived pointer. 
+/// but it can also be an arbitrary derived pointer.
 struct BaseDefiningValueResult {
   /// Contains the value which is the base defining value.
   Value * const BDV;
@@ -427,13 +427,13 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I);
 
 /// Return a base defining value for the 'Index' element of the given vector
 /// instruction 'I'.  If Index is null, returns a BDV for the entire vector
-/// 'I'.  As an optimization, this method will try to determine when the 
+/// 'I'.  As an optimization, this method will try to determine when the
 /// element is known to already be a base pointer.  If this can be established,
 /// the second value in the returned pair will be true.  Note that either a
 /// vector or a pointer typed value can be returned.  For the former, the
 /// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
 /// If the later, the return pointer is a BDV (or possibly a base) for the
-/// particular element in 'I'.  
+/// particular element in 'I'.
 static BaseDefiningValueResult
 findBaseDefiningValueOfVector(Value *I) {
   // Each case parallels findBaseDefiningValue below, see that code for
@@ -444,7 +444,7 @@ findBaseDefiningValueOfVector(Value *I) {
     return BaseDefiningValueResult(I, true);
 
   if (isa<Constant>(I))
-    // Base of constant vector consists only of constant null pointers. 
+    // Base of constant vector consists only of constant null pointers.
     // For reasoning see similar case inside 'findBaseDefiningValue' function.
     return BaseDefiningValueResult(ConstantAggregateZero::get(I->getType()),
                                    true);
@@ -508,11 +508,11 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
   if (isa<Constant>(I)) {
     // We assume that objects with a constant base (e.g. a global) can't move
     // and don't need to be reported to the collector because they are always
-    // live. Besides global references, all kinds of constants (e.g. undef, 
+    // live. Besides global references, all kinds of constants (e.g. undef,
     // constant expressions, null pointers) can be introduced by the inliner or
     // the optimizer, especially on dynamically dead paths.
     // Here we treat all of them as having single null base. By doing this we
-    // trying to avoid problems reporting various conflicts in a form of 
+    // trying to avoid problems reporting various conflicts in a form of
     // "phi (const1, const2)" or "phi (const, regular gc ptr)".
     // See constant.ll file for relevant test cases.
 
@@ -1285,14 +1285,14 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
     return Index;
   };
   Module *M = StatepointToken->getModule();
-  
+
   // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose
   // element type is i8 addrspace(1)*). We originally generated unique
   // declarations for each pointer type, but this proved problematic because
   // the intrinsic mangling code is incomplete and fragile.  Since we're moving
   // towards a single unified pointer type anyways, we can just cast everything
   // to an i8* of the right address space.  A bitcast is added later to convert
-  // gc_relocate to the actual value's type.  
+  // gc_relocate to the actual value's type.
   auto getGCRelocateDecl = [&] (Type *Ty) {
     assert(isHandledGCPointerType(Ty));
     auto AS = Ty->getScalarType()->getPointerAddressSpace();
@@ -1413,7 +1413,7 @@ static StringRef getDeoptLowering(CallSite CS) {
   }
   return "live-through";
 }
-    
+
 static void
 makeStatepointExplicitImpl(const CallSite CS, /* to replace */
                            const SmallVectorImpl<Value *> &BasePtrs,
@@ -2570,7 +2570,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
     }
 
   // Before we start introducing relocations, we want to tweak the IR a bit to
-  // avoid unfortunate code generation effects.  The main example is that we 
+  // avoid unfortunate code generation effects.  The main example is that we
   // want to try to make sure the comparison feeding a branch is after any
   // safepoints.  Otherwise, we end up with a comparison of pre-relocation
   // values feeding a branch after relocation.  This is semantically correct,
@@ -2593,7 +2593,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
     TerminatorInst *TI = BB.getTerminator();
     if (auto *Cond = getConditionInst(TI))
       // TODO: Handle more than just ICmps here.  We should be able to move
-      // most instructions without side effects or memory access.  
+      // most instructions without side effects or memory access.
       if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
         MadeChange = true;
         Cond->moveBefore(TI);
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 6c3f012c6280..de16b608f752 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -3730,7 +3730,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
                          PartPtrTy, BasePtr->getName() + "."),
           getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
           LI->getName());
-      PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); 
+      PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
 
       // Append this load onto the list of split loads so we can find it later
      // to rewrite the stores.
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 34510cb40732..5834b619046b 100644
--- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -459,9 +459,11 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
                                               *ParentBB, *OldPH, FullUnswitch);
 
   // Now we need to update the dominator tree.
-  DT.insertEdge(OldPH, UnswitchedBB);
+  SmallVector<DominatorTree::UpdateType, 2> DTUpdates;
+  DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB});
   if (FullUnswitch)
-    DT.deleteEdge(ParentBB, UnswitchedBB);
+    DTUpdates.push_back({DT.Delete, ParentBB, LoopExitBB});
+  DT.applyUpdates(DTUpdates);
 
   // The constant we can replace all of our invariants with inside the loop
   // body. If any of the invariants have a value other than this the loop won't
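The SimpleLoopUnswitch hunk above moves from eager per-edge dominator-tree maintenance (insertEdge/deleteEdge) to collecting the CFG edge changes and handing them to DT.applyUpdates() in one batch, after the IR has been mutated. A minimal standalone sketch of that API against the LLVM 7-era headers (the tiny function built here is hypothetical; compile and link against LLVM to run):

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  // Hypothetical function: entry initially branches to %old.
  Function *F = Function::Create(
      FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false),
      Function::ExternalLinkage, "f", &M);
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  BasicBlock *Old = BasicBlock::Create(Ctx, "old", F);
  BasicBlock *New = BasicBlock::Create(Ctx, "new", F);
  IRBuilder<> B(Entry);
  B.CreateBr(Old);
  B.SetInsertPoint(Old);
  B.CreateRetVoid();
  B.SetInsertPoint(New);
  B.CreateRetVoid();

  DominatorTree DT(*F);

  // Mutate the CFG first (entry now branches to %new), then report both
  // edge changes to the dominator tree in one batch, as the pass does.
  Entry->getTerminator()->setSuccessor(0, New);
  SmallVector<DominatorTree::UpdateType, 2> DTUpdates;
  DTUpdates.push_back({DT.Insert, Entry, New});
  DTUpdates.push_back({DT.Delete, Entry, Old});
  DT.applyUpdates(DTUpdates);
  return DT.verify() ? 0 : 1;
}
```

Batching lets the incremental updater see the final CFG once instead of rebuilding intermediate states, and it sidesteps ordering hazards between dependent insertions and deletions.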
BasicBlock *NewBB;    BBEntry = NewBB = BasicBlock::Create(BB->getContext()); @@ -363,7 +363,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,          hasDynamicAllocas = true;      }    } -   +    // Finally, clone over the terminator.    const TerminatorInst *OldTI = BB->getTerminator();    bool TerminatorDone = false; @@ -400,7 +400,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,        TerminatorDone = true;      }    } -   +    if (!TerminatorDone) {      Instruction *NewInst = OldTI->clone();      if (OldTI->hasName()) @@ -418,11 +418,11 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,      for (const BasicBlock *Succ : TI->successors())        ToClone.push_back(Succ);    } -   +    if (CodeInfo) {      CodeInfo->ContainsCalls          |= hasCalls;      CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; -    CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&  +    CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&        BB != &BB->getParent()->front();    }  } @@ -468,7 +468,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,      CloneWorklist.pop_back();      PFC.CloneBlock(BB, BB->begin(), CloneWorklist);    } -   +    // Loop over all of the basic blocks in the old function.  If the block was    // reachable, we have cloned it and the old block is now in the value map:    // insert it into the new function in the right order.  If not, ignore it. @@ -500,7 +500,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,                       TypeMapper, Materializer);    } -   +    // Defer PHI resolution until rest of function is resolved, PHI resolution    // requires the CFG to be up-to-date.    for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { @@ -519,7 +519,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,          Value *V = VMap.lookup(PN->getIncomingBlock(pred));          if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {            Value *InVal = MapValue(PN->getIncomingValue(pred), -                                  VMap,  +                                  VMap,                          ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);            assert(InVal && "Unknown input value?");            PN->setIncomingValue(pred, InVal); @@ -529,9 +529,9 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,            --pred;  // Revisit the next entry.            --e;          } -      }  +      }      } -     +      // The loop above has removed PHI entries for those blocks that are dead      // and has updated others.  However, if a block is live (i.e. copied over)      // but its terminator has been changed to not go to this block, then our @@ -546,11 +546,11 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,        for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);             PI != E; ++PI)          --PredCount[*PI]; -       +        // Figure out how many entries to remove from each PHI.        for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)          ++PredCount[PN->getIncomingBlock(i)]; -       +        // At this point, the excess predecessor entries are positive in the        // map.  Loop over all of the PHIs and remove excess predecessor        // entries. 
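The predecessor-count bookkeeping above is compact, so a tiny worked example may help (hypothetical block NewBB with two CFG predecessors P1 and P2, but three PHI entries):

    // PredCount starts at zero for every block.
    // 1) Subtract one per real CFG edge into NewBB:
    //      PredCount[P1] = -1, PredCount[P2] = -1
    // 2) Add one per incoming entry of the first PHI (entries P1, P2, P2):
    //      PredCount[P1] = 0,  PredCount[P2] = +1
    // Blocks left positive have that many excess entries; here one stale
    // P2 entry must be removed from every PHI in NewBB.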
@@ -563,7 +563,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,          }        }      } -     +      // If the loops above have made these phi nodes have 0 or 1 operand,      // replace them with undef or the input value.  We must do this for      // correctness, because 0-operand phis are not valid. @@ -655,7 +655,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,      BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());      if (!BI || BI->isConditional()) { ++I; continue; } -     +      BasicBlock *Dest = BI->getSuccessor(0);      if (!Dest->getSinglePredecessor()) {        ++I; continue; @@ -668,16 +668,16 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,      // We know all single-entry PHI nodes in the inlined function have been      // removed, so we just need to splice the blocks.      BI->eraseFromParent(); -     +      // Make all PHI nodes that referred to Dest now refer to I as their source.      Dest->replaceAllUsesWith(&*I);      // Move all the instructions in the succ to the pred.      I->getInstList().splice(I->end(), Dest->getInstList()); -     +      // Remove the dest block.      Dest->eraseFromParent(); -     +      // Do not increment I, iteratively merge all things this block branches to.    } @@ -703,7 +703,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,                                       ValueToValueMapTy &VMap,                                       bool ModuleLevelChanges,                                       SmallVectorImpl<ReturnInst*> &Returns, -                                     const char *NameSuffix,  +                                     const char *NameSuffix,                                       ClonedCodeInfo *CodeInfo,                                       Instruction *TheCall) {    CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap, @@ -730,7 +730,7 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,                                     const Twine &NameSuffix, LoopInfo *LI,                                     DominatorTree *DT,                                     SmallVectorImpl<BasicBlock *> &Blocks) { -  assert(OrigLoop->getSubLoops().empty() &&  +  assert(OrigLoop->getSubLoops().empty() &&           "Loop to be cloned cannot have inner loop");    Function *F = OrigLoop->getHeader()->getParent();    Loop *ParentLoop = OrigLoop->getParentLoop(); diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 35c7511a24b9..c7d68bab8170 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -61,7 +61,7 @@ std::unique_ptr<Module> llvm::CloneModule(    //    for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();         I != E; ++I) { -    GlobalVariable *GV = new GlobalVariable(*New,  +    GlobalVariable *GV = new GlobalVariable(*New,                                              I->getValueType(),                                              I->isConstant(), I->getLinkage(),                                              (Constant*) nullptr, I->getName(), @@ -110,7 +110,7 @@ std::unique_ptr<Module> llvm::CloneModule(      GA->copyAttributesFrom(&*I);      VMap[&*I] = GA;    } -   +    // Now that all of the things that global variable initializer can refer to    // have been created, loop through and copy the global variable referrers    // over...  
We also set the attributes on the global now. diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index f31dab9f96af..cb349e34606c 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -1020,7 +1020,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,      } else {        // Otherwise we must have code extracted an unwind or something, just        // return whatever we want. -      ReturnInst::Create(Context,  +      ReturnInst::Create(Context,                           Constant::getNullValue(OldFnRetTy), TheSwitch);      } @@ -1158,13 +1158,13 @@ Function *CodeExtractor::extractCodeRegion() {    splitReturnBlocks();    // This takes place of the original loop -  BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),  +  BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),                                                  "codeRepl", oldFunction,                                                  header);    // The new function needs a root node because other nodes can branch to the    // head of the region, but the entry node of a function cannot have preds. -  BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),  +  BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),                                                 "newFuncRoot");    auto *BranchI = BranchInst::Create(header);    // If the original function has debug info, we have to add a debug location diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 0315aac1cf84..ddc6e07e2f59 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -1199,7 +1199,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,      // Only copy the edge if the call was inlined!      if (VMI == VMap.end() || VMI->second == nullptr)        continue; -     +      // If the call was inlined, but then constant folded, there is no edge to      // add.  Check for this case.      Instruction *NewCall = dyn_cast<Instruction>(VMI->second); @@ -1211,7 +1211,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,      CallSite CS = CallSite(NewCall);      if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic())        continue; -     +      // Remember that this call site got inlined for the client of      // InlineFunction.      IFI.InlinedCalls.push_back(NewCall); @@ -1231,7 +1231,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,      CallerNode->addCalledFunction(CallSite(NewCall), I->second);    } -   +    // Update the call graph by deleting the edge from Callee to Caller.  We must    // do this after the loop above in case Caller and Callee are the same.    CallerNode->removeCallEdgeFor(CS); @@ -1380,7 +1380,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,        if (CalleeHasDebugInfo)          continue; -       +        // If the inlined instruction has no line number, make it look as if it        // originates from the call location. This is important for        // ((__always_inline__, __nodebug__)) functions which must use caller @@ -1777,7 +1777,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,           E = FirstNewBlock->end(); I != E; ) {        AllocaInst *AI = dyn_cast<AllocaInst>(I++);        if (!AI) continue; -       +        // If the alloca is now dead, remove it.  This often occurs due to code        // specialization. 
       if (AI->use_empty()) { @@ -1787,10 +1787,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,        if (!allocaWouldBeStaticInEntry(AI))          continue; -       +        // Keep track of the static allocas that we inline into the caller.        IFI.StaticAllocas.push_back(AI); -       +        // Scan for the block of allocas that we can move over, and move them        // all at once.        while (isa<AllocaInst>(I) && diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp index 3fbb3487884b..4a359b99bebd 100644 --- a/lib/Transforms/Utils/IntegerDivision.cpp +++ b/lib/Transforms/Utils/IntegerDivision.cpp @@ -476,10 +476,10 @@ bool llvm::expandDivision(BinaryOperator *Div) {    return true;  } -/// Generate code to compute the remainder of two integers of bitwidth up to  +/// Generate code to compute the remainder of two integers of bitwidth up to  /// 32 bits. Uses the above routines and extends the inputs/truncates the  /// outputs to operate in 32 bits; that is, these routines are good for targets -/// that have no or very little suppport for smaller than 32 bit integer  +/// that have no or very little suppport for smaller than 32 bit integer  /// arithmetic.  ///  /// Replace Rem with emulation code. @@ -527,7 +527,7 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {    return expandRemainder(cast<BinaryOperator>(ExtRem));  } -/// Generate code to compute the remainder of two integers of bitwidth up to  +/// Generate code to compute the remainder of two integers of bitwidth up to  /// 64 bits. Uses the above routines and extends the inputs/truncates the  /// outputs to operate in 64 bits.  /// @@ -613,7 +613,7 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {    } else {      ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty);      ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty); -    ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);   +    ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);    }    Trunc = Builder.CreateTrunc(ExtDiv, DivTy); @@ -662,7 +662,7 @@ bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) {    } else {      ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int64Ty);      ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int64Ty); -    ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);   +    ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);    }    Trunc = Builder.CreateTrunc(ExtDiv, DivTy); diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index 956d0387c7a8..a1f8e7484bcf 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -10,7 +10,7 @@  // This pass transforms loops by placing phi nodes at the end of the loops for  // all values that are live across the loop boundary.  For example, it turns  // the left into the right code: -//  +//  // for (...)                for (...)  //   if (c)                   if (c)  //     X1 = ...                 X1 = ... @@ -21,8 +21,8 @@  //                          ... = X4 + 4  //  // This is still valid LLVM; the extra phi nodes are purely redundant, and will -// be trivially eliminated by InstCombine.  The major benefit of this  -// transformation is that it makes many other loop optimizations, such as  +// be trivially eliminated by InstCombine.  The major benefit of this +// transformation is that it makes many other loop optimizations, such as  // LoopUnswitching, simpler.  
//  //===----------------------------------------------------------------------===// @@ -144,7 +144,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,        PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),                                      I->getName() + ".lcssa", &ExitBB->front()); - +      // Get the debug location from the original instruction. +      PN->setDebugLoc(I->getDebugLoc());        // Add inputs from inside the loop for this PHI.        for (BasicBlock *Pred : PredCache.get(ExitBB)) {          PN->addIncoming(I, Pred); diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index 13794c53f24b..78afe748e596 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -344,7 +344,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,  /// Update the branch weights of the latch of a peeled-off loop  /// iteration.  /// This sets the branch weights for the latch of the recently peeled off loop -/// iteration correctly.  +/// iteration correctly.  /// Our goal is to make sure that:  /// a) The total weight of all the copies of the loop body is preserved.  /// b) The total weight of the loop exit is preserved. @@ -544,7 +544,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,    //    // Each following iteration will split the current bottom anchor in two,    // and put the new copy of the loop body between these two blocks. That is, -  // after peeling another iteration from the example above, we'll split  +  // after peeling another iteration from the example above, we'll split    // InsertBot, and get:    //    // InsertTop: diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp index 323f2552ca80..88d595ee02ab 100644 --- a/lib/Transforms/Utils/MetaRenamer.cpp +++ b/lib/Transforms/Utils/MetaRenamer.cpp @@ -68,7 +68,7 @@ namespace {      PRNG prng;    }; -   +    struct MetaRenamer : public ModulePass {      // Pass identification, replacement for typeid      static char ID; diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index ca184ed7c4e3..4a1fd8d571aa 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -201,13 +201,13 @@ void SSAUpdater::RewriteUse(Use &U) {  void SSAUpdater::RewriteUseAfterInsertions(Use &U) {    Instruction *User = cast<Instruction>(U.getUser()); -   +    Value *V;    if (PHINode *UserPN = dyn_cast<PHINode>(User))      V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));    else      V = GetValueAtEndOfBlock(User->getParent()); -   +    U.set(V);  } @@ -235,7 +235,7 @@ public:      PHI_iterator(PHINode *P, bool) // end iterator        : PHI(P), idx(PHI->getNumIncomingValues()) {} -    PHI_iterator &operator++() { ++idx; return *this; }  +    PHI_iterator &operator++() { ++idx; return *this; }      bool operator==(const PHI_iterator& x) const { return idx == x.idx; }      bool operator!=(const PHI_iterator& x) const { return !operator==(x); } @@ -333,7 +333,7 @@ LoadAndStorePromoter::  LoadAndStorePromoter(ArrayRef<const Instruction *> Insts,                       SSAUpdater &S, StringRef BaseName) : SSA(S) {    if (Insts.empty()) return; -   +    const Value *SomeVal;    if (const LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))      SomeVal = LI; @@ -354,7 +354,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {    for (Instruction *User : Insts)      
UsesByBlock[User->getParent()].push_back(User); -   +    // Okay, now we can iterate over all the blocks in the function with uses,    // processing them.  Keep track of which loads are loading a live-in value.    // Walk the uses in the use-list order to be determinstic. @@ -364,10 +364,10 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {    for (Instruction *User : Insts) {      BasicBlock *BB = User->getParent();      TinyPtrVector<Instruction *> &BlockUses = UsesByBlock[BB]; -     +      // If this block has already been processed, ignore this repeat use.      if (BlockUses.empty()) continue; -     +      // Okay, this is the first use in the block.  If this block just has a      // single user in it, we can rewrite it trivially.      if (BlockUses.size() == 1) { @@ -375,13 +375,13 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {        if (StoreInst *SI = dyn_cast<StoreInst>(User)) {          updateDebugInfo(SI);          SSA.AddAvailableValue(BB, SI->getOperand(0)); -      } else  +      } else          // Otherwise it is a load, queue it to rewrite as a live-in load.          LiveInLoads.push_back(cast<LoadInst>(User));        BlockUses.clear();        continue;      } -     +      // Otherwise, check to see if this block is all loads.      bool HasStore = false;      for (Instruction *I : BlockUses) { @@ -390,7 +390,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {          break;        }      } -     +      // If so, we can queue them all as live in loads.  We don't have an      // efficient way to tell which on is first in the block and don't want to      // scan large blocks, so just add all loads as live ins. @@ -400,7 +400,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {        BlockUses.clear();        continue;      } -     +      // Otherwise, we have mixed loads and stores (or just a bunch of stores).      // Since SSAUpdater is purely for cross-block values, we need to determine      // the order of these instructions in the block.  If the first use in the @@ -411,7 +411,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {        if (LoadInst *L = dyn_cast<LoadInst>(&I)) {          // If this is a load from an unrelated pointer, ignore it.          if (!isInstInList(L, Insts)) continue; -         +          // If we haven't seen a store yet, this is a live in use, otherwise          // use the stored value.          if (StoredValue) { @@ -433,13 +433,13 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {          StoredValue = SI->getOperand(0);        }      } -     +      // The last stored value that happened is the live-out for the block.      assert(StoredValue && "Already checked that there is a store in block");      SSA.AddAvailableValue(BB, StoredValue);      BlockUses.clear();    } -   +    // Okay, now we rewrite all loads that use live-in values in the loop,    // inserting PHI nodes as necessary.    for (LoadInst *ALoad : LiveInLoads) { @@ -451,10 +451,10 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {      ALoad->replaceAllUsesWith(NewVal);      ReplacedLoads[ALoad] = NewVal;    } -   +    // Allow the client to do stuff before we start nuking things.    doExtraRewritesBeforeFinalDeletion(); -   +    // Now that everything is rewritten, delete the old instructions from the    // function.  They should all be dead now.    
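The walk above ultimately drives SSAUpdater: each block's last store becomes that block's available value, and the queued live-in loads are resolved afterwards. A self-contained sketch of that core API (the containers here are illustrative, not from this patch):

    SSAUpdater SSA;
    SSA.Initialize(Ty, "promoted");
    // Register each block's live-out definition of the promoted location.
    for (const auto &BlockAndValue : LiveOutDefs)
      SSA.AddAvailableValue(BlockAndValue.first, BlockAndValue.second);
    // Resolve every load of the live-in value; SSAUpdater inserts PHI
    // nodes across blocks as needed.
    for (LoadInst *L : LiveInLoads) {
      Value *V = SSA.GetValueInMiddleOfBlock(L->getParent());
      L->replaceAllUsesWith(V);
    }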
for (Instruction *User : Insts) { @@ -465,7 +465,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {      if (!User->use_empty()) {        Value *NewVal = ReplacedLoads[User];        assert(NewVal && "not a replaced load?"); -       +        // Propagate down to the ultimate replacee.  The intermediately loads        // could theoretically already have been deleted, so we don't want to        // dereference the Value*'s. @@ -474,11 +474,11 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {          NewVal = RLI->second;          RLI = ReplacedLoads.find(NewVal);        } -       +        replaceLoadWithValue(cast<LoadInst>(User), NewVal);        User->replaceAllUsesWith(NewVal);      } -     +      instructionDeleted(User);      User->eraseFromParent();    } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index e381fbc34ab4..65b23f4d94a1 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -196,7 +196,7 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,    SmallDenseMap<const SCEV*, Value*> CheapExpansions;    CheapExpansions[S] = ICmp->getOperand(IVOperIdx);    CheapExpansions[X] = ICmp->getOperand(1 - IVOperIdx); -   +    // TODO: Support multiple entry loops?  (We currently bail out of these in    // the IndVarSimplify pass)    if (auto *BB = L->getLoopPredecessor()) { diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 8c48597fc2e4..15e035874002 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -890,7 +890,7 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,      return nullptr;    // Replace the malloc with a calloc. We need the data layout to know what the -  // actual size of a 'size_t' parameter is.  +  // actual size of a 'size_t' parameter is.    B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator());    const DataLayout &DL = Malloc->getModule()->getDataLayout();    IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext()); @@ -970,7 +970,7 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,    Value *V = valueHasFloatPrecision(CI->getArgOperand(0));    if (V == nullptr)      return nullptr; -   +    // If call isn't an intrinsic, check that it isn't within a function with the    // same name as the float version of this call.    // @@ -1126,165 +1126,164 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {    if (!Pow->isFast())      return nullptr; -  const APFloat *Arg1C; -  if (!match(Pow->getArgOperand(1), m_APFloat(Arg1C))) -    return nullptr; -  if (!Arg1C->isExactlyValue(0.5) && !Arg1C->isExactlyValue(-0.5)) +  Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); +  Type *Ty = Pow->getType(); + +  const APFloat *ExpoF; +  if (!match(Expo, m_APFloat(ExpoF)) || +      (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))      return nullptr; -  // Fast-math flags from the pow() are propagated to all replacement ops. -  IRBuilder<>::FastMathFlagGuard Guard(B); -  B.setFastMathFlags(Pow->getFastMathFlags()); -  Type *Ty = Pow->getType(); -  Value *Sqrt; +  // If errno is never set, then use the intrinsic for sqrt().    
if (Pow->hasFnAttr(Attribute::ReadNone)) { -    // We know that errno is never set, so replace with an intrinsic: -    // pow(x, 0.5) --> llvm.sqrt(x) -    // llvm.pow(x, 0.5) --> llvm.sqrt(x) -    auto *F = Intrinsic::getDeclaration(Pow->getModule(), Intrinsic::sqrt, Ty); -    Sqrt = B.CreateCall(F, Pow->getArgOperand(0)); -  } else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, -                             LibFunc_sqrtl)) { -    // Errno could be set, so we must use a sqrt libcall. -    // TODO: We also should check that the target can in fact lower the sqrt -    // libcall. We currently have no way to ask this question, so we ask -    // whether the target has a sqrt libcall which is not exactly the same. -    Sqrt = emitUnaryFloatFnCall(Pow->getArgOperand(0), -                                TLI->getName(LibFunc_sqrt), B, +    Function *SqrtFn = Intrinsic::getDeclaration(Pow->getModule(), +                                                 Intrinsic::sqrt, Ty); +    Sqrt = B.CreateCall(SqrtFn, Base); +  } +  // Otherwise, use the libcall for sqrt(). +  else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) +    // TODO: We also should check that the target can in fact lower the sqrt() +    // libcall. We currently have no way to ask this question, so we ask if +    // the target has a sqrt() libcall, which is not exactly the same. +    Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt), B,                                  Pow->getCalledFunction()->getAttributes()); -  } else { -    // We can't replace with an intrinsic or a libcall. +  else      return nullptr; -  } -  // If this is pow(x, -0.5), get the reciprocal. -  if (Arg1C->isExactlyValue(-0.5)) -    Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt); +  // If the exponent is negative, then get the reciprocal. +  if (ExpoF->isNegative()) +    Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal");    return Sqrt;  } -Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { -  Function *Callee = CI->getCalledFunction(); -  Value *Ret = nullptr; +Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { +  Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); +  Function *Callee = Pow->getCalledFunction(); +  AttributeList Attrs = Callee->getAttributes();    StringRef Name = Callee->getName(); -  if (UnsafeFPShrink && Name == "pow" && hasFloatVersion(Name)) -    Ret = optimizeUnaryDoubleFP(CI, B, true); +  Module *Module = Pow->getModule(); +  Type *Ty = Pow->getType(); +  Value *Shrunk = nullptr; +  bool Ignored; + +  if (UnsafeFPShrink && +      Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name)) +    Shrunk = optimizeUnaryDoubleFP(Pow, B, true); + +  // Propagate the math semantics from the call to any created instructions. +  IRBuilder<>::FastMathFlagGuard Guard(B); +  B.setFastMathFlags(Pow->getFastMathFlags()); -  Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); +  // Evaluate special cases related to the base.    // pow(1.0, x) -> 1.0 -  if (match(Op1, m_SpecificFP(1.0))) -    return Op1; -  // pow(2.0, x) -> llvm.exp2(x) -  if (match(Op1, m_SpecificFP(2.0))) { -    Value *Exp2 = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::exp2, -                                            CI->getType()); -    return B.CreateCall(Exp2, Op2, "exp2"); -  } - -  // There's no llvm.exp10 intrinsic yet, but, maybe, some day there will -  // be one. 
-  if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { -    // pow(10.0, x) -> exp10(x) -    if (Op1C->isExactlyValue(10.0) && -        hasUnaryFloatFn(TLI, Op1->getType(), LibFunc_exp10, LibFunc_exp10f, -                        LibFunc_exp10l)) -      return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc_exp10), B, -                                  Callee->getAttributes()); +  if (match(Base, m_SpecificFP(1.0))) +    return Base; + +  // pow(2.0, x) -> exp2(x) +  if (match(Base, m_SpecificFP(2.0))) { +    Value *Exp2 = Intrinsic::getDeclaration(Module, Intrinsic::exp2, Ty); +    return B.CreateCall(Exp2, Expo, "exp2");    } +  // pow(10.0, x) -> exp10(x) +  if (ConstantFP *BaseC = dyn_cast<ConstantFP>(Base)) +    // There's no exp10 intrinsic yet, but, maybe, some day there shall be one. +    if (BaseC->isExactlyValue(10.0) && +        hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) +      return emitUnaryFloatFnCall(Expo, TLI->getName(LibFunc_exp10), B, Attrs); +    // pow(exp(x), y) -> exp(x * y)    // pow(exp2(x), y) -> exp2(x * y)    // We enable these only with fast-math. Besides rounding differences, the    // transformation changes overflow and underflow behavior quite dramatically.    // Example: x = 1000, y = 0.001.    // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1). -  auto *OpC = dyn_cast<CallInst>(Op1); -  if (OpC && OpC->isFast() && CI->isFast()) { -    LibFunc Func; -    Function *OpCCallee = OpC->getCalledFunction(); -    if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && -        TLI->has(Func) && (Func == LibFunc_exp || Func == LibFunc_exp2)) { +  auto *BaseFn = dyn_cast<CallInst>(Base); +  if (BaseFn && BaseFn->isFast() && Pow->isFast()) { +    LibFunc LibFn; +    Function *CalleeFn = BaseFn->getCalledFunction(); +    if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) && +        (LibFn == LibFunc_exp || LibFn == LibFunc_exp2) && TLI->has(LibFn)) {        IRBuilder<>::FastMathFlagGuard Guard(B); -      B.setFastMathFlags(CI->getFastMathFlags()); -      Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"); -      return emitUnaryFloatFnCall(FMul, OpCCallee->getName(), B, -                                  OpCCallee->getAttributes()); +      B.setFastMathFlags(Pow->getFastMathFlags()); + +      Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul"); +      return emitUnaryFloatFnCall(FMul, CalleeFn->getName(), B, +                                  CalleeFn->getAttributes());      }    } -  if (Value *Sqrt = replacePowWithSqrt(CI, B)) +  // Evaluate special cases related to the exponent. + +  if (Value *Sqrt = replacePowWithSqrt(Pow, B))      return Sqrt; -  ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); -  if (!Op2C) -    return Ret; +  ConstantFP *ExpoC = dyn_cast<ConstantFP>(Expo); +  if (!ExpoC) +    return Shrunk; -  if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 -    return ConstantFP::get(CI->getType(), 1.0); +  // pow(x, -1.0) -> 1.0 / x +  if (ExpoC->isExactlyValue(-1.0)) +    return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal"); -  // FIXME: Correct the transforms and pull this into replacePowWithSqrt(). -  if (Op2C->isExactlyValue(0.5) && -      hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf, -                      LibFunc_sqrtl)) { -    // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). -    // This is faster than calling pow, and still handles negative zero -    // and negative infinity correctly. 
-    // TODO: In finite-only mode, this could be just fabs(sqrt(x)). -    Value *Inf = ConstantFP::getInfinity(CI->getType()); -    Value *NegInf = ConstantFP::getInfinity(CI->getType(), true); +  // pow(x, 0.0) -> 1.0 +  if (ExpoC->getValueAPF().isZero()) +    return ConstantFP::get(Ty, 1.0); -    // TODO: As above, we should lower to the sqrt intrinsic if the pow is an -    // intrinsic, to match errno semantics. -    Value *Sqrt = emitUnaryFloatFnCall(Op1, "sqrt", B, Callee->getAttributes()); +  // pow(x, 1.0) -> x +  if (ExpoC->isExactlyValue(1.0)) +    return Base; -    Module *M = Callee->getParent(); -    Function *FabsF = Intrinsic::getDeclaration(M, Intrinsic::fabs, -                                                CI->getType()); -    Value *FAbs = B.CreateCall(FabsF, Sqrt); +  // pow(x, 2.0) -> x * x +  if (ExpoC->isExactlyValue(2.0)) +    return B.CreateFMul(Base, Base, "square"); -    Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf); -    Value *Sel = B.CreateSelect(FCmp, Inf, FAbs); -    return Sel; +  // FIXME: Correct the transforms and pull this into replacePowWithSqrt(). +  if (ExpoC->isExactlyValue(0.5) && +      hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) { +    // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). +    // This is faster than calling pow(), and still handles -0.0 and +    // negative infinity correctly. +    // TODO: In finite-only mode, this could be just fabs(sqrt(x)). +    Value *PosInf = ConstantFP::getInfinity(Ty); +    Value *NegInf = ConstantFP::getInfinity(Ty, true); + +    // TODO: As above, we should lower to the sqrt() intrinsic if the pow() is +    // an intrinsic, to match errno semantics. +    Value *Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt), +                                       B, Attrs); +    Function *FAbsFn = Intrinsic::getDeclaration(Module, Intrinsic::fabs, Ty); +    Value *FAbs = B.CreateCall(FAbsFn, Sqrt, "abs"); +    Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf"); +    Sqrt = B.CreateSelect(FCmp, PosInf, FAbs); +    return Sqrt;    } -  // Propagate fast-math-flags from the call to any created instructions. -  IRBuilder<>::FastMathFlagGuard Guard(B); -  B.setFastMathFlags(CI->getFastMathFlags()); -  // pow(x, 1.0) --> x -  if (Op2C->isExactlyValue(1.0)) -    return Op1; -  // pow(x, 2.0) --> x * x -  if (Op2C->isExactlyValue(2.0)) -    return B.CreateFMul(Op1, Op1, "pow2"); -  // pow(x, -1.0) --> 1.0 / x -  if (Op2C->isExactlyValue(-1.0)) -    return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); - -  // In -ffast-math, generate repeated fmul instead of generating pow(x, n). -  if (CI->isFast()) { -    APFloat V = abs(Op2C->getValueAPF()); -    // We limit to a max of 7 fmul(s). Thus max exponent is 32. +  // pow(x, n) -> x * x * x * .... +  if (Pow->isFast()) { +    APFloat ExpoA = abs(ExpoC->getValueAPF()); +    // We limit to a max of 7 fmul(s). Thus the maximum exponent is 32.      // This transformation applies to integer exponents only. -    if (V.compare(APFloat(V.getSemantics(), 32.0)) == APFloat::cmpGreaterThan || -        !V.isInteger()) +    if (!ExpoA.isInteger() || +        ExpoA.compare +            (APFloat(ExpoA.getSemantics(), 32.0)) == APFloat::cmpGreaterThan)        return nullptr;      // We will memoize intermediate products of the Addition Chain.      
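The getPow() helper used just below is not part of this diff; the real one is table-driven and, after the APFloat conversion shown further down, takes the exponent as a double. As a rough sketch of the memoized square-and-multiply expansion it performs (assuming a small integral exponent, and that the caller has seeded InnerChain[1] and InnerChain[2] as the code below does):

    // Expand Base^Exp into fmuls, memoizing every computed power in
    // InnerChain so shared subproducts are emitted only once.
    static Value *getPowSketch(Value *InnerChain[33], unsigned Exp,
                               IRBuilder<> &B) {
      if (InnerChain[Exp])
        return InnerChain[Exp];
      unsigned Half = Exp / 2;
      InnerChain[Exp] = B.CreateFMul(getPowSketch(InnerChain, Half, B),
                                     getPowSketch(InnerChain, Exp - Half, B));
      return InnerChain[Exp];
    }
    // For example x^7: x2 = x*x, x3 = x2*x, x4 = x2*x2, x7 = x3*x4,
    // four fmuls in total, within the 7-fmul cap that limits Exp to 32.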
Value *InnerChain[33] = {nullptr}; -    InnerChain[1] = Op1; -    InnerChain[2] = B.CreateFMul(Op1, Op1); +    InnerChain[1] = Base; +    InnerChain[2] = B.CreateFMul(Base, Base, "square");      // We cannot readily convert a non-double type (like float) to a double. -    // So we first convert V to something which could be converted to double. -    bool Ignored; -    V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored); -     -    Value *FMul = getPow(InnerChain, V.convertToDouble(), B); -    // For negative exponents simply compute the reciprocal. -    if (Op2C->isNegative()) -      FMul = B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), FMul); +    // So we first convert it to something which could be converted to double. +    ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored); +    Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B); + +    // If the exponent is negative, then get the reciprocal. +    if (ExpoC->isNegative()) +      FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal");      return FMul;    } diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp index 3640541e63cc..fd0da79487f1 100644 --- a/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/lib/Transforms/Utils/SymbolRewriter.cpp @@ -536,7 +536,7 @@ private:  char RewriteSymbolsLegacyPass::ID = 0;  RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID) { -  initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry());   +  initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry());  }  RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass( diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index e633ac0c874d..d49b26472548 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -61,7 +61,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {    } else if (UnreachableBlocks.size() == 1) {      UnreachableBlock = UnreachableBlocks.front();    } else { -    UnreachableBlock = BasicBlock::Create(F.getContext(),  +    UnreachableBlock = BasicBlock::Create(F.getContext(),                                            "UnifiedUnreachableBlock", &F);      new UnreachableInst(F.getContext(), UnreachableBlock); diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 3c693f5d5ee0..859d0c92ca5a 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -535,13 +535,13 @@ protected:    /// Returns true if we should generate a scalar version of \p IV.    bool needsScalarInduction(Instruction *IV) const; -  /// If there is a cast involved in the induction variable \p ID, which should  -  /// be ignored in the vectorized loop body, this function records the  -  /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the  -  /// cast. We had already proved that the casted Phi is equal to the uncasted  -  /// Phi in the vectorized loop (under a runtime guard), and therefore  -  /// there is no need to vectorize the cast - the same value can be used in the  -  /// vector loop for both the Phi and the cast.  +  /// If there is a cast involved in the induction variable \p ID, which should +  /// be ignored in the vectorized loop body, this function records the +  /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the +  /// cast. 
We had already proved that the casted Phi is equal to the uncasted +  /// Phi in the vectorized loop (under a runtime guard), and therefore +  /// there is no need to vectorize the cast - the same value can be used in the +  /// vector loop for both the Phi and the cast.    /// If \p VectorLoopValue is a scalarized value, \p Lane is also specified,    /// Otherwise, \p VectorLoopValue is a widened/vectorized value.    /// @@ -5443,7 +5443,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I){    // high enough value to practically disable vectorization with such    // operations, except where previously deployed legality hack allowed    // using very low cost values. This is to avoid regressions coming simply -  // from moving "masked load/store" check from legality to cost model.  +  // from moving "masked load/store" check from legality to cost model.    // Masked Load/Gather emulation was previously never allowed.    // Limited number of Masked Store/Scatter emulation was allowed.    assert(isScalarWithPredication(I) && @@ -6412,12 +6412,12 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions(          }))        DeadInstructions.insert(IndUpdate); -    // We record as "Dead" also the type-casting instructions we had identified  +    // We record as "Dead" also the type-casting instructions we had identified      // during induction analysis. We don't need any handling for them in the -    // vectorized loop because we have proven that, under a proper runtime  -    // test guarding the vectorized loop, the value of the phi, and the casted  +    // vectorized loop because we have proven that, under a proper runtime +    // test guarding the vectorized loop, the value of the phi, and the casted      // value of the phi, are the same. The last instruction in this casting chain -    // will get its scalar/vector/widened def from the scalar/vector/widened def  +    // will get its scalar/vector/widened def from the scalar/vector/widened def      // of the respective phi node. Any other casts in the induction def-use chain      // have no other uses outside the phi update chain, and will be ignored.      InductionDescriptor &IndDes = Induction.second; @@ -7060,8 +7060,8 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range) {    auto Plan = llvm::make_unique<VPlan>();    // Build hierarchical CFG -  VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI); -  HCFGBuilder.buildHierarchicalCFG(*Plan.get()); +  VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); +  HCFGBuilder.buildHierarchicalCFG();    return Plan;  } diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index ac8c4f046c6f..5c2efe885e22 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -345,7 +345,7 @@ static Value *isOneOf(const InstructionsState &S, Value *Op) {  }  /// \returns analysis of the Instructions in \p VL described in -/// InstructionsState, the Opcode that we suppose the whole list  +/// InstructionsState, the Opcode that we suppose the whole list  /// could be vectorized even if its structure is diverse.  static InstructionsState getSameOpcode(ArrayRef<Value *> VL,                                         unsigned BaseIndex = 0) { @@ -3111,6 +3111,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {            // TODO: Merge this shuffle with the ReorderShuffleMask.            
if (!E->ReorderIndices.empty())              Builder.SetInsertPoint(VL0); +          else if (auto *I = dyn_cast<Instruction>(V)) +            Builder.SetInsertPoint(I->getParent(), +                                   std::next(I->getIterator())); +          else +            Builder.SetInsertPoint(&F->getEntryBlock(), +                                   F->getEntryBlock().getFirstInsertionPt());            V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),                                            E->ReuseShuffleIndices, "shuffle");          } diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp index f7b07b722bb1..0780e70809d0 100644 --- a/lib/Transforms/Vectorize/VPlan.cpp +++ b/lib/Transforms/Vectorize/VPlan.cpp @@ -18,6 +18,7 @@  //===----------------------------------------------------------------------===//  #include "VPlan.h" +#include "VPlanDominatorTree.h"  #include "llvm/ADT/DepthFirstIterator.h"  #include "llvm/ADT/PostOrderIterator.h"  #include "llvm/ADT/SmallVector.h" @@ -25,7 +26,6 @@  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/CFG.h" -#include "llvm/IR/Dominators.h"  #include "llvm/IR/InstrTypes.h"  #include "llvm/IR/Instruction.h"  #include "llvm/IR/Instructions.h" @@ -34,6 +34,7 @@  #include "llvm/Support/Casting.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GenericDomTreeConstruction.h"  #include "llvm/Support/GraphWriter.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -576,3 +577,5 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O,    }    O << "\\l\"";  } + +template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT); diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h index 866951cb79a4..883e6f52369a 100644 --- a/lib/Transforms/Vectorize/VPlan.h +++ b/lib/Transforms/Vectorize/VPlan.h @@ -26,8 +26,10 @@  #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H  #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H +#include "VPlanLoopInfo.h"  #include "VPlanValue.h"  #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h"  #include "llvm/ADT/GraphTraits.h"  #include "llvm/ADT/Optional.h"  #include "llvm/ADT/SmallPtrSet.h" @@ -51,7 +53,6 @@ class BasicBlock;  class DominatorTree;  class InnerLoopVectorizer;  class InterleaveGroup; -class LoopInfo;  class raw_ostream;  class Value;  class VPBasicBlock; @@ -516,6 +517,23 @@ public:    /// Delete all blocks reachable from a given VPBlockBase, inclusive.    static void deleteCFG(VPBlockBase *Entry); + +  void printAsOperand(raw_ostream &OS, bool PrintType) const { +    OS << getName(); +  } + +  void print(raw_ostream &OS) const { +    // TODO: Only printing VPBB name for now since we only have dot printing +    // support for VPInstructions/Recipes. +    printAsOperand(OS, false); +  } + +  /// Return true if it is legal to hoist instructions into this block. +  bool isLegalToHoistInto() { +    // There are currently no constraints that prevent an instruction to be +    // hoisted into a VPBlockBase. +    return true; +  }  };  /// VPRecipeBase is a base class modeling a sequence of one or more output IR @@ -1037,6 +1055,12 @@ public:      EntryBlock->setParent(this);    } +  // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a +  // specific interface of llvm::Function, instead of using +  // GraphTraints::getEntryNode. 
We should add a new template parameter to +  // DominatorTreeBase representing the Graph type. +  VPBlockBase &front() const { return *Entry; } +    const VPBlockBase *getExit() const { return Exit; }    VPBlockBase *getExit() { return Exit; } @@ -1087,6 +1111,9 @@ private:    /// VPlan.    Value2VPValueTy Value2VPValue; +  /// Holds the VPLoopInfo analysis for this VPlan. +  VPLoopInfo VPLInfo; +  public:    VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {} @@ -1133,6 +1160,10 @@ public:      return Value2VPValue[V];    } +  /// Return the VPLoopInfo analysis for this VPlan. +  VPLoopInfo &getVPLoopInfo() { return VPLInfo; } +  const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; } +  private:    /// Add to the given dominator tree the header block and every new basic block    /// that was created between it and the latch block, inclusive. @@ -1210,12 +1241,15 @@ inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan) {    return OS;  } -//===--------------------------------------------------------------------===// -// GraphTraits specializations for VPlan/VPRegionBlock Control-Flow Graphs  // -//===--------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs     // +//===----------------------------------------------------------------------===// -// Provide specializations of GraphTraits to be able to treat a VPBlockBase as a -// graph of VPBlockBase nodes... +// The following set of template specializations implement GraphTraits to treat +// any VPBlockBase as a node in a graph of VPBlockBases. It's important to note +// that VPBlockBase traits don't recurse into VPRegioBlocks, i.e., if the +// VPBlockBase is a VPRegionBlock, this specialization provides access to its +// successors/predecessors but not to the blocks inside the region.  template <> struct GraphTraits<VPBlockBase *> {    using NodeRef = VPBlockBase *; @@ -1247,17 +1281,13 @@ template <> struct GraphTraits<const VPBlockBase *> {    }  }; -// Provide specializations of GraphTraits to be able to treat a VPBlockBase as a -// graph of VPBlockBase nodes... and to walk it in inverse order. Inverse order -// for a VPBlockBase is considered to be when traversing the predecessors of a -// VPBlockBase instead of its successors. +// Inverse order specialization for VPBasicBlocks. Predecessors are used instead +// of successors for the inverse traversal.  template <> struct GraphTraits<Inverse<VPBlockBase *>> {    using NodeRef = VPBlockBase *;    using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator; -  static Inverse<VPBlockBase *> getEntryNode(Inverse<VPBlockBase *> B) { -    return B; -  } +  static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; }    static inline ChildIteratorType child_begin(NodeRef N) {      return N->getPredecessors().begin(); @@ -1268,6 +1298,71 @@ template <> struct GraphTraits<Inverse<VPBlockBase *>> {    }  }; +// The following set of template specializations implement GraphTraits to +// treat VPRegionBlock as a graph and recurse inside its nodes. It's important +// to note that the blocks inside the VPRegionBlock are treated as VPBlockBases +// (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so +// there won't be automatic recursion into other VPBlockBases that turn to be +// VPRegionBlocks. 
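The specializations that follow make a VPRegionBlock usable with LLVM's generic graph utilities. A hedged usage sketch (Region is a hypothetical VPRegionBlock *; depth_first comes from llvm/ADT/DepthFirstIterator.h and dbgs() from llvm/Support/Debug.h):

    // Visits every VPBlockBase immediately inside Region; nested
    // VPRegionBlocks appear as opaque single nodes, with no recursion
    // into their contents, exactly as described above.
    for (VPBlockBase *Block : depth_first(Region))
      dbgs() << Block->getName() << "\n";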
+ +template <> +struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> { +  using GraphRef = VPRegionBlock *; +  using nodes_iterator = df_iterator<NodeRef>; + +  static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } + +  static nodes_iterator nodes_begin(GraphRef N) { +    return nodes_iterator::begin(N->getEntry()); +  } + +  static nodes_iterator nodes_end(GraphRef N) { +    // df_iterator::end() returns an empty iterator so the node used doesn't +    // matter. +    return nodes_iterator::end(N); +  } +}; + +template <> +struct GraphTraits<const VPRegionBlock *> +    : public GraphTraits<const VPBlockBase *> { +  using GraphRef = const VPRegionBlock *; +  using nodes_iterator = df_iterator<NodeRef>; + +  static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } + +  static nodes_iterator nodes_begin(GraphRef N) { +    return nodes_iterator::begin(N->getEntry()); +  } + +  static nodes_iterator nodes_end(GraphRef N) { +    // df_iterator::end() returns an empty iterator so the node used doesn't +    // matter. +    return nodes_iterator::end(N); +  } +}; + +template <> +struct GraphTraits<Inverse<VPRegionBlock *>> +    : public GraphTraits<Inverse<VPBlockBase *>> { +  using GraphRef = VPRegionBlock *; +  using nodes_iterator = df_iterator<NodeRef>; + +  static NodeRef getEntryNode(Inverse<GraphRef> N) { +    return N.Graph->getExit(); +  } + +  static nodes_iterator nodes_begin(GraphRef N) { +    return nodes_iterator::begin(N->getExit()); +  } + +  static nodes_iterator nodes_end(GraphRef N) { +    // df_iterator::end() returns an empty iterator so the node used doesn't +    // matter. +    return nodes_iterator::end(N); +  } +}; +  //===----------------------------------------------------------------------===//  // VPlan Utilities  //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/Vectorize/VPlanDominatorTree.h b/lib/Transforms/Vectorize/VPlanDominatorTree.h new file mode 100644 index 000000000000..1b81097b6d31 --- /dev/null +++ b/lib/Transforms/Vectorize/VPlanDominatorTree.h @@ -0,0 +1,41 @@ +//===-- VPlanDominatorTree.h ------------------------------------*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements dominator tree analysis for a single level of a VPlan's +/// H-CFG. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H +#define LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H + +#include "VPlan.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/IR/Dominators.h" + +namespace llvm { + +/// Template specialization of the standard LLVM dominator tree utility for +/// VPBlockBases. +using VPDominatorTree = DomTreeBase<VPBlockBase>; + +using VPDomTreeNode = DomTreeNodeBase<VPBlockBase>; + +/// Template specializations of GraphTraits for VPDomTreeNode. 
+template <> +struct GraphTraits<VPDomTreeNode *> +    : public DomTreeGraphTraitsBase<VPDomTreeNode, VPDomTreeNode::iterator> {}; + +template <> +struct GraphTraits<const VPDomTreeNode *> +    : public DomTreeGraphTraitsBase<const VPDomTreeNode, +                                    VPDomTreeNode::const_iterator> {}; +} // namespace llvm +#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index 08129b74cddf..b6307acb9474 100644 --- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -324,13 +324,28 @@ VPRegionBlock *PlainCFGBuilder::buildPlainCFG() {    return TopRegion;  } +VPRegionBlock *VPlanHCFGBuilder::buildPlainCFG() { +  PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); +  return PCFGBuilder.buildPlainCFG(); +} +  // Public interface to build a H-CFG. -void VPlanHCFGBuilder::buildHierarchicalCFG(VPlan &Plan) { +void VPlanHCFGBuilder::buildHierarchicalCFG() {    // Build Top Region enclosing the plain CFG and set it as VPlan entry. -  PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); -  VPRegionBlock *TopRegion = PCFGBuilder.buildPlainCFG(); +  VPRegionBlock *TopRegion = buildPlainCFG();    Plan.setEntry(TopRegion);    LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);    Verifier.verifyHierarchicalCFG(TopRegion); + +  // Compute plain CFG dom tree for VPLInfo. +  VPDomTree.recalculate(*TopRegion); +  LLVM_DEBUG(dbgs() << "Dominator Tree after building the plain CFG.\n"; +             VPDomTree.print(dbgs())); + +  // Compute VPLInfo and keep it in Plan. +  VPLoopInfo &VPLInfo = Plan.getVPLoopInfo(); +  VPLInfo.analyze(VPDomTree); +  LLVM_DEBUG(dbgs() << "VPLoop Info After buildPlainCFG:\n"; +             VPLInfo.print(dbgs()));  } diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h index c4e69843615a..3f11dcb5164d 100644 --- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h +++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h @@ -26,14 +26,18 @@  #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H  #include "VPlan.h" +#include "VPlanDominatorTree.h"  #include "VPlanVerifier.h"  namespace llvm {  class Loop; +class VPlanTestBase;  /// Main class to build the VPlan H-CFG for an incoming IR.  class VPlanHCFGBuilder { +  friend VPlanTestBase; +  private:    // The outermost loop of the input loop nest considered for vectorization.    Loop *TheLoop; @@ -41,14 +45,27 @@ private:    // Loop Info analysis.    LoopInfo *LI; +  // The VPlan that will contain the H-CFG we are building. +  VPlan &Plan; +    // VPlan verifier utility.    VPlanVerifier Verifier; +  // Dominator analysis for VPlan plain CFG to be used in the +  // construction of the H-CFG. This analysis is no longer valid once regions +  // are introduced. +  VPDominatorTree VPDomTree; + +  /// Build plain CFG for TheLoop. Return a new VPRegionBlock (TopRegion) +  /// enclosing the plain CFG. +  VPRegionBlock *buildPlainCFG(); +  public: -  VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI) : TheLoop(Lp), LI(LI) {} +  VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P) +      : TheLoop(Lp), LI(LI), Plan(P) {} -  /// Build H-CFG for TheLoop and update \p Plan accordingly. -  void buildHierarchicalCFG(VPlan &Plan); +  /// Build H-CFG for TheLoop and update Plan accordingly. 
+  void buildHierarchicalCFG();
 };
 
 } // namespace llvm
diff --git a/lib/Transforms/Vectorize/VPlanLoopInfo.h b/lib/Transforms/Vectorize/VPlanLoopInfo.h
new file mode 100644
index 000000000000..5c2485fc2145
--- /dev/null
+++ b/lib/Transforms/Vectorize/VPlanLoopInfo.h
@@ -0,0 +1,45 @@
+//===-- VPLoopInfo.h --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines VPLoopInfo analysis and VPLoop class. VPLoopInfo is a
+/// specialization of LoopInfoBase for VPBlockBase. VPLoops is a specialization
+/// of LoopBase that is used to hold loop metadata from VPLoopInfo. Further
+/// information can be found in VectorizationPlanner.rst.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
+
+#include "llvm/Analysis/LoopInfoImpl.h"
+
+namespace llvm {
+class VPBlockBase;
+
+/// Hold analysis information for every loop detected by VPLoopInfo. It is an
+/// instantiation of LoopBase.
+class VPLoop : public LoopBase<VPBlockBase, VPLoop> {
+private:
+  friend class LoopInfoBase<VPBlockBase, VPLoop>;
+  explicit VPLoop(VPBlockBase *VPB) : LoopBase<VPBlockBase, VPLoop>(VPB) {}
+};
+
+/// VPLoopInfo provides analysis of natural loop for VPBlockBase-based
+/// Hierarchical CFG. It is a specialization of LoopInfoBase class.
+// TODO: VPLoopInfo is initially computed on top of the VPlan plain CFG, which
+// is the same as the incoming IR CFG. If it's more efficient than running the
+// whole loop detection algorithm, we may want to create a mechanism to
+// translate LoopInfo into VPLoopInfo. However, that would require significant
+// changes in LoopInfoBase class.
+typedef LoopInfoBase<VPBlockBase, VPLoop> VPLoopInfo;
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
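Taken together, the new pieces compose as in VPlanHCFGBuilder::buildHierarchicalCFG() above; a condensed sketch, assuming TopRegion is the VPRegionBlock enclosing the plain CFG:

    // Both analyses are the generic templates instantiated for
    // VPBlockBase, so the familiar DomTree/LoopInfo APIs carry over.
    VPDominatorTree VPDomTree;
    VPDomTree.recalculate(*TopRegion);

    VPLoopInfo VPLInfo;
    VPLInfo.analyze(VPDomTree);
    for (VPLoop *TopLevelLoop : VPLInfo)
      dbgs() << "VPLoop header: " << TopLevelLoop->getHeader()->getName()
             << "\n";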
