Diffstat (limited to 'lib/Transforms/Utils')
-rw-r--r-- | lib/Transforms/Utils/BypassSlowDivision.cpp |   4
-rw-r--r-- | lib/Transforms/Utils/CloneFunction.cpp      |  32
-rw-r--r-- | lib/Transforms/Utils/CloneModule.cpp        |   2
-rw-r--r-- | lib/Transforms/Utils/EscapeEnumerator.cpp   |   3
-rw-r--r-- | lib/Transforms/Utils/InlineFunction.cpp     |  61
-rw-r--r-- | lib/Transforms/Utils/InstructionNamer.cpp   |  13
-rw-r--r-- | lib/Transforms/Utils/Local.cpp              | 106
-rw-r--r-- | lib/Transforms/Utils/LoopUtils.cpp          | 201
-rw-r--r-- | lib/Transforms/Utils/ModuleUtils.cpp        |  12
-rw-r--r-- | lib/Transforms/Utils/SimplifyLibCalls.cpp   |   6
-rw-r--r-- | lib/Transforms/Utils/VNCoercion.cpp         |   9
-rw-r--r-- | lib/Transforms/Utils/ValueMapper.cpp        |   9
12 files changed, 324 insertions, 134 deletions
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index 7ffdad597a9b..83ec7f55d1af 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -261,10 +261,10 @@ ValueRange FastDivInsertionTask::getValueRange(Value *V,
   computeKnownBits(V, Known, DL);
 
-  if (Known.Zero.countLeadingOnes() >= HiBits)
+  if (Known.countMinLeadingZeros() >= HiBits)
     return VALRNG_KNOWN_SHORT;
 
-  if (Known.One.countLeadingZeros() < HiBits)
+  if (Known.countMaxLeadingZeros() < HiBits)
     return VALRNG_LIKELY_LONG;
 
   // Long integer divisions are often used in hashtable implementations. It's
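Aside: the two accessors adopted above are readable spellings of the old APInt idioms on KnownBits::Zero/One. A minimal standalone sketch of their semantics, assuming LLVM's llvm/Support/KnownBits.h (not part of the patch):

#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  KnownBits Known(32);        // a 32-bit value, nothing known yet
  Known.Zero.setHighBits(16); // top 16 bits are known to be zero

  // Replaces Known.Zero.countLeadingOnes(): high bits provably zero.
  outs() << Known.countMinLeadingZeros() << "\n"; // 16
  // Replaces Known.One.countLeadingZeros(): high bits that *could* be zero
  // (no high bit is known one here, so all 32).
  outs() << Known.countMaxLeadingZeros() << "\n"; // 32
  return 0;
}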
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index d5124ac89016..4aa26fd14fee 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -41,6 +41,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
                                   const Twine &NameSuffix, Function *F,
                                   ClonedCodeInfo *CodeInfo) {
+  DenseMap<const MDNode *, MDNode *> Cache;
   BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
   if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
 
@@ -50,6 +51,9 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
   for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
        II != IE; ++II) {
     Instruction *NewInst = II->clone();
+    if (F && F->getSubprogram())
+      DebugLoc::reparentDebugInfo(*NewInst, BB->getParent()->getSubprogram(),
+                                  F->getSubprogram(), Cache);
     if (II->hasName())
       NewInst->setName(II->getName()+NameSuffix);
     NewBB->getInstList().push_back(NewInst);
@@ -120,12 +124,28 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
   SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
   OldFunc->getAllMetadata(MDs);
-  for (auto MD : MDs)
-    NewFunc->addMetadata(
-        MD.first,
-        *MapMetadata(MD.second, VMap,
-                     ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
-                     TypeMapper, Materializer));
+  for (auto MD : MDs) {
+    MDNode *NewMD;
+    bool MustCloneSP =
+        (MD.first == LLVMContext::MD_dbg && OldFunc->getParent() &&
+         OldFunc->getParent() == NewFunc->getParent());
+    if (MustCloneSP) {
+      auto *SP = cast<DISubprogram>(MD.second);
+      NewMD = DISubprogram::getDistinct(
+          NewFunc->getContext(), SP->getScope(), SP->getName(),
+          NewFunc->getName(), SP->getFile(), SP->getLine(), SP->getType(),
+          SP->isLocalToUnit(), SP->isDefinition(), SP->getScopeLine(),
+          SP->getContainingType(), SP->getVirtuality(), SP->getVirtualIndex(),
+          SP->getThisAdjustment(), SP->getFlags(), SP->isOptimized(),
+          SP->getUnit(), SP->getTemplateParams(), SP->getDeclaration(),
+          SP->getVariables(), SP->getThrownTypes());
+    } else
+      NewMD =
+          MapMetadata(MD.second, VMap,
+                      ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+                      TypeMapper, Materializer);
+    NewFunc->addMetadata(MD.first, *NewMD);
+  }
 
   // Loop over all of the basic blocks in the function, cloning them as
   // appropriate.  Note that we save BE this way in order to handle cloning of
   // recursive functions into themselves.
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 4e9d67252d6c..5444b752de82 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -96,7 +96,7 @@ std::unique_ptr<Module> llvm::CloneModule(
     else
       GV = new GlobalVariable(
           *New, I->getValueType(), false, GlobalValue::ExternalLinkage,
-          (Constant *)nullptr, I->getName(), (GlobalVariable *)nullptr,
+          nullptr, I->getName(), nullptr,
           I->getThreadLocalMode(), I->getType()->getAddressSpace());
     VMap[&*I] = GV;
     // We do not copy attributes (mainly because copying between different
diff --git a/lib/Transforms/Utils/EscapeEnumerator.cpp b/lib/Transforms/Utils/EscapeEnumerator.cpp
index 8c2386554da5..78d7474e5b95 100644
--- a/lib/Transforms/Utils/EscapeEnumerator.cpp
+++ b/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -67,8 +67,7 @@ IRBuilder<> *EscapeEnumerator::Next() {
   // Create a cleanup block.
   LLVMContext &C = F.getContext();
   BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
-  Type *ExnTy =
-      StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr);
+  Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C));
  if (!F.hasPersonalityFn()) {
     Constant *PersFn = getDefaultPersonalityFn(F.getParent());
     F.setPersonalityFn(PersFn);
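Aside: the EscapeEnumerator hunk above (and the ModuleUtils and SimplifyLibCalls hunks below) all drop the same trailing nullptr sentinel from the variadic StructType::get overload. A minimal sketch of the new calling convention, assuming LLVM's IR type API:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext C;
  // Old form, now removed:
  //   StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr);
  // New form: the argument list is exactly the element types.
  StructType *ExnTy =
      StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C));
  ExnTy->print(outs());
  outs() << "\n"; // { i8*, i32 }
  return 0;
}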
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 6d56e08af99f..9cb4762b683c 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -1302,41 +1302,6 @@ static bool hasLifetimeMarkers(AllocaInst *AI) {
   return false;
 }
 
-/// Rebuild the entire inlined-at chain for this instruction so that the top of
-/// the chain now is inlined-at the new call site.
-static DebugLoc
-updateInlinedAtInfo(const DebugLoc &DL, DILocation *InlinedAtNode,
-                    LLVMContext &Ctx,
-                    DenseMap<const DILocation *, DILocation *> &IANodes) {
-  SmallVector<DILocation *, 3> InlinedAtLocations;
-  DILocation *Last = InlinedAtNode;
-  DILocation *CurInlinedAt = DL;
-
-  // Gather all the inlined-at nodes
-  while (DILocation *IA = CurInlinedAt->getInlinedAt()) {
-    // Skip any we've already built nodes for
-    if (DILocation *Found = IANodes[IA]) {
-      Last = Found;
-      break;
-    }
-
-    InlinedAtLocations.push_back(IA);
-    CurInlinedAt = IA;
-  }
-
-  // Starting from the top, rebuild the nodes to point to the new inlined-at
-  // location (then rebuilding the rest of the chain behind it) and update the
-  // map of already-constructed inlined-at nodes.
-  for (const DILocation *MD : reverse(InlinedAtLocations)) {
-    Last = IANodes[MD] = DILocation::getDistinct(
-        Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last);
-  }
-
-  // And finally create the normal location for this instruction, referring to
-  // the new inlined-at chain.
-  return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last);
-}
-
 /// Return the result of AI->isStaticAlloca() if AI were moved to the entry
 /// block. Allocas used in inalloca calls and allocas of dynamic array size
 /// cannot be static.
@@ -1364,14 +1329,16 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
   // Cache the inlined-at nodes as they're built so they are reused, without
   // this every instruction's inlined-at chain would become distinct from each
   // other.
-  DenseMap<const DILocation *, DILocation *> IANodes;
+  DenseMap<const MDNode *, MDNode *> IANodes;
 
   for (; FI != Fn->end(); ++FI) {
     for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
          BI != BE; ++BI) {
       if (DebugLoc DL = BI->getDebugLoc()) {
-        BI->setDebugLoc(
-            updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes));
+        auto IA = DebugLoc::appendInlinedAt(DL, InlinedAtNode, BI->getContext(),
+                                            IANodes);
+        auto IDL = DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), IA);
+        BI->setDebugLoc(IDL);
         continue;
       }
 
@@ -1429,11 +1396,12 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock,
 /// Update the branch metadata for cloned call instructions.
 static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
                               const Optional<uint64_t> &CalleeEntryCount,
-                              const Instruction *TheCall) {
+                              const Instruction *TheCall,
+                              ProfileSummaryInfo *PSI) {
   if (!CalleeEntryCount.hasValue() || CalleeEntryCount.getValue() < 1)
     return;
   Optional<uint64_t> CallSiteCount =
-      ProfileSummaryInfo::getProfileCount(TheCall, nullptr);
+      PSI ? PSI->getProfileCount(TheCall, nullptr) : None;
   uint64_t CallCount =
       std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0,
                CalleeEntryCount.getValue());
@@ -1456,16 +1424,16 @@ static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
 /// The callsite's block count is subtracted from the callee's function entry
 /// count.
 static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB,
-                              Instruction *CallInst, Function *Callee) {
+                              Instruction *CallInst, Function *Callee,
+                              ProfileSummaryInfo *PSI) {
   // If the callee has a original count of N, and the estimated count of
   // callsite is M, the new callee count is set to N - M. M is estimated from
   // the caller's entry count, its entry block frequency and the block frequency
   // of the callsite.
   Optional<uint64_t> CalleeCount = Callee->getEntryCount();
-  if (!CalleeCount.hasValue())
+  if (!CalleeCount.hasValue() || !PSI)
     return;
-  Optional<uint64_t> CallCount =
-      ProfileSummaryInfo::getProfileCount(CallInst, CallerBFI);
+  Optional<uint64_t> CallCount = PSI->getProfileCount(CallInst, CallerBFI);
   if (!CallCount.hasValue())
     return;
   // Since CallSiteCount is an estimate, it could exceed the original callee
@@ -1668,9 +1636,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
     updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
                     CalledFunc->front());
 
-    updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall);
+    updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall,
+                      IFI.PSI);
     // Update the profile count of callee.
-    updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc);
+    updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc, IFI.PSI);
 
     // Inject byval arguments initialization.
     for (std::pair<Value*, Value*> &Init : ByValInit)
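Aside: the removed updateInlinedAtInfo helper is replaced by the generic DebugLoc::appendInlinedAt. A self-contained model of what that rewrite does (this is illustrative code, not the LLVM API; the Loc struct and names are invented):

#include <cstdio>
#include <deque>
#include <map>

struct Loc {
  unsigned Line;
  const Loc *InlinedAt; // enclosing call site, or nullptr
};

// Rebuild DL's inlined-at chain so its outermost frame points at NewCallSite.
// Rebuilt nodes are cached so all instructions of one inlined body share them.
static const Loc *appendInlinedAt(const Loc &DL, const Loc *NewCallSite,
                                  std::map<const Loc *, const Loc *> &Cache,
                                  std::deque<Loc> &Storage) {
  std::deque<const Loc *> Chain; // frames still to rebuild, outermost first
  const Loc *Last = NewCallSite;
  for (const Loc *IA = DL.InlinedAt; IA; IA = IA->InlinedAt) {
    auto Found = Cache.find(IA);
    if (Found != Cache.end()) { // already rebuilt for an earlier instruction
      Last = Found->second;
      break;
    }
    Chain.push_front(IA);
  }
  for (const Loc *IA : Chain) { // rebuild outermost -> innermost
    Storage.push_back({IA->Line, Last});
    Last = Cache[IA] = &Storage.back();
  }
  return Last; // the new inlined-at node for DL
}

int main() {
  Loc InCallee{42, nullptr};  // a location inside the inlined callee
  Loc CallSite{10, nullptr};  // where the call being inlined lives
  std::map<const Loc *, const Loc *> Cache;
  std::deque<Loc> Storage;
  const Loc *IA = appendInlinedAt(InCallee, &CallSite, Cache, Storage);
  std::printf("line %u inlined at line %u\n", InCallee.Line, IA->Line);
  return 0;
}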
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 8a1973d1db05..53b432fcafd4 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -26,16 +26,15 @@ namespace {
     InstNamer() : FunctionPass(ID) {
       initializeInstNamerPass(*PassRegistry::getPassRegistry());
     }
-    
+
     void getAnalysisUsage(AnalysisUsage &Info) const override {
       Info.setPreservesAll();
     }
 
     bool runOnFunction(Function &F) override {
-      for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
-           AI != AE; ++AI)
-        if (!AI->hasName() && !AI->getType()->isVoidTy())
-          AI->setName("arg");
+      for (auto &Arg : F.args())
+        if (!Arg.hasName())
+          Arg.setName("arg");
 
       for (BasicBlock &BB : F) {
         if (!BB.hasName())
@@ -48,11 +47,11 @@ namespace {
       return true;
     }
   };
-  
+
   char InstNamer::ID = 0;
 }
 
-INITIALIZE_PASS(InstNamer, "instnamer", 
+INITIALIZE_PASS(InstNamer, "instnamer",
                 "Assign names to anonymous instructions", false, false)
 char &llvm::InstructionNamerID = InstNamer::ID;
 //===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index ce6b703f3528..1ca509472b5f 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -1041,7 +1041,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
 
   KnownBits Known(BitWidth);
   computeKnownBits(V, Known, DL, 0, AC, CxtI, DT);
-  unsigned TrailZ = Known.Zero.countTrailingOnes();
+  unsigned TrailZ = Known.countMinTrailingZeros();
 
   // Avoid trouble with ridiculously large TrailZ values, such as
   // those computed from a null pointer.
@@ -1105,8 +1105,9 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,
 void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                            StoreInst *SI, DIBuilder &Builder) {
   auto *DIVar = DDI->getVariable();
-  auto *DIExpr = DDI->getExpression();
   assert(DIVar && "Missing variable");
+  auto *DIExpr = DDI->getExpression();
+  Value *DV = SI->getOperand(0);
 
   // If an argument is zero extended then use argument directly. The ZExt
   // may be zapped by an optimization pass in future.
@@ -1116,34 +1117,28 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
   if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
     ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
   if (ExtendedArg) {
-    // We're now only describing a subset of the variable. The fragment we're
-    // describing will always be smaller than the variable size, because
-    // VariableSize == Size of Alloca described by DDI. Since SI stores
-    // to the alloca described by DDI, if it's first operand is an extend,
-    // we're guaranteed that before extension, the value was narrower than
-    // the size of the alloca, hence the size of the described variable.
-    SmallVector<uint64_t, 3> Ops;
-    unsigned FragmentOffset = 0;
-    // If this already is a bit fragment, we drop the bit fragment from the
-    // expression and record the offset.
-    auto Fragment = DIExpr->getFragmentInfo();
-    if (Fragment) {
-      Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()-3);
-      FragmentOffset = Fragment->OffsetInBits;
-    } else {
-      Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
+    // If this DDI was already describing only a fragment of a variable, ensure
+    // that fragment is appropriately narrowed here.
+    // But if a fragment wasn't used, describe the value as the original
+    // argument (rather than the zext or sext) so that it remains described even
+    // if the sext/zext is optimized away. This widens the variable description,
+    // leaving it up to the consumer to know how the smaller value may be
+    // represented in a larger register.
+    if (auto Fragment = DIExpr->getFragmentInfo()) {
+      unsigned FragmentOffset = Fragment->OffsetInBits;
+      SmallVector<uint64_t, 3> Ops(DIExpr->elements_begin(),
+                                   DIExpr->elements_end() - 3);
+      Ops.push_back(dwarf::DW_OP_LLVM_fragment);
+      Ops.push_back(FragmentOffset);
+      const DataLayout &DL = DDI->getModule()->getDataLayout();
+      Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType()));
+      DIExpr = Builder.createExpression(Ops);
     }
-    Ops.push_back(dwarf::DW_OP_LLVM_fragment);
-    Ops.push_back(FragmentOffset);
-    const DataLayout &DL = DDI->getModule()->getDataLayout();
-    Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType()));
-    auto NewDIExpr = Builder.createExpression(Ops);
-    if (!LdStHasDebugValue(DIVar, NewDIExpr, SI))
-      Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, NewDIExpr,
-                                      DDI->getDebugLoc(), SI);
-  } else if (!LdStHasDebugValue(DIVar, DIExpr, SI))
-    Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr,
-                                    DDI->getDebugLoc(), SI);
+    DV = ExtendedArg;
+  }
+  if (!LdStHasDebugValue(DIVar, DIExpr, SI))
+    Builder.insertDbgValueIntrinsic(DV, 0, DIVar, DIExpr, DDI->getDebugLoc(),
+                                    SI);
 }
 
 /// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
@@ -1781,44 +1776,43 @@ void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J) {
   combineMetadata(K, J, KnownIDs);
 }
 
-unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
-                                        DominatorTree &DT,
-                                        const BasicBlockEdge &Root) {
+template <typename RootType, typename DominatesFn>
+static unsigned replaceDominatedUsesWith(Value *From, Value *To,
+                                         const RootType &Root,
+                                         const DominatesFn &Dominates) {
   assert(From->getType() == To->getType());
-  
+
   unsigned Count = 0;
   for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
-       UI != UE; ) {
+       UI != UE;) {
     Use &U = *UI++;
-    if (DT.dominates(Root, U)) {
-      U.set(To);
-      DEBUG(dbgs() << "Replace dominated use of '"
-            << From->getName() << "' as "
-            << *To << " in " << *U << "\n");
-      ++Count;
-    }
+    if (!Dominates(Root, U))
+      continue;
+    U.set(To);
+    DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as "
+                 << *To << " in " << *U << "\n");
+    ++Count;
   }
   return Count;
 }
 
 unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
                                         DominatorTree &DT,
-                                        const BasicBlock *BB) {
-  assert(From->getType() == To->getType());
+                                        const BasicBlockEdge &Root) {
+  auto Dominates = [&DT](const BasicBlockEdge &Root, const Use &U) {
+    return DT.dominates(Root, U);
+  };
+  return ::replaceDominatedUsesWith(From, To, Root, Dominates);
+}
 
-  unsigned Count = 0;
-  for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
-       UI != UE;) {
-    Use &U = *UI++;
-    auto *I = cast<Instruction>(U.getUser());
-    if (DT.properlyDominates(BB, I->getParent())) {
-      U.set(To);
-      DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as "
-            << *To << " in " << *U << "\n");
-      ++Count;
-    }
-  }
-  return Count;
+unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
+                                        DominatorTree &DT,
+                                        const BasicBlock *BB) {
+  auto ProperlyDominates = [&DT](const BasicBlock *BB, const Use &U) {
+    auto *I = cast<Instruction>(U.getUser())->getParent();
+    return DT.properlyDominates(BB, I);
+  };
+
+  return ::replaceDominatedUsesWith(From, To, BB, ProperlyDominates);
 }
 
 bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {
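Aside: the replaceDominatedUsesWith refactor above follows a common pattern: hoist the shared use-rewriting loop into a template parameterized over a root type and a dominance predicate, then express each public overload as a thin lambda wrapper. A minimal standalone sketch of the pattern (the container and predicate here are illustrative, not LLVM types):

#include <cstdio>
#include <vector>

template <typename RootType, typename DominatesFn>
static unsigned replaceIf(std::vector<int> &Uses, int To, const RootType &Root,
                          const DominatesFn &Dominates) {
  unsigned Count = 0;
  for (int &U : Uses) {
    if (!Dominates(Root, U))
      continue;
    U = To; // replace the "dominated" use
    ++Count;
  }
  return Count;
}

int main() {
  std::vector<int> Uses{1, 5, 9};
  // One "overload" per dominance test, each just supplying a predicate.
  auto GreaterThan = [](int Root, int U) { return U > Root; };
  unsigned N = replaceIf(Uses, 0, 4, GreaterThan);
  std::printf("replaced %u uses\n", N); // replaced 2 uses
  return 0;
}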
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index 175d013a011d..81f033e7d51a 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
@@ -1112,3 +1113,203 @@ Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
   else
     return (FalseVal + (TrueVal / 2)) / TrueVal;
 }
+
+/// \brief Adds a 'fast' flag to floating point operations.
+static Value *addFastMathFlag(Value *V) {
+  if (isa<FPMathOperator>(V)) {
+    FastMathFlags Flags;
+    Flags.setUnsafeAlgebra();
+    cast<Instruction>(V)->setFastMathFlags(Flags);
+  }
+  return V;
+}
+
+// Helper to generate a log2 shuffle reduction.
+Value *
+llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
+                          RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
+                          ArrayRef<Value *> RedOps) {
+  unsigned VF = Src->getType()->getVectorNumElements();
+  // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
+  // and vector ops, reducing the set of values being computed by half each
+  // round.
+  assert(isPowerOf2_32(VF) &&
+         "Reduction emission only supported for pow2 vectors!");
+  Value *TmpVec = Src;
+  SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
+  for (unsigned i = VF; i != 1; i >>= 1) {
+    // Move the upper half of the vector to the lower half.
+    for (unsigned j = 0; j != i / 2; ++j)
+      ShuffleMask[j] = Builder.getInt32(i / 2 + j);
+
+    // Fill the rest of the mask with undef.
+    std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
+              UndefValue::get(Builder.getInt32Ty()));
+
+    Value *Shuf = Builder.CreateShuffleVector(
+        TmpVec, UndefValue::get(TmpVec->getType()),
+        ConstantVector::get(ShuffleMask), "rdx.shuf");
+
+    if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
+      // Floating point operations had to be 'fast' to enable the reduction.
+      TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
+                                                   TmpVec, Shuf, "bin.rdx"));
+    } else {
+      assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
+             "Invalid min/max");
+      TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec,
+                                                    Shuf);
+    }
+    if (!RedOps.empty())
+      propagateIRFlags(TmpVec, RedOps);
+  }
+  // The result is in the first element of the vector.
+  return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
+}
+
+/// Create a simple vector reduction specified by an opcode and some
+/// flags (if generating min/max reductions).
+Value *llvm::createSimpleTargetReduction(
+    IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
+    Value *Src, TargetTransformInfo::ReductionFlags Flags,
+    ArrayRef<Value *> RedOps) {
+  assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
+
+  Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
+  std::function<Value*()> BuildFunc;
+  using RD = RecurrenceDescriptor;
+  RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
+  // TODO: Support creating ordered reductions.
+  FastMathFlags FMFUnsafe;
+  FMFUnsafe.setUnsafeAlgebra();
+
+  switch (Opcode) {
+  case Instruction::Add:
+    BuildFunc = [&]() { return Builder.CreateAddReduce(Src); };
+    break;
+  case Instruction::Mul:
+    BuildFunc = [&]() { return Builder.CreateMulReduce(Src); };
+    break;
+  case Instruction::And:
+    BuildFunc = [&]() { return Builder.CreateAndReduce(Src); };
+    break;
+  case Instruction::Or:
+    BuildFunc = [&]() { return Builder.CreateOrReduce(Src); };
+    break;
+  case Instruction::Xor:
+    BuildFunc = [&]() { return Builder.CreateXorReduce(Src); };
+    break;
+  case Instruction::FAdd:
+    BuildFunc = [&]() {
+      auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src);
+      cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe);
+      return Rdx;
+    };
+    break;
+  case Instruction::FMul:
+    BuildFunc = [&]() {
+      auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src);
+      cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe);
+      return Rdx;
+    };
+    break;
+  case Instruction::ICmp:
+    if (Flags.IsMaxOp) {
+      MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax;
+      BuildFunc = [&]() {
+        return Builder.CreateIntMaxReduce(Src, Flags.IsSigned);
+      };
+    } else {
+      MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin;
+      BuildFunc = [&]() {
+        return Builder.CreateIntMinReduce(Src, Flags.IsSigned);
+      };
+    }
+    break;
+  case Instruction::FCmp:
+    if (Flags.IsMaxOp) {
+      MinMaxKind = RD::MRK_FloatMax;
+      BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); };
+    } else {
+      MinMaxKind = RD::MRK_FloatMin;
+      BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); };
+    }
+    break;
+  default:
+    llvm_unreachable("Unhandled opcode");
+    break;
+  }
+  if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
+    return BuildFunc();
+  return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
+}
+
+/// Create a vector reduction using a given recurrence descriptor.
+Value *llvm::createTargetReduction(IRBuilder<> &Builder,
+                                   const TargetTransformInfo *TTI,
+                                   RecurrenceDescriptor &Desc, Value *Src,
+                                   bool NoNaN) {
+  // TODO: Support in-order reductions based on the recurrence descriptor.
+  RecurrenceDescriptor::RecurrenceKind RecKind = Desc.getRecurrenceKind();
+  TargetTransformInfo::ReductionFlags Flags;
+  Flags.NoNaN = NoNaN;
+  auto getSimpleRdx = [&](unsigned Opc) {
+    return createSimpleTargetReduction(Builder, TTI, Opc, Src, Flags);
+  };
+  switch (RecKind) {
+  case RecurrenceDescriptor::RK_FloatAdd:
+    return getSimpleRdx(Instruction::FAdd);
+  case RecurrenceDescriptor::RK_FloatMult:
+    return getSimpleRdx(Instruction::FMul);
+  case RecurrenceDescriptor::RK_IntegerAdd:
+    return getSimpleRdx(Instruction::Add);
+  case RecurrenceDescriptor::RK_IntegerMult:
+    return getSimpleRdx(Instruction::Mul);
+  case RecurrenceDescriptor::RK_IntegerAnd:
+    return getSimpleRdx(Instruction::And);
+  case RecurrenceDescriptor::RK_IntegerOr:
+    return getSimpleRdx(Instruction::Or);
+  case RecurrenceDescriptor::RK_IntegerXor:
+    return getSimpleRdx(Instruction::Xor);
+  case RecurrenceDescriptor::RK_IntegerMinMax: {
+    switch (Desc.getMinMaxRecurrenceKind()) {
+    case RecurrenceDescriptor::MRK_SIntMax:
+      Flags.IsSigned = true;
+      Flags.IsMaxOp = true;
+      break;
+    case RecurrenceDescriptor::MRK_UIntMax:
+      Flags.IsMaxOp = true;
+      break;
+    case RecurrenceDescriptor::MRK_SIntMin:
+      Flags.IsSigned = true;
+      break;
+    case RecurrenceDescriptor::MRK_UIntMin:
+      break;
+    default:
+      llvm_unreachable("Unhandled MRK");
+    }
+    return getSimpleRdx(Instruction::ICmp);
+  }
+  case RecurrenceDescriptor::RK_FloatMinMax: {
+    Flags.IsMaxOp =
+        Desc.getMinMaxRecurrenceKind() == RecurrenceDescriptor::MRK_FloatMax;
+    return getSimpleRdx(Instruction::FCmp);
+  }
+  default:
+    llvm_unreachable("Unhandled RecKind");
+  }
+}
+
+void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL) {
+  if (auto *VecOp = dyn_cast<Instruction>(I)) {
+    if (auto *I0 = dyn_cast<Instruction>(VL[0])) {
+      // VecOp is initialized to the 0th scalar, so start counting from index
+      // '1'.
+      VecOp->copyIRFlags(I0);
+      for (int i = 1, e = VL.size(); i < e; ++i) {
+        if (auto *Scalar = dyn_cast<Instruction>(VL[i]))
+          VecOp->andIRFlags(Scalar);
+      }
+    }
+  }
+}
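Aside: a scalar model of the log2 shuffle reduction that getShuffleReduction emits: each round folds the upper half of the vector into the lower half, halving the live width until the result sits in lane 0. This standalone sketch uses a plain array in place of an IR vector and addition in place of the generic binary op:

#include <cstdio>

int shuffleReduceAdd(int *V, unsigned VF) {
  // VF must be a power of two, mirroring the assert in the IR version.
  for (unsigned i = VF; i != 1; i >>= 1)
    for (unsigned j = 0; j != i / 2; ++j)
      V[j] += V[i / 2 + j]; // "bin.rdx": lane j op lane (i/2 + j)
  return V[0];              // the result is in the first element
}

int main() {
  int V[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  std::printf("%d\n", shuffleReduceAdd(V, 8)); // 36
  return 0;
}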
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index 29d334f2968f..2ef3d6336ae2 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -35,7 +35,7 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F,
     // Upgrade a 2-field global array type to the new 3-field format if needed.
     if (Data && OldEltTy->getNumElements() < 3)
       EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
-                              IRB.getInt8PtrTy(), nullptr);
+                              IRB.getInt8PtrTy());
     else
       EltTy = OldEltTy;
     if (Constant *Init = GVCtor->getInitializer()) {
@@ -44,10 +44,10 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F,
       for (unsigned i = 0; i != n; ++i) {
         auto Ctor = cast<Constant>(Init->getOperand(i));
         if (EltTy != OldEltTy)
-          Ctor = ConstantStruct::get(
-              EltTy, Ctor->getAggregateElement((unsigned)0),
-              Ctor->getAggregateElement(1),
-              Constant::getNullValue(IRB.getInt8PtrTy()), nullptr);
+          Ctor =
+              ConstantStruct::get(EltTy, Ctor->getAggregateElement((unsigned)0),
+                                  Ctor->getAggregateElement(1),
+                                  Constant::getNullValue(IRB.getInt8PtrTy()));
         CurrentCtors.push_back(Ctor);
       }
     }
@@ -55,7 +55,7 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F,
   } else {
     // Use the new three-field struct if there isn't one already.
     EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
-                            IRB.getInt8PtrTy(), nullptr);
+                            IRB.getInt8PtrTy());
   }
 
   // Build a 2 or 3 field global_ctor entry.  We don't take a comdat key.
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 9e71d746de34..1de579ed41b0 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1450,11 +1450,11 @@ static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
     // x86_64 can't use {float, float} since that would be returned in both
     // xmm0 and xmm1, which isn't what a real struct would do.
     ResTy = T.getArch() == Triple::x86_64
-                ? static_cast<Type *>(VectorType::get(ArgTy, 2))
-                : static_cast<Type *>(StructType::get(ArgTy, ArgTy, nullptr));
+                ? static_cast<Type *>(VectorType::get(ArgTy, 2))
+                : static_cast<Type *>(StructType::get(ArgTy, ArgTy));
   } else {
     Name = "__sincospi_stret";
-    ResTy = StructType::get(ArgTy, ArgTy, nullptr);
+    ResTy = StructType::get(ArgTy, ArgTy);
   }
 
   Module *M = OrigCallee->getParent();
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
index 83bd29dbca65..60d9ede2c487 100644
--- a/lib/Transforms/Utils/VNCoercion.cpp
+++ b/lib/Transforms/Utils/VNCoercion.cpp
@@ -303,6 +303,15 @@ static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy,
                                      const DataLayout &DL) {
   LLVMContext &Ctx = SrcVal->getType()->getContext();
 
+  // If two pointers are in the same address space, they have the same size,
+  // so we don't need to do any truncation, etc. This avoids introducing
+  // ptrtoint instructions for pointers that may be non-integral.
+  if (SrcVal->getType()->isPointerTy() && LoadTy->isPointerTy() &&
+      cast<PointerType>(SrcVal->getType())->getAddressSpace() ==
+          cast<PointerType>(LoadTy)->getAddressSpace()) {
+    return SrcVal;
+  }
+
   uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
   uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
   // Compute which bits of the stored value are being used by the load. Convert
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 84d89f103a2f..930972924c3c 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -949,11 +949,10 @@ void Mapper::mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
     Constant *NewV;
     if (IsOldCtorDtor) {
       auto *S = cast<ConstantStruct>(V);
-      auto *E1 = mapValue(S->getOperand(0));
-      auto *E2 = mapValue(S->getOperand(1));
-      Value *Null = Constant::getNullValue(VoidPtrTy);
-      NewV =
-          ConstantStruct::get(cast<StructType>(EltTy), E1, E2, Null, nullptr);
+      auto *E1 = cast<Constant>(mapValue(S->getOperand(0)));
+      auto *E2 = cast<Constant>(mapValue(S->getOperand(1)));
+      Constant *Null = Constant::getNullValue(VoidPtrTy);
+      NewV = ConstantStruct::get(cast<StructType>(EltTy), E1, E2, Null);
     } else {
       NewV = cast_or_null<Constant>(mapValue(V));
     }
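Aside: the VNCoercion hunk above short-circuits store-to-load forwarding when both types are pointers in the same address space, since coercing through ptrtoint/inttoptr would be illegal for non-integral pointers. A minimal sketch of the guard, assuming LLVM's IR type API:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Same-address-space pointers have the same bit width, so the stored value
// can be forwarded to the load as-is, with no truncation or int round trip.
static bool canForwardWithoutCoercion(Type *StoredTy, Type *LoadTy) {
  auto *SrcPtr = dyn_cast<PointerType>(StoredTy);
  auto *DstPtr = dyn_cast<PointerType>(LoadTy);
  return SrcPtr && DstPtr &&
         SrcPtr->getAddressSpace() == DstPtr->getAddressSpace();
}

int main() {
  LLVMContext C;
  Type *AS0 = Type::getInt8PtrTy(C, /*AddressSpace=*/0);
  Type *AS1 = Type::getInt8PtrTy(C, /*AddressSpace=*/1);
  outs() << canForwardWithoutCoercion(AS0, AS0) << " "   // 1
         << canForwardWithoutCoercion(AS0, AS1) << "\n"; // 0
  return 0;
}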