summary refs log tree commit diff
path: root/lib/Transforms/Utils
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Utils')
-rw-r--r-- lib/Transforms/Utils/BypassSlowDivision.cpp 4
-rw-r--r-- lib/Transforms/Utils/CloneFunction.cpp 32
-rw-r--r-- lib/Transforms/Utils/CloneModule.cpp 2
-rw-r--r-- lib/Transforms/Utils/EscapeEnumerator.cpp 3
-rw-r--r-- lib/Transforms/Utils/InlineFunction.cpp 61
-rw-r--r-- lib/Transforms/Utils/InstructionNamer.cpp 13
-rw-r--r-- lib/Transforms/Utils/Local.cpp 106
-rw-r--r-- lib/Transforms/Utils/LoopUtils.cpp 201
-rw-r--r-- lib/Transforms/Utils/ModuleUtils.cpp 12
-rw-r--r-- lib/Transforms/Utils/SimplifyLibCalls.cpp 6
-rw-r--r-- lib/Transforms/Utils/VNCoercion.cpp 9
-rw-r--r-- lib/Transforms/Utils/ValueMapper.cpp 9
12 files changed, 324 insertions, 134 deletions
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index 7ffdad597a9b..83ec7f55d1af 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -261,10 +261,10 @@ ValueRange FastDivInsertionTask::getValueRange(Value *V,
computeKnownBits(V, Known, DL);
- if (Known.Zero.countLeadingOnes() >= HiBits)
+ if (Known.countMinLeadingZeros() >= HiBits)
return VALRNG_KNOWN_SHORT;
- if (Known.One.countLeadingZeros() < HiBits)
+ if (Known.countMaxLeadingZeros() < HiBits)
return VALRNG_LIKELY_LONG;
// Long integer divisions are often used in hashtable implementations. It's
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index d5124ac89016..4aa26fd14fee 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -41,6 +41,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
ValueToValueMapTy &VMap,
const Twine &NameSuffix, Function *F,
ClonedCodeInfo *CodeInfo) {
+ DenseMap<const MDNode *, MDNode *> Cache;
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
@@ -50,6 +51,9 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {
Instruction *NewInst = II->clone();
+ if (F && F->getSubprogram())
+ DebugLoc::reparentDebugInfo(*NewInst, BB->getParent()->getSubprogram(),
+ F->getSubprogram(), Cache);
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
@@ -120,12 +124,28 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
OldFunc->getAllMetadata(MDs);
- for (auto MD : MDs)
- NewFunc->addMetadata(
- MD.first,
- *MapMetadata(MD.second, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
- TypeMapper, Materializer));
+ for (auto MD : MDs) {
+ MDNode *NewMD;
+ bool MustCloneSP =
+ (MD.first == LLVMContext::MD_dbg && OldFunc->getParent() &&
+ OldFunc->getParent() == NewFunc->getParent());
+ if (MustCloneSP) {
+ auto *SP = cast<DISubprogram>(MD.second);
+ NewMD = DISubprogram::getDistinct(
+ NewFunc->getContext(), SP->getScope(), SP->getName(),
+ NewFunc->getName(), SP->getFile(), SP->getLine(), SP->getType(),
+ SP->isLocalToUnit(), SP->isDefinition(), SP->getScopeLine(),
+ SP->getContainingType(), SP->getVirtuality(), SP->getVirtualIndex(),
+ SP->getThisAdjustment(), SP->getFlags(), SP->isOptimized(),
+ SP->getUnit(), SP->getTemplateParams(), SP->getDeclaration(),
+ SP->getVariables(), SP->getThrownTypes());
+ } else
+ NewMD =
+ MapMetadata(MD.second, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer);
+ NewFunc->addMetadata(MD.first, *NewMD);
+ }
// Loop over all of the basic blocks in the function, cloning them as
// appropriate. Note that we save BE this way in order to handle cloning of
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 4e9d67252d6c..5444b752de82 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -96,7 +96,7 @@ std::unique_ptr<Module> llvm::CloneModule(
else
GV = new GlobalVariable(
*New, I->getValueType(), false, GlobalValue::ExternalLinkage,
- (Constant *)nullptr, I->getName(), (GlobalVariable *)nullptr,
+ nullptr, I->getName(), nullptr,
I->getThreadLocalMode(), I->getType()->getAddressSpace());
VMap[&*I] = GV;
// We do not copy attributes (mainly because copying between different
diff --git a/lib/Transforms/Utils/EscapeEnumerator.cpp b/lib/Transforms/Utils/EscapeEnumerator.cpp
index 8c2386554da5..78d7474e5b95 100644
--- a/lib/Transforms/Utils/EscapeEnumerator.cpp
+++ b/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -67,8 +67,7 @@ IRBuilder<> *EscapeEnumerator::Next() {
// Create a cleanup block.
LLVMContext &C = F.getContext();
BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
- Type *ExnTy =
- StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr);
+ Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C));
if (!F.hasPersonalityFn()) {
Constant *PersFn = getDefaultPersonalityFn(F.getParent());
F.setPersonalityFn(PersFn);
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 6d56e08af99f..9cb4762b683c 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -1302,41 +1302,6 @@ static bool hasLifetimeMarkers(AllocaInst *AI) {
return false;
}
-/// Rebuild the entire inlined-at chain for this instruction so that the top of
-/// the chain now is inlined-at the new call site.
-static DebugLoc
-updateInlinedAtInfo(const DebugLoc &DL, DILocation *InlinedAtNode,
- LLVMContext &Ctx,
- DenseMap<const DILocation *, DILocation *> &IANodes) {
- SmallVector<DILocation *, 3> InlinedAtLocations;
- DILocation *Last = InlinedAtNode;
- DILocation *CurInlinedAt = DL;
-
- // Gather all the inlined-at nodes
- while (DILocation *IA = CurInlinedAt->getInlinedAt()) {
- // Skip any we've already built nodes for
- if (DILocation *Found = IANodes[IA]) {
- Last = Found;
- break;
- }
-
- InlinedAtLocations.push_back(IA);
- CurInlinedAt = IA;
- }
-
- // Starting from the top, rebuild the nodes to point to the new inlined-at
- // location (then rebuilding the rest of the chain behind it) and update the
- // map of already-constructed inlined-at nodes.
- for (const DILocation *MD : reverse(InlinedAtLocations)) {
- Last = IANodes[MD] = DILocation::getDistinct(
- Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last);
- }
-
- // And finally create the normal location for this instruction, referring to
- // the new inlined-at chain.
- return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last);
-}
-
/// Return the result of AI->isStaticAlloca() if AI were moved to the entry
/// block. Allocas used in inalloca calls and allocas of dynamic array size
/// cannot be static.
@@ -1364,14 +1329,16 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
// Cache the inlined-at nodes as they're built so they are reused, without
// this every instruction's inlined-at chain would become distinct from each
// other.
- DenseMap<const DILocation *, DILocation *> IANodes;
+ DenseMap<const MDNode *, MDNode *> IANodes;
for (; FI != Fn->end(); ++FI) {
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
BI != BE; ++BI) {
if (DebugLoc DL = BI->getDebugLoc()) {
- BI->setDebugLoc(
- updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes));
+ auto IA = DebugLoc::appendInlinedAt(DL, InlinedAtNode, BI->getContext(),
+ IANodes);
+ auto IDL = DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), IA);
+ BI->setDebugLoc(IDL);
continue;
}
@@ -1429,11 +1396,12 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock,
/// Update the branch metadata for cloned call instructions.
static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
const Optional<uint64_t> &CalleeEntryCount,
- const Instruction *TheCall) {
+ const Instruction *TheCall,
+ ProfileSummaryInfo *PSI) {
if (!CalleeEntryCount.hasValue() || CalleeEntryCount.getValue() < 1)
return;
Optional<uint64_t> CallSiteCount =
- ProfileSummaryInfo::getProfileCount(TheCall, nullptr);
+ PSI ? PSI->getProfileCount(TheCall, nullptr) : None;
uint64_t CallCount =
std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0,
CalleeEntryCount.getValue());
@@ -1456,16 +1424,16 @@ static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
/// The callsite's block count is subtracted from the callee's function entry
/// count.
static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB,
- Instruction *CallInst, Function *Callee) {
+ Instruction *CallInst, Function *Callee,
+ ProfileSummaryInfo *PSI) {
// If the callee has a original count of N, and the estimated count of
// callsite is M, the new callee count is set to N - M. M is estimated from
// the caller's entry count, its entry block frequency and the block frequency
// of the callsite.
Optional<uint64_t> CalleeCount = Callee->getEntryCount();
- if (!CalleeCount.hasValue())
+ if (!CalleeCount.hasValue() || !PSI)
return;
- Optional<uint64_t> CallCount =
- ProfileSummaryInfo::getProfileCount(CallInst, CallerBFI);
+ Optional<uint64_t> CallCount = PSI->getProfileCount(CallInst, CallerBFI);
if (!CallCount.hasValue())
return;
// Since CallSiteCount is an estimate, it could exceed the original callee
@@ -1668,9 +1636,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
CalledFunc->front());
- updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall);
+ updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall,
+ IFI.PSI);
// Update the profile count of callee.
- updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc);
+ updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc, IFI.PSI);
// Inject byval arguments initialization.
for (std::pair<Value*, Value*> &Init : ByValInit)
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 8a1973d1db05..53b432fcafd4 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -26,16 +26,15 @@ namespace {
InstNamer() : FunctionPass(ID) {
initializeInstNamerPass(*PassRegistry::getPassRegistry());
}
-
+
void getAnalysisUsage(AnalysisUsage &Info) const override {
Info.setPreservesAll();
}
bool runOnFunction(Function &F) override {
- for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
- AI != AE; ++AI)
- if (!AI->hasName() && !AI->getType()->isVoidTy())
- AI->setName("arg");
+ for (auto &Arg : F.args())
+ if (!Arg.hasName())
+ Arg.setName("arg");
for (BasicBlock &BB : F) {
if (!BB.hasName())
@@ -48,11 +47,11 @@ namespace {
return true;
}
};
-
+
char InstNamer::ID = 0;
}
-INITIALIZE_PASS(InstNamer, "instnamer",
+INITIALIZE_PASS(InstNamer, "instnamer",
"Assign names to anonymous instructions", false, false)
char &llvm::InstructionNamerID = InstNamer::ID;
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index ce6b703f3528..1ca509472b5f 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -1041,7 +1041,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
KnownBits Known(BitWidth);
computeKnownBits(V, Known, DL, 0, AC, CxtI, DT);
- unsigned TrailZ = Known.Zero.countTrailingOnes();
+ unsigned TrailZ = Known.countMinTrailingZeros();
// Avoid trouble with ridiculously large TrailZ values, such as
// those computed from a null pointer.
@@ -1105,8 +1105,9 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,
void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
StoreInst *SI, DIBuilder &Builder) {
auto *DIVar = DDI->getVariable();
- auto *DIExpr = DDI->getExpression();
assert(DIVar && "Missing variable");
+ auto *DIExpr = DDI->getExpression();
+ Value *DV = SI->getOperand(0);
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
@@ -1116,34 +1117,28 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
if (ExtendedArg) {
- // We're now only describing a subset of the variable. The fragment we're
- // describing will always be smaller than the variable size, because
- // VariableSize == Size of Alloca described by DDI. Since SI stores
- // to the alloca described by DDI, if it's first operand is an extend,
- // we're guaranteed that before extension, the value was narrower than
- // the size of the alloca, hence the size of the described variable.
- SmallVector<uint64_t, 3> Ops;
- unsigned FragmentOffset = 0;
- // If this already is a bit fragment, we drop the bit fragment from the
- // expression and record the offset.
- auto Fragment = DIExpr->getFragmentInfo();
- if (Fragment) {
- Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()-3);
- FragmentOffset = Fragment->OffsetInBits;
- } else {
- Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
+ // If this DDI was already describing only a fragment of a variable, ensure
+ // that fragment is appropriately narrowed here.
+ // But if a fragment wasn't used, describe the value as the original
+ // argument (rather than the zext or sext) so that it remains described even
+ // if the sext/zext is optimized away. This widens the variable description,
+ // leaving it up to the consumer to know how the smaller value may be
+ // represented in a larger register.
+ if (auto Fragment = DIExpr->getFragmentInfo()) {
+ unsigned FragmentOffset = Fragment->OffsetInBits;
+ SmallVector<uint64_t, 3> Ops(DIExpr->elements_begin(),
+ DIExpr->elements_end() - 3);
+ Ops.push_back(dwarf::DW_OP_LLVM_fragment);
+ Ops.push_back(FragmentOffset);
+ const DataLayout &DL = DDI->getModule()->getDataLayout();
+ Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType()));
+ DIExpr = Builder.createExpression(Ops);
}
- Ops.push_back(dwarf::DW_OP_LLVM_fragment);
- Ops.push_back(FragmentOffset);
- const DataLayout &DL = DDI->getModule()->getDataLayout();
- Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType()));
- auto NewDIExpr = Builder.createExpression(Ops);
- if (!LdStHasDebugValue(DIVar, NewDIExpr, SI))
- Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, NewDIExpr,
- DDI->getDebugLoc(), SI);
- } else if (!LdStHasDebugValue(DIVar, DIExpr, SI))
- Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr,
- DDI->getDebugLoc(), SI);
+ DV = ExtendedArg;
+ }
+ if (!LdStHasDebugValue(DIVar, DIExpr, SI))
+ Builder.insertDbgValueIntrinsic(DV, 0, DIVar, DIExpr, DDI->getDebugLoc(),
+ SI);
}
/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
@@ -1781,44 +1776,43 @@ void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J) {
combineMetadata(K, J, KnownIDs);
}
-unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
- DominatorTree &DT,
- const BasicBlockEdge &Root) {
+template <typename RootType, typename DominatesFn>
+static unsigned replaceDominatedUsesWith(Value *From, Value *To,
+ const RootType &Root,
+ const DominatesFn &Dominates) {
assert(From->getType() == To->getType());
-
+
unsigned Count = 0;
for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
- UI != UE; ) {
+ UI != UE;) {
Use &U = *UI++;
- if (DT.dominates(Root, U)) {
- U.set(To);
- DEBUG(dbgs() << "Replace dominated use of '"
- << From->getName() << "' as "
- << *To << " in " << *U << "\n");
- ++Count;
- }
+ if (!Dominates(Root, U))
+ continue;
+ U.set(To);
+ DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as "
+ << *To << " in " << *U << "\n");
+ ++Count;
}
return Count;
}
unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
DominatorTree &DT,
- const BasicBlock *BB) {
- assert(From->getType() == To->getType());
+ const BasicBlockEdge &Root) {
+ auto Dominates = [&DT](const BasicBlockEdge &Root, const Use &U) {
+ return DT.dominates(Root, U);
+ };
+ return ::replaceDominatedUsesWith(From, To, Root, Dominates);
+}
- unsigned Count = 0;
- for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
- UI != UE;) {
- Use &U = *UI++;
- auto *I = cast<Instruction>(U.getUser());
- if (DT.properlyDominates(BB, I->getParent())) {
- U.set(To);
- DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as "
- << *To << " in " << *U << "\n");
- ++Count;
- }
- }
- return Count;
+unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
+ DominatorTree &DT,
+ const BasicBlock *BB) {
+ auto ProperlyDominates = [&DT](const BasicBlock *BB, const Use &U) {
+ auto *I = cast<Instruction>(U.getUser())->getParent();
+ return DT.properlyDominates(BB, I);
+ };
+ return ::replaceDominatedUsesWith(From, To, BB, ProperlyDominates);
}
bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index 175d013a011d..81f033e7d51a 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
@@ -1112,3 +1113,203 @@ Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
else
return (FalseVal + (TrueVal / 2)) / TrueVal;
}
+
+/// \brief Sets the unsafe-algebra fast-math flag on \p V if it is a floating point operation, and returns \p V in every case.
+static Value *addFastMathFlag(Value *V) {
+ if (isa<FPMathOperator>(V)) { // Values that are not FP math operators are left untouched.
+ FastMathFlags Flags;
+ Flags.setUnsafeAlgebra();
+ cast<Instruction>(V)->setFastMathFlags(Flags);
+ }
+ return V; // Same pointer that was passed in, so calls can be nested in an expression.
+}
+
+// Helper to generate a log2 shuffle reduction: folds the vector Src down to its lane 0 and returns that lane as a scalar.
+Value *
+llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
+ RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
+ ArrayRef<Value *> RedOps) {
+ unsigned VF = Src->getType()->getVectorNumElements();
+ // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
+ // and vector ops, reducing the set of values being computed by half each
+ // round.
+ assert(isPowerOf2_32(VF) &&
+ "Reduction emission only supported for pow2 vectors!");
+ Value *TmpVec = Src; // Running partial result; rewritten once per round.
+ SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
+ for (unsigned i = VF; i != 1; i >>= 1) { // i is the number of lanes still being combined.
+ // Move the upper half of the vector to the lower half.
+ for (unsigned j = 0; j != i / 2; ++j)
+ ShuffleMask[j] = Builder.getInt32(i / 2 + j);
+
+ // Fill the rest of the mask with undef.
+ std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
+ UndefValue::get(Builder.getInt32Ty()));
+
+ Value *Shuf = Builder.CreateShuffleVector(
+ TmpVec, UndefValue::get(TmpVec->getType()),
+ ConstantVector::get(ShuffleMask), "rdx.shuf");
+
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp) { // Any opcode other than ICmp/FCmp is emitted as a binary operator.
+ // Floating point operations had to be 'fast' to enable the reduction.
+ TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
+ TmpVec, Shuf, "bin.rdx"));
+ } else { // ICmp/FCmp select the min/max path, which needs a valid MinMaxKind.
+ assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
+ "Invalid min/max");
+ TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec,
+ Shuf);
+ }
+ if (!RedOps.empty()) // Guard: propagateIRFlags reads RedOps[0] unconditionally.
+ propagateIRFlags(TmpVec, RedOps);
+ }
+ // The result is in the first element of the vector.
+ return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
+}
+
+/// Create a simple vector reduction specified by an opcode and some
+/// flags (if generating min/max reductions). Emits either a reduction builder call or the shuffle-based fallback, as chosen by \p TTI.
+Value *llvm::createSimpleTargetReduction(
+ IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
+ Value *Src, TargetTransformInfo::ReductionFlags Flags,
+ ArrayRef<Value *> RedOps) {
+ assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
+
+ Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType()); // Undef scalar passed as first operand to the FAdd/FMul reduce builders.
+ std::function<Value*()> BuildFunc; // Deferred emitter; only invoked if TTI requests the reduction-builder form.
+ using RD = RecurrenceDescriptor;
+ RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; // Stays invalid for every opcode except ICmp/FCmp.
+ // TODO: Support creating ordered reductions.
+ FastMathFlags FMFUnsafe;
+ FMFUnsafe.setUnsafeAlgebra();
+
+ switch (Opcode) {
+ case Instruction::Add:
+ BuildFunc = [&]() { return Builder.CreateAddReduce(Src); };
+ break;
+ case Instruction::Mul:
+ BuildFunc = [&]() { return Builder.CreateMulReduce(Src); };
+ break;
+ case Instruction::And:
+ BuildFunc = [&]() { return Builder.CreateAndReduce(Src); };
+ break;
+ case Instruction::Or:
+ BuildFunc = [&]() { return Builder.CreateOrReduce(Src); };
+ break;
+ case Instruction::Xor:
+ BuildFunc = [&]() { return Builder.CreateXorReduce(Src); };
+ break;
+ case Instruction::FAdd:
+ BuildFunc = [&]() {
+ auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src);
+ cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe); // FP reductions are tagged with unsafe-algebra.
+ return Rdx;
+ };
+ break;
+ case Instruction::FMul:
+ BuildFunc = [&]() {
+ auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src);
+ cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe); // FP reductions are tagged with unsafe-algebra.
+ return Rdx;
+ };
+ break;
+ case Instruction::ICmp: // Integer min/max; Flags.IsMaxOp and Flags.IsSigned select the variant.
+ if (Flags.IsMaxOp) {
+ MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax;
+ BuildFunc = [&]() {
+ return Builder.CreateIntMaxReduce(Src, Flags.IsSigned);
+ };
+ } else {
+ MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin;
+ BuildFunc = [&]() {
+ return Builder.CreateIntMinReduce(Src, Flags.IsSigned);
+ };
+ }
+ break;
+ case Instruction::FCmp: // Floating point min/max; Flags.NoNaN is forwarded to the builder.
+ if (Flags.IsMaxOp) {
+ MinMaxKind = RD::MRK_FloatMax;
+ BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); };
+ } else {
+ MinMaxKind = RD::MRK_FloatMin;
+ BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); };
+ }
+ break;
+ default:
+ llvm_unreachable("Unhandled opcode");
+ break;
+ }
+ if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) // TTI is dereferenced unconditionally, so it must be non-null.
+ return BuildFunc();
+ return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); // Fallback: shuffle-based expansion.
+}
+
+/// Create a vector reduction using a given recurrence descriptor. Maps the descriptor's recurrence kind to an opcode plus flags and forwards to createSimpleTargetReduction.
+Value *llvm::createTargetReduction(IRBuilder<> &Builder,
+ const TargetTransformInfo *TTI,
+ RecurrenceDescriptor &Desc, Value *Src,
+ bool NoNaN) {
+ // TODO: Support in-order reductions based on the recurrence descriptor.
+ RecurrenceDescriptor::RecurrenceKind RecKind = Desc.getRecurrenceKind();
+ TargetTransformInfo::ReductionFlags Flags; // NOTE(review): cases below assume IsMaxOp/IsSigned start out false; confirm against the struct definition.
+ Flags.NoNaN = NoNaN;
+ auto getSimpleRdx = [&](unsigned Opc) { // Flags is read when the lambda is called, after the switch below has set it.
+ return createSimpleTargetReduction(Builder, TTI, Opc, Src, Flags);
+ };
+ switch (RecKind) {
+ case RecurrenceDescriptor::RK_FloatAdd:
+ return getSimpleRdx(Instruction::FAdd);
+ case RecurrenceDescriptor::RK_FloatMult:
+ return getSimpleRdx(Instruction::FMul);
+ case RecurrenceDescriptor::RK_IntegerAdd:
+ return getSimpleRdx(Instruction::Add);
+ case RecurrenceDescriptor::RK_IntegerMult:
+ return getSimpleRdx(Instruction::Mul);
+ case RecurrenceDescriptor::RK_IntegerAnd:
+ return getSimpleRdx(Instruction::And);
+ case RecurrenceDescriptor::RK_IntegerOr:
+ return getSimpleRdx(Instruction::Or);
+ case RecurrenceDescriptor::RK_IntegerXor:
+ return getSimpleRdx(Instruction::Xor);
+ case RecurrenceDescriptor::RK_IntegerMinMax: { // Integer min/max is requested via the ICmp opcode plus flags.
+ switch (Desc.getMinMaxRecurrenceKind()) {
+ case RecurrenceDescriptor::MRK_SIntMax:
+ Flags.IsSigned = true;
+ Flags.IsMaxOp = true;
+ break;
+ case RecurrenceDescriptor::MRK_UIntMax:
+ Flags.IsMaxOp = true;
+ break;
+ case RecurrenceDescriptor::MRK_SIntMin:
+ Flags.IsSigned = true;
+ break;
+ case RecurrenceDescriptor::MRK_UIntMin: // Neither flag is set for unsigned min.
+ break;
+ default:
+ llvm_unreachable("Unhandled MRK");
+ }
+ return getSimpleRdx(Instruction::ICmp);
+ }
+ case RecurrenceDescriptor::RK_FloatMinMax: { // Floating point min/max is requested via the FCmp opcode plus flags.
+ Flags.IsMaxOp =
+ Desc.getMinMaxRecurrenceKind() == RecurrenceDescriptor::MRK_FloatMax;
+ return getSimpleRdx(Instruction::FCmp);
+ }
+ default:
+ llvm_unreachable("Unhandled RecKind");
+ }
+}
+
+void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL) { // Copies the IR flags of VL[0] onto I, then ANDs in the flags of the remaining instructions in VL.
+ if (auto *VecOp = dyn_cast<Instruction>(I)) { // Nothing happens if I is not an instruction.
+ if (auto *I0 = dyn_cast<Instruction>(VL[0])) { // NOTE(review): VL[0] is read with no emptiness check; callers must pass a non-empty VL.
+ // VecOp is initialized to the 0th scalar, so start counting from index
+ // '1'.
+ VecOp->copyIRFlags(I0);
+ for (int i = 1, e = VL.size(); i < e; ++i) {
+ if (auto *Scalar = dyn_cast<Instruction>(VL[i])) // Non-instruction entries are skipped.
+ VecOp->andIRFlags(Scalar);
+ }
+ }
+ }
+}
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index 29d334f2968f..2ef3d6336ae2 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -35,7 +35,7 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F,
// Upgrade a 2-field global array type to the new 3-field format if needed.
if (Data && OldEltTy->getNumElements() < 3)
EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
- IRB.getInt8PtrTy(), nullptr);
+ IRB.getInt8PtrTy());
else
EltTy = OldEltTy;
if (Constant *Init = GVCtor->getInitializer()) {
@@ -44,10 +44,10 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F,
for (unsigned i = 0; i != n; ++i) {
auto Ctor = cast<Constant>(Init->getOperand(i));
if (EltTy != OldEltTy)
- Ctor = ConstantStruct::get(
- EltTy, Ctor->getAggregateElement((unsigned)0),
- Ctor->getAggregateElement(1),
- Constant::getNullValue(IRB.getInt8PtrTy()), nullptr);
+ Ctor =
+ ConstantStruct::get(EltTy, Ctor->getAggregateElement((unsigned)0),
+ Ctor->getAggregateElement(1),
+ Constant::getNullValue(IRB.getInt8PtrTy()));
CurrentCtors.push_back(Ctor);
}
}
@@ -55,7 +55,7 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F,
} else {
// Use the new three-field struct if there isn't one already.
EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
- IRB.getInt8PtrTy(), nullptr);
+ IRB.getInt8PtrTy());
}
// Build a 2 or 3 field global_ctor entry. We don't take a comdat key.
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 9e71d746de34..1de579ed41b0 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1450,11 +1450,11 @@ static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
// x86_64 can't use {float, float} since that would be returned in both
// xmm0 and xmm1, which isn't what a real struct would do.
ResTy = T.getArch() == Triple::x86_64
- ? static_cast<Type *>(VectorType::get(ArgTy, 2))
- : static_cast<Type *>(StructType::get(ArgTy, ArgTy, nullptr));
+ ? static_cast<Type *>(VectorType::get(ArgTy, 2))
+ : static_cast<Type *>(StructType::get(ArgTy, ArgTy));
} else {
Name = "__sincospi_stret";
- ResTy = StructType::get(ArgTy, ArgTy, nullptr);
+ ResTy = StructType::get(ArgTy, ArgTy);
}
Module *M = OrigCallee->getParent();
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
index 83bd29dbca65..60d9ede2c487 100644
--- a/lib/Transforms/Utils/VNCoercion.cpp
+++ b/lib/Transforms/Utils/VNCoercion.cpp
@@ -303,6 +303,15 @@ static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy,
const DataLayout &DL) {
LLVMContext &Ctx = SrcVal->getType()->getContext();
+ // If two pointers are in the same address space, they have the same size,
+ // so we don't need to do any truncation, etc. This avoids introducing
+ // ptrtoint instructions for pointers that may be non-integral.
+ if (SrcVal->getType()->isPointerTy() && LoadTy->isPointerTy() &&
+ cast<PointerType>(SrcVal->getType())->getAddressSpace() ==
+ cast<PointerType>(LoadTy)->getAddressSpace()) {
+ return SrcVal;
+ }
+
uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
// Compute which bits of the stored value are being used by the load. Convert
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 84d89f103a2f..930972924c3c 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -949,11 +949,10 @@ void Mapper::mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
Constant *NewV;
if (IsOldCtorDtor) {
auto *S = cast<ConstantStruct>(V);
- auto *E1 = mapValue(S->getOperand(0));
- auto *E2 = mapValue(S->getOperand(1));
- Value *Null = Constant::getNullValue(VoidPtrTy);
- NewV =
- ConstantStruct::get(cast<StructType>(EltTy), E1, E2, Null, nullptr);
+ auto *E1 = cast<Constant>(mapValue(S->getOperand(0)));
+ auto *E2 = cast<Constant>(mapValue(S->getOperand(1)));
+ Constant *Null = Constant::getNullValue(VoidPtrTy);
+ NewV = ConstantStruct::get(cast<StructType>(EltTy), E1, E2, Null);
} else {
NewV = cast_or_null<Constant>(mapValue(V));
}