author     Dimitry Andric <dim@FreeBSD.org>    2017-06-16 21:03:24 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2017-06-16 21:03:24 +0000
commit     7c7aba6e5fef47a01a136be655b0a92cfd7090f6 (patch)
tree       99ec531924f6078534b100ab9d7696abce848099 /lib
parent     7ab83427af0f77b59941ceba41d509d7d097b065 (diff)
Diffstat (limited to 'lib')
197 files changed, 4631 insertions, 2630 deletions
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index f743cb234c45..dbb1b01b94ac 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -1011,10 +1011,24 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, // equal each other so we can exit early. if (C1 && C2) return NoAlias; - if (isKnownNonEqual(GEP1->getOperand(GEP1->getNumOperands() - 1), - GEP2->getOperand(GEP2->getNumOperands() - 1), - DL)) - return NoAlias; + { + Value *GEP1LastIdx = GEP1->getOperand(GEP1->getNumOperands() - 1); + Value *GEP2LastIdx = GEP2->getOperand(GEP2->getNumOperands() - 1); + if (isa<PHINode>(GEP1LastIdx) || isa<PHINode>(GEP2LastIdx)) { + // If one of the indices is a PHI node, be safe and only use + // computeKnownBits so we don't make any assumptions about the + // relationships between the two indices. This is important if we're + // asking about values from different loop iterations. See PR32314. + // TODO: We may be able to change the check so we only do this when + // we definitely looked through a PHINode. + KnownBits Known1 = computeKnownBits(GEP1LastIdx, DL); + KnownBits Known2 = computeKnownBits(GEP2LastIdx, DL); + if (Known1.Zero.intersects(Known2.One) || + Known1.One.intersects(Known2.Zero)) + return NoAlias; + } else if (isKnownNonEqual(GEP1LastIdx, GEP2LastIdx, DL)) + return NoAlias; + } return MayAlias; } else if (!LastIndexedStruct || !C1 || !C2) { return MayAlias; diff --git a/lib/Analysis/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp index 5896e6e0902f..facda246936d 100644 --- a/lib/Analysis/CallGraphSCCPass.cpp +++ b/lib/Analysis/CallGraphSCCPass.cpp @@ -608,18 +608,18 @@ namespace { } bool runOnSCC(CallGraphSCC &SCC) override { + bool BannerPrinted = false; auto PrintBannerOnce = [&] () { - static bool BannerPrinted = false; if (BannerPrinted) return; Out << Banner; BannerPrinted = true; }; for (CallGraphNode *CGN : SCC) { - if (CGN->getFunction()) { - if (isFunctionInPrintList(CGN->getFunction()->getName())) { + if (Function *F = CGN->getFunction()) { + if (!F->isDeclaration() && isFunctionInPrintList(F->getName())) { PrintBannerOnce(); - CGN->getFunction()->print(Out); + F->print(Out); } } else if (llvm::isFunctionInPrintList("*")) { PrintBannerOnce(); diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp index 1b36569f7a07..2d39a0b02150 100644 --- a/lib/Analysis/DivergenceAnalysis.cpp +++ b/lib/Analysis/DivergenceAnalysis.cpp @@ -241,7 +241,7 @@ void DivergencePropagator::exploreDataDependency(Value *V) { // Follow def-use chains of V. 
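// A value is enqueued at most once: DV.insert(UserInst).second is true only
// on first insertion, and (new in this change) users the target declares
// always-uniform via TTI.isAlwaysUniform are never enqueued at all.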
for (User *U : V->users()) { Instruction *UserInst = cast<Instruction>(U); - if (DV.insert(UserInst).second) + if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second) Worklist.push_back(UserInst); } } diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp index e0e04a91410f..86d0d92799f2 100644 --- a/lib/Analysis/MemorySSA.cpp +++ b/lib/Analysis/MemorySSA.cpp @@ -1872,7 +1872,6 @@ MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) { void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<MemorySSAWrapperPass>(); - AU.addPreserved<MemorySSAWrapperPass>(); } bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) { @@ -1957,6 +1956,7 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess( #ifdef EXPENSIVE_CHECKS MemoryAccess *NewNoCache = Walker.findClobber(StartingAccess, Q); assert(NewNoCache == New && "Cache made us hand back a different result?"); + (void)NewNoCache; #endif if (AutoResetWalker) resetClobberWalker(); diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index b9c4716b5528..aebc80a0a885 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -149,9 +149,9 @@ static cl::opt<unsigned> MaxValueCompareDepth( cl::init(2)); static cl::opt<unsigned> - MaxAddExprDepth("scalar-evolution-max-addexpr-depth", cl::Hidden, - cl::desc("Maximum depth of recursive AddExpr"), - cl::init(32)); + MaxArithDepth("scalar-evolution-max-arith-depth", cl::Hidden, + cl::desc("Maximum depth of recursive arithmetics"), + cl::init(32)); static cl::opt<unsigned> MaxConstantEvolvingDepth( "scalar-evolution-max-constant-evolving-depth", cl::Hidden, @@ -2276,8 +2276,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, if (Ops.size() == 1) return Ops[0]; } - // Limit recursion calls depth - if (Depth > MaxAddExprDepth) + // Limit recursion calls depth. + if (Depth > MaxArithDepth) return getOrCreateAddExpr(Ops, Flags); // Okay, check to see if the same value occurs in the operand list more than @@ -2293,7 +2293,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, ++Count; // Merge the values into a multiply. 
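// e.g. with Count == 3, the three occurrences of X in the add collapse into
// a single 3*X term, built at Depth + 1 so the new recursion cap applies.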
const SCEV *Scale = getConstant(Ty, Count); - const SCEV *Mul = getMulExpr(Scale, Ops[i]); + const SCEV *Mul = getMulExpr(Scale, Ops[i], SCEV::FlagAnyWrap, Depth + 1); if (Ops.size() == Count) return Mul; Ops[i] = Mul; @@ -2343,7 +2343,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, } } if (Ok) - LargeOps.push_back(getMulExpr(LargeMulOps)); + LargeOps.push_back(getMulExpr(LargeMulOps, SCEV::FlagAnyWrap, Depth + 1)); } else { Ok = false; break; @@ -2417,7 +2417,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, if (MulOp.first != 0) Ops.push_back(getMulExpr( getConstant(MulOp.first), - getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1))); + getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1)); if (Ops.empty()) return getZero(Ty); if (Ops.size() == 1) @@ -2445,11 +2446,12 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), Mul->op_begin()+MulOp); MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); - InnerMul = getMulExpr(MulOps); + InnerMul = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); } SmallVector<const SCEV *, 2> TwoOps = {getOne(Ty), InnerMul}; const SCEV *AddOne = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); - const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV); + const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV, + SCEV::FlagAnyWrap, Depth + 1); if (Ops.size() == 2) return OuterMul; if (AddOp < Idx) { Ops.erase(Ops.begin()+AddOp); @@ -2478,19 +2480,20 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), Mul->op_begin()+MulOp); MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); - InnerMul1 = getMulExpr(MulOps); + InnerMul1 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); } const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0); if (OtherMul->getNumOperands() != 2) { SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(), OtherMul->op_begin()+OMulOp); MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end()); - InnerMul2 = getMulExpr(MulOps); + InnerMul2 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); } SmallVector<const SCEV *, 2> TwoOps = {InnerMul1, InnerMul2}; const SCEV *InnerMulSum = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); - const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum); + const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum, + SCEV::FlagAnyWrap, Depth + 1); if (Ops.size() == 2) return OuterMul; Ops.erase(Ops.begin()+Idx); Ops.erase(Ops.begin()+OtherMulIdx-1); @@ -2621,6 +2624,27 @@ ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops, return S; } +const SCEV * +ScalarEvolution::getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops, + SCEV::NoWrapFlags Flags) { + FoldingSetNodeID ID; + ID.AddInteger(scMulExpr); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = nullptr; + SCEVMulExpr *S = + static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + if (!S) { + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); + } + S->setNoWrapFlags(Flags); + return S; +} + static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) { uint64_t k = i*j; if (j > 1 && k / j != i) Overflow = true; @@ -2673,7 +2697,8 @@ static 
bool containsConstantSomewhere(const SCEV *StartExpr) { /// Get a canonical multiply expression, or something simpler if possible. const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, - SCEV::NoWrapFlags Flags) { + SCEV::NoWrapFlags Flags, + unsigned Depth) { assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) && "only nuw or nsw allowed"); assert(!Ops.empty() && "Cannot get empty mul!"); @@ -2690,6 +2715,10 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); + // Limit recursion calls depth. + if (Depth > MaxArithDepth) + return getOrCreateMulExpr(Ops, Flags); + // If there are any constants, fold them together. unsigned Idx = 0; if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { @@ -2701,8 +2730,11 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // apply this transformation as well. if (Add->getNumOperands() == 2) if (containsConstantSomewhere(Add)) - return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)), - getMulExpr(LHSC, Add->getOperand(1))); + return getAddExpr(getMulExpr(LHSC, Add->getOperand(0), + SCEV::FlagAnyWrap, Depth + 1), + getMulExpr(LHSC, Add->getOperand(1), + SCEV::FlagAnyWrap, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1); ++Idx; while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { @@ -2730,17 +2762,19 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, SmallVector<const SCEV *, 4> NewOps; bool AnyFolded = false; for (const SCEV *AddOp : Add->operands()) { - const SCEV *Mul = getMulExpr(Ops[0], AddOp); + const SCEV *Mul = getMulExpr(Ops[0], AddOp, SCEV::FlagAnyWrap, + Depth + 1); if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true; NewOps.push_back(Mul); } if (AnyFolded) - return getAddExpr(NewOps); + return getAddExpr(NewOps, SCEV::FlagAnyWrap, Depth + 1); } else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) { // Negation preserves a recurrence's no self-wrap property. SmallVector<const SCEV *, 4> Operands; for (const SCEV *AddRecOp : AddRec->operands()) - Operands.push_back(getMulExpr(Ops[0], AddRecOp)); + Operands.push_back(getMulExpr(Ops[0], AddRecOp, SCEV::FlagAnyWrap, + Depth + 1)); return getAddRecExpr(Operands, AddRec->getLoop(), AddRec->getNoWrapFlags(SCEV::FlagNW)); @@ -2762,18 +2796,18 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) { if (Ops.size() > MulOpsInlineThreshold) break; - // If we have an mul, expand the mul operands onto the end of the operands - // list. + // If we have an mul, expand the mul operands onto the end of the + // operands list. Ops.erase(Ops.begin()+Idx); Ops.append(Mul->op_begin(), Mul->op_end()); DeletedMul = true; } - // If we deleted at least one mul, we added operands to the end of the list, - // and they are not necessarily sorted. Recurse to resort and resimplify - // any operands we just acquired. + // If we deleted at least one mul, we added operands to the end of the + // list, and they are not necessarily sorted. Recurse to resort and + // resimplify any operands we just acquired. if (DeletedMul) - return getMulExpr(Ops); + return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // If there are any add recurrences in the operands list, see if any other @@ -2784,8 +2818,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // Scan over all recurrences, trying to fold loop invariants into them. 
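// e.g. with 4 invariant in L, 4 * {2,+,3}<L> folds to {8,+,12}<L>.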
for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) { - // Scan all of the other operands to this mul and add them to the vector if - // they are loop invariant w.r.t. the recurrence. + // Scan all of the other operands to this mul and add them to the vector + // if they are loop invariant w.r.t. the recurrence. SmallVector<const SCEV *, 8> LIOps; const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); @@ -2801,9 +2835,10 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step} SmallVector<const SCEV *, 4> NewOps; NewOps.reserve(AddRec->getNumOperands()); - const SCEV *Scale = getMulExpr(LIOps); + const SCEV *Scale = getMulExpr(LIOps, SCEV::FlagAnyWrap, Depth + 1); for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) - NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i))); + NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i), + SCEV::FlagAnyWrap, Depth + 1)); // Build the new addrec. Propagate the NUW and NSW flags if both the // outer mul and the inner addrec are guaranteed to have no overflow. @@ -2822,12 +2857,12 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, Ops[i] = NewRec; break; } - return getMulExpr(Ops); + return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } - // Okay, if there weren't any loop invariants to be folded, check to see if - // there are multiple AddRec's with the same loop induction variable being - // multiplied together. If so, we can fold them. + // Okay, if there weren't any loop invariants to be folded, check to see + // if there are multiple AddRec's with the same loop induction variable + // being multiplied together. If so, we can fold them. // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L> // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [ @@ -2869,7 +2904,9 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, const SCEV *CoeffTerm = getConstant(Ty, Coeff); const SCEV *Term1 = AddRec->getOperand(y-z); const SCEV *Term2 = OtherAddRec->getOperand(z); - Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2)); + Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1, Term2, + SCEV::FlagAnyWrap, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1); } } AddRecOps.push_back(Term); @@ -2887,7 +2924,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, } } if (OpsModified) - return getMulExpr(Ops); + return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); // Otherwise couldn't fold anything into this recurrence. Move onto the // next one. @@ -2895,22 +2932,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // Okay, it looks like we really DO need an mul expr. Check to see if we // already have one, otherwise create a new one. 
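// The uniquing below now lives in getOrCreateMulExpr (added by this patch);
// a condensed sketch of the FoldingSet idiom it uses:
//   FoldingSetNodeID ID;
//   ID.AddInteger(scMulExpr);
//   for (const SCEV *Op : Ops) ID.AddPointer(Op);
//   void *IP = nullptr;
//   if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
//     return S;                  // reuse the existing node
//   // otherwise allocate a SCEVMulExpr and UniqueSCEVs.InsertNode(S, IP)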
- FoldingSetNodeID ID; - ID.AddInteger(scMulExpr); - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - ID.AddPointer(Ops[i]); - void *IP = nullptr; - SCEVMulExpr *S = - static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); - if (!S) { - const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); - std::uninitialized_copy(Ops.begin(), Ops.end(), O); - S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), - O, Ops.size()); - UniqueSCEVs.InsertNode(S, IP); - } - S->setNoWrapFlags(Flags); - return S; + return getOrCreateMulExpr(Ops, Flags); } /// Get a canonical unsigned division expression, or something simpler if @@ -3713,7 +3735,8 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { } const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, - SCEV::NoWrapFlags Flags) { + SCEV::NoWrapFlags Flags, + unsigned Depth) { // Fast path: X - X --> 0. if (LHS == RHS) return getZero(LHS->getType()); @@ -3747,7 +3770,7 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, // larger scope than intended. auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap; - return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags); + return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth); } const SCEV * diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 488cb332a0b0..92328f6e5efd 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -103,6 +103,10 @@ bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const { return TTIImpl->isSourceOfDivergence(V); } +bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const { + return TTIImpl->isAlwaysUniform(V); +} + unsigned TargetTransformInfo::getFlatAddressSpace() const { return TTIImpl->getFlatAddressSpace(); } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index c0181662fd9d..b065f427b06c 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -852,7 +852,8 @@ static void computeKnownBitsFromShiftOperator( Optional<bool> ShifterOperandIsNonZero; // Early exit if we can't constrain any well-defined shift amount. - if (!(ShiftAmtKZ & (BitWidth - 1)) && !(ShiftAmtKO & (BitWidth - 1))) { + if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) && + !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) { ShifterOperandIsNonZero = isKnownNonZero(I->getOperand(1), Depth + 1, Q); if (!*ShifterOperandIsNonZero) @@ -3026,7 +3027,7 @@ bool llvm::getConstantDataArrayInfo(const Value *V, if (GV->getInitializer()->isNullValue()) { Type *GVTy = GV->getValueType(); if ( (ArrayTy = dyn_cast<ArrayType>(GVTy)) ) { - // A zeroinitializer for the array; There is no ConstantDataArray. + // A zeroinitializer for the array; there is no ConstantDataArray. 
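// (a null Array together with a length is the slice's way of saying
// "all zero bytes"; string-folding callers rely on that convention)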
Array = nullptr; } else { const DataLayout &DL = GV->getParent()->getDataLayout(); diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 95987fac74e1..0629c2d326ae 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -733,13 +733,13 @@ private: std::vector<FunctionSummary::EdgeTy> makeCallList(ArrayRef<uint64_t> Record, bool IsOldProfileFormat, bool HasProfile); - Error parseEntireSummary(); + Error parseEntireSummary(unsigned ID); Error parseModuleStringTable(); std::pair<ValueInfo, GlobalValue::GUID> getValueInfoFromValueId(unsigned ValueId); - ModulePathStringTableTy::iterator addThisModulePath(); + ModuleSummaryIndex::ModuleInfo *addThisModule(); }; } // end anonymous namespace @@ -2608,6 +2608,16 @@ Error BitcodeReader::materializeMetadata() { if (Error Err = MDLoader->parseModuleMetadata()) return Err; } + + // Upgrade "Linker Options" module flag to "llvm.linker.options" module-level + // metadata. + if (Metadata *Val = TheModule->getModuleFlag("Linker Options")) { + NamedMDNode *LinkerOpts = + TheModule->getOrInsertNamedMetadata("llvm.linker.options"); + for (const MDOperand &MDOptions : cast<MDNode>(Val)->operands()) + LinkerOpts->addOperand(cast<MDNode>(MDOptions)); + } + DeferredMetadataInfo.clear(); return Error::success(); } @@ -4691,9 +4701,9 @@ ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader( : BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex), ModulePath(ModulePath), ModuleId(ModuleId) {} -ModulePathStringTableTy::iterator -ModuleSummaryIndexBitcodeReader::addThisModulePath() { - return TheIndex.addModulePath(ModulePath, ModuleId); +ModuleSummaryIndex::ModuleInfo * +ModuleSummaryIndexBitcodeReader::addThisModule() { + return TheIndex.addModule(ModulePath, ModuleId); } std::pair<ValueInfo, GlobalValue::GUID> @@ -4844,6 +4854,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() { return error("Invalid record"); break; case bitc::GLOBALVAL_SUMMARY_BLOCK_ID: + case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID: assert(!SeenValueSymbolTable && "Already read VST when parsing summary block?"); // We might not have a VST if there were no values in the @@ -4856,7 +4867,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() { SeenValueSymbolTable = true; } SeenGlobalValSummary = true; - if (Error Err = parseEntireSummary()) + if (Error Err = parseEntireSummary(Entry.ID)) return Err; break; case bitc::MODULE_STRTAB_BLOCK_ID: @@ -4889,7 +4900,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() { case bitc::MODULE_CODE_HASH: { if (Record.size() != 5) return error("Invalid hash length " + Twine(Record.size()).str()); - auto &Hash = addThisModulePath()->second.second; + auto &Hash = addThisModule()->second.second; int Pos = 0; for (auto &Val : Record) { assert(!(Val >> 32) && "Unexpected high bits set"); @@ -4964,8 +4975,8 @@ std::vector<FunctionSummary::EdgeTy> ModuleSummaryIndexBitcodeReader::makeCallLi // Eagerly parse the entire summary block. This populates the GlobalValueSummary // objects in the index. 
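// Note the signature change below: the block is now entered via the ID the
// caller actually saw, since both GLOBALVAL_SUMMARY_BLOCK_ID and the new
// FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID are routed to this one routine.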
-Error ModuleSummaryIndexBitcodeReader::parseEntireSummary() { - if (Stream.EnterSubBlock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID)) +Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { + if (Stream.EnterSubBlock(ID)) return error("Invalid record"); SmallVector<uint64_t, 64> Record; @@ -5070,7 +5081,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary() { PendingTypeTestAssumeConstVCalls.clear(); PendingTypeCheckedLoadConstVCalls.clear(); auto VIAndOriginalGUID = getValueInfoFromValueId(ValueID); - FS->setModulePath(addThisModulePath()->first()); + FS->setModulePath(addThisModule()->first()); FS->setOriginalName(VIAndOriginalGUID.second); TheIndex.addGlobalValueSummary(VIAndOriginalGUID.first, std::move(FS)); break; @@ -5090,7 +5101,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary() { // string table section in the per-module index, we create a single // module path string table entry with an empty (0) ID to take // ownership. - AS->setModulePath(addThisModulePath()->first()); + AS->setModulePath(addThisModule()->first()); GlobalValue::GUID AliaseeGUID = getValueInfoFromValueId(AliaseeID).first.getGUID(); @@ -5113,7 +5124,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary() { std::vector<ValueInfo> Refs = makeRefList(ArrayRef<uint64_t>(Record).slice(2)); auto FS = llvm::make_unique<GlobalVarSummary>(Flags, std::move(Refs)); - FS->setModulePath(addThisModulePath()->first()); + FS->setModulePath(addThisModule()->first()); auto GUID = getValueInfoFromValueId(ValueID); FS->setOriginalName(GUID.second); TheIndex.addGlobalValueSummary(GUID.first, std::move(FS)); @@ -5241,6 +5252,20 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary() { {{Record[0], Record[1]}, {Record.begin() + 2, Record.end()}}); break; } + case bitc::FS_CFI_FUNCTION_DEFS: { + std::set<std::string> &CfiFunctionDefs = TheIndex.cfiFunctionDefs(); + for (unsigned I = 0; I != Record.size(); I += 2) + CfiFunctionDefs.insert( + {Strtab.data() + Record[I], static_cast<size_t>(Record[I + 1])}); + break; + } + case bitc::FS_CFI_FUNCTION_DECLS: { + std::set<std::string> &CfiFunctionDecls = TheIndex.cfiFunctionDecls(); + for (unsigned I = 0; I != Record.size(); I += 2) + CfiFunctionDecls.insert( + {Strtab.data() + Record[I], static_cast<size_t>(Record[I + 1])}); + break; + } } } llvm_unreachable("Exit infinite loop"); @@ -5255,7 +5280,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() { SmallVector<uint64_t, 64> Record; SmallString<128> ModulePath; - ModulePathStringTableTy::iterator LastSeenModulePath; + ModuleSummaryIndex::ModuleInfo *LastSeenModule = nullptr; while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); @@ -5282,8 +5307,8 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() { if (convertToString(Record, 1, ModulePath)) return error("Invalid record"); - LastSeenModulePath = TheIndex.addModulePath(ModulePath, ModuleId); - ModuleIdMap[ModuleId] = LastSeenModulePath->first(); + LastSeenModule = TheIndex.addModule(ModulePath, ModuleId); + ModuleIdMap[ModuleId] = LastSeenModule->first(); ModulePath.clear(); break; @@ -5292,15 +5317,15 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() { case bitc::MST_CODE_HASH: { if (Record.size() != 5) return error("Invalid hash length " + Twine(Record.size()).str()); - if (LastSeenModulePath == TheIndex.modulePaths().end()) + if (!LastSeenModule) return error("Invalid hash that does not follow a module path"); int Pos = 0; for (auto &Val : Record) { assert(!(Val >> 32) && 
"Unexpected high bits set"); - LastSeenModulePath->second.second[Pos++] = Val; + LastSeenModule->second.second[Pos++] = Val; } - // Reset LastSeenModulePath to avoid overriding the hash unexpectedly. - LastSeenModulePath = TheIndex.modulePaths().end(); + // Reset LastSeenModule to avoid overriding the hash unexpectedly. + LastSeenModule = nullptr; break; } } @@ -5507,13 +5532,16 @@ BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata, } // Parse the specified bitcode buffer and merge the index into CombinedIndex. +// We don't use ModuleIdentifier here because the client may need to control the +// module path used in the combined summary (e.g. when reading summaries for +// regular LTO modules). Error BitcodeModule::readSummary(ModuleSummaryIndex &CombinedIndex, - unsigned ModuleId) { + StringRef ModulePath, uint64_t ModuleId) { BitstreamCursor Stream(Buffer); Stream.JumpToBit(ModuleBit); ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, CombinedIndex, - ModuleIdentifier, ModuleId); + ModulePath, ModuleId); return R.parseModule(); } @@ -5533,7 +5561,7 @@ Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() { } // Check if the given bitcode buffer contains a global value summary block. -Expected<bool> BitcodeModule::hasSummary() { +Expected<BitcodeLTOInfo> BitcodeModule::getLTOInfo() { BitstreamCursor Stream(Buffer); Stream.JumpToBit(ModuleBit); @@ -5547,11 +5575,14 @@ Expected<bool> BitcodeModule::hasSummary() { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: - return false; + return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/false}; case BitstreamEntry::SubBlock: if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID) - return true; + return BitcodeLTOInfo{/*IsThinLTO=*/true, /*HasSummary=*/true}; + + if (Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID) + return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/true}; // Ignore other sub-blocks. 
if (Stream.SkipBlock()) @@ -5638,12 +5669,12 @@ Expected<std::string> llvm::getBitcodeProducerString(MemoryBufferRef Buffer) { Error llvm::readModuleSummaryIndex(MemoryBufferRef Buffer, ModuleSummaryIndex &CombinedIndex, - unsigned ModuleId) { + uint64_t ModuleId) { Expected<BitcodeModule> BM = getSingleModule(Buffer); if (!BM) return BM.takeError(); - return BM->readSummary(CombinedIndex, ModuleId); + return BM->readSummary(CombinedIndex, BM->getModuleIdentifier(), ModuleId); } Expected<std::unique_ptr<ModuleSummaryIndex>> @@ -5655,12 +5686,12 @@ llvm::getModuleSummaryIndex(MemoryBufferRef Buffer) { return BM->getSummary(); } -Expected<bool> llvm::hasGlobalValueSummary(MemoryBufferRef Buffer) { +Expected<BitcodeLTOInfo> llvm::getBitcodeLTOInfo(MemoryBufferRef Buffer) { Expected<BitcodeModule> BM = getSingleModule(Buffer); if (!BM) return BM.takeError(); - return BM->hasSummary(); + return BM->getLTOInfo(); } Expected<std::unique_ptr<ModuleSummaryIndex>> diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp index ee2fe2a0cc18..b1504a8034e0 100644 --- a/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/lib/Bitcode/Reader/MetadataLoader.cpp @@ -407,6 +407,11 @@ void PlaceholderQueue::flush(BitcodeReaderMetadataList &MetadataList) { } // anonynous namespace +static Error error(const Twine &Message) { + return make_error<StringError>( + Message, make_error_code(BitcodeError::CorruptedBitcode)); +} + class MetadataLoader::MetadataLoaderImpl { BitcodeReaderMetadataList MetadataList; BitcodeReaderValueList &ValueList; @@ -533,6 +538,88 @@ class MetadataLoader::MetadataLoaderImpl { } } + /// Upgrade the expression from previous versions. + Error upgradeDIExpression(uint64_t FromVersion, + MutableArrayRef<uint64_t> &Expr, + SmallVectorImpl<uint64_t> &Buffer) { + auto N = Expr.size(); + switch (FromVersion) { + default: + return error("Invalid record"); + case 0: + if (N >= 3 && Expr[N - 3] == dwarf::DW_OP_bit_piece) + Expr[N - 3] = dwarf::DW_OP_LLVM_fragment; + LLVM_FALLTHROUGH; + case 1: + // Move DW_OP_deref to the end. + if (N && Expr[0] == dwarf::DW_OP_deref) { + auto End = Expr.end(); + if (Expr.size() >= 3 && + *std::prev(End, 3) == dwarf::DW_OP_LLVM_fragment) + End = std::prev(End, 3); + std::move(std::next(Expr.begin()), End, Expr.begin()); + *std::prev(End) = dwarf::DW_OP_deref; + } + NeedDeclareExpressionUpgrade = true; + LLVM_FALLTHROUGH; + case 2: { + // Change DW_OP_plus to DW_OP_plus_uconst. + // Change DW_OP_minus to DW_OP_uconst, DW_OP_minus + auto SubExpr = ArrayRef<uint64_t>(Expr); + while (!SubExpr.empty()) { + // Skip past other operators with their operands + // for this version of the IR, obtained from + // from historic DIExpression::ExprOperand::getSize(). + size_t HistoricSize; + switch (SubExpr.front()) { + default: + HistoricSize = 1; + break; + case dwarf::DW_OP_constu: + case dwarf::DW_OP_minus: + case dwarf::DW_OP_plus: + HistoricSize = 2; + break; + case dwarf::DW_OP_LLVM_fragment: + HistoricSize = 3; + break; + } + + // If the expression is malformed, make sure we don't + // copy more elements than we should. 
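// (e.g. a truncated DW_OP_LLVM_fragment missing one of its two operands
// would otherwise make the slice below read past the end). The rewrite that
// follows maps [DW_OP_plus, 8] to [DW_OP_plus_uconst, 8] and
// [DW_OP_minus, 8] to [DW_OP_constu, 8, DW_OP_minus].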
+ HistoricSize = std::min(SubExpr.size(), HistoricSize); + ArrayRef<uint64_t> Args = SubExpr.slice(1, HistoricSize-1); + + switch (SubExpr.front()) { + case dwarf::DW_OP_plus: + Buffer.push_back(dwarf::DW_OP_plus_uconst); + Buffer.append(Args.begin(), Args.end()); + break; + case dwarf::DW_OP_minus: + Buffer.push_back(dwarf::DW_OP_constu); + Buffer.append(Args.begin(), Args.end()); + Buffer.push_back(dwarf::DW_OP_minus); + break; + default: + Buffer.push_back(*SubExpr.begin()); + Buffer.append(Args.begin(), Args.end()); + break; + } + + // Continue with remaining elements. + SubExpr = SubExpr.slice(HistoricSize); + } + Expr = MutableArrayRef<uint64_t>(Buffer); + LLVM_FALLTHROUGH; + } + case 3: + // Up-to-date! + break; + } + + return Error::success(); + } + void upgradeDebugInfo() { upgradeCUSubprograms(); upgradeCUVariables(); @@ -590,11 +677,6 @@ public: void upgradeDebugIntrinsics(Function &F) { upgradeDeclareExpressions(F); } }; -static Error error(const Twine &Message) { - return make_error<StringError>( - Message, make_error_code(BitcodeError::CorruptedBitcode)); -} - Expected<bool> MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() { IndexCursor = Stream; @@ -1551,34 +1633,13 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( IsDistinct = Record[0] & 1; uint64_t Version = Record[0] >> 1; auto Elts = MutableArrayRef<uint64_t>(Record).slice(1); - unsigned N = Elts.size(); - // Perform various upgrades. - switch (Version) { - case 0: - if (N >= 3 && Elts[N - 3] == dwarf::DW_OP_bit_piece) - Elts[N - 3] = dwarf::DW_OP_LLVM_fragment; - LLVM_FALLTHROUGH; - case 1: - // Move DW_OP_deref to the end. - if (N && Elts[0] == dwarf::DW_OP_deref) { - auto End = Elts.end(); - if (Elts.size() >= 3 && *std::prev(End, 3) == dwarf::DW_OP_LLVM_fragment) - End = std::prev(End, 3); - std::move(std::next(Elts.begin()), End, Elts.begin()); - *std::prev(End) = dwarf::DW_OP_deref; - } - NeedDeclareExpressionUpgrade = true; - LLVM_FALLTHROUGH; - case 2: - // Up-to-date! - break; - default: - return error("Invalid record"); - } + + SmallVector<uint64_t, 6> Buffer; + if (Error Err = upgradeDIExpression(Version, Elts, Buffer)) + return Err; MetadataList.assignValue( - GET_OR_DISTINCT(DIExpression, (Context, makeArrayRef(Record).slice(1))), - NextMetadataNo); + GET_OR_DISTINCT(DIExpression, (Context, Elts)), NextMetadataNo); NextMetadataNo++; break; } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index d5879fec95cb..feeba31908ae 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -77,10 +77,13 @@ protected: /// The stream created and owned by the client. BitstreamWriter &Stream; + StringTableBuilder &StrtabBuilder; + public: /// Constructs a BitcodeWriterBase object that writes to the provided /// \p Stream. - BitcodeWriterBase(BitstreamWriter &Stream) : Stream(Stream) {} + BitcodeWriterBase(BitstreamWriter &Stream, StringTableBuilder &StrtabBuilder) + : Stream(Stream), StrtabBuilder(StrtabBuilder) {} protected: void writeBitcodeHeader(); @@ -97,8 +100,6 @@ class ModuleBitcodeWriter : public BitcodeWriterBase { /// Pointer to the buffer allocated by caller for bitcode writing. const SmallVectorImpl<char> &Buffer; - StringTableBuilder &StrtabBuilder; - /// The Module to write to bitcode. 
const Module &M; @@ -142,8 +143,8 @@ public: BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash, ModuleHash *ModHash = nullptr) - : BitcodeWriterBase(Stream), Buffer(Buffer), StrtabBuilder(StrtabBuilder), - M(*M), VE(*M, ShouldPreserveUseListOrder), Index(Index), + : BitcodeWriterBase(Stream, StrtabBuilder), Buffer(Buffer), M(*M), + VE(*M, ShouldPreserveUseListOrder), Index(Index), GenerateHash(GenerateHash), ModHash(ModHash), BitcodeStartBit(Stream.GetCurrentBitNo()) { // Assign ValueIds to any callee values in the index that came from @@ -331,10 +332,11 @@ public: /// Constructs a IndexBitcodeWriter object for the given combined index, /// writing to the provided \p Buffer. When writing a subset of the index /// for a distributed backend, provide a \p ModuleToSummariesForIndex map. - IndexBitcodeWriter(BitstreamWriter &Stream, const ModuleSummaryIndex &Index, + IndexBitcodeWriter(BitstreamWriter &Stream, StringTableBuilder &StrtabBuilder, + const ModuleSummaryIndex &Index, const std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex = nullptr) - : BitcodeWriterBase(Stream), Index(Index), + : BitcodeWriterBase(Stream, StrtabBuilder), Index(Index), ModuleToSummariesForIndex(ModuleToSummariesForIndex) { // Assign unique value ids to all summaries to be written, for use // in writing out the call graph edges. Save the mapping from GUID @@ -1663,7 +1665,7 @@ void ModuleBitcodeWriter::writeDIExpression(const DIExpression *N, SmallVectorImpl<uint64_t> &Record, unsigned Abbrev) { Record.reserve(N->getElements().size() + 1); - const uint64_t Version = 2 << 1; + const uint64_t Version = 3 << 1; Record.push_back((uint64_t)N->isDistinct() | Version); Record.append(N->elements_begin(), N->elements_end()); @@ -3595,6 +3597,24 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { MaybeEmitOriginalName(*AS); } + if (!Index.cfiFunctionDefs().empty()) { + for (auto &S : Index.cfiFunctionDefs()) { + NameVals.push_back(StrtabBuilder.add(S)); + NameVals.push_back(S.size()); + } + Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DEFS, NameVals); + NameVals.clear(); + } + + if (!Index.cfiFunctionDecls().empty()) { + for (auto &S : Index.cfiFunctionDecls()) { + NameVals.push_back(StrtabBuilder.add(S)); + NameVals.push_back(S.size()); + } + Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DECLS, NameVals); + NameVals.clear(); + } + Stream.ExitBlock(); } @@ -3829,6 +3849,14 @@ void BitcodeWriter::writeModule(const Module *M, ModuleWriter.write(); } +void BitcodeWriter::writeIndex( + const ModuleSummaryIndex *Index, + const std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex) { + IndexBitcodeWriter IndexWriter(*Stream, StrtabBuilder, *Index, + ModuleToSummariesForIndex); + IndexWriter.write(); +} + /// WriteBitcodeToFile - Write the specified module to the specified output /// stream. 
void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, @@ -3880,11 +3908,9 @@ void llvm::WriteIndexToFile( SmallVector<char, 0> Buffer; Buffer.reserve(256 * 1024); - BitstreamWriter Stream(Buffer); - writeBitcodeHeader(Stream); - - IndexBitcodeWriter IndexWriter(Stream, Index, ModuleToSummariesForIndex); - IndexWriter.write(); + BitcodeWriter Writer(Buffer); + Writer.writeIndex(&Index, ModuleToSummariesForIndex); + Writer.writeStrtab(); Out.write((char *)&Buffer.front(), Buffer.size()); } diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index f7c09be15fb7..946067e6358f 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -24,3 +24,4 @@ add_subdirectory(Fuzzer) add_subdirectory(Passes) add_subdirectory(ToolDrivers) add_subdirectory(XRay) +add_subdirectory(Testing) diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 407d5623d670..ad348d723bae 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1286,11 +1286,7 @@ bool AsmPrinter::doFinalization(Module &M) { const TargetLoweringObjectFile &TLOF = getObjFileLowering(); - // Emit module flags. - SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; - M.getModuleFlagsMetadata(ModuleFlags); - if (!ModuleFlags.empty()) - TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, TM); + TLOF.emitModuleMetadata(*OutStreamer, M, TM); if (TM.getTargetTriple().isOSBinFormatELF()) { MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 04073b3aed68..dc39d1e6cb52 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -552,7 +552,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg); DwarfExpr.addFragmentOffset(Expr); SmallVector<uint64_t, 8> Ops; - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Offset); Ops.append(Expr->elements_begin(), Expr->elements_end()); DIExpressionCursor Cursor(Ops); @@ -821,7 +821,7 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, SmallVector<uint64_t, 8> Ops; if (Location.isIndirect() && Location.getOffset()) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Location.getOffset()); } DIExpressionCursor Cursor(Ops); @@ -850,7 +850,7 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, SmallVector<uint64_t, 8> Ops; if (Location.isIndirect() && Location.getOffset()) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Location.getOffset()); } Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index e3fd21a1fd70..75eb355bfb54 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1511,7 +1511,7 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, DwarfExpr.setMemoryLocationKind(); SmallVector<uint64_t, 8> Ops; if (Location.isIndirect() && Location.getOffset()) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Location.getOffset()); } Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h 
b/lib/CodeGen/AsmPrinter/DwarfDebug.h index ebfba4cfc275..5dfe06c64ec2 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -134,6 +134,13 @@ public: assert(!FrameIndexExprs.empty() && "Expected an MMI entry"); assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry"); + if (FrameIndexExprs.size()) { + auto *Expr = FrameIndexExprs.back().Expr; + // Get rid of duplicate non-fragment entries. More than one non-fragment + // dbg.declare makes no sense so ignore all but the first. + if (!Expr || !Expr->isFragment()) + return; + } FrameIndexExprs.append(V.FrameIndexExprs.begin(), V.FrameIndexExprs.end()); assert(all_of(FrameIndexExprs, [](FrameIndexExpr &FIE) { diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index d96479f43433..fe38ee805682 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -248,15 +248,25 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, assert(Reg.Size == 0 && "subregister has same size as superregister"); // Pattern-match combinations for which more efficient representations exist. - // [Reg, Offset, DW_OP_plus] --> [DW_OP_breg, Offset]. - // [Reg, Offset, DW_OP_minus] --> [DW_OP_breg, -Offset]. - // If Reg is a subregister we need to mask it out before subtracting. - if (Op && ((Op->getOp() == dwarf::DW_OP_plus) || - (Op->getOp() == dwarf::DW_OP_minus && !SubRegisterSizeInBits))) { - int Offset = Op->getArg(0); - SignedOffset = (Op->getOp() == dwarf::DW_OP_plus) ? Offset : -Offset; + // [Reg, DW_OP_plus_uconst, Offset] --> [DW_OP_breg, Offset]. + if (Op && (Op->getOp() == dwarf::DW_OP_plus_uconst)) { + SignedOffset = Op->getArg(0); ExprCursor.take(); } + + // [Reg, DW_OP_constu, Offset, DW_OP_plus] --> [DW_OP_breg, Offset] + // [Reg, DW_OP_constu, Offset, DW_OP_minus] --> [DW_OP_breg,-Offset] + // If Reg is a subregister we need to mask it out before subtracting. + if (Op && Op->getOp() == dwarf::DW_OP_constu) { + auto N = ExprCursor.peekNext(); + if (N && (N->getOp() == dwarf::DW_OP_plus || + (N->getOp() == dwarf::DW_OP_minus && !SubRegisterSizeInBits))) { + int Offset = Op->getArg(0); + SignedOffset = (N->getOp() == dwarf::DW_OP_minus) ? -Offset : Offset; + ExprCursor.consume(2); + } + } + if (FBReg) addFBReg(SignedOffset); else @@ -320,17 +330,14 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, LocationKind = Unknown; return; } - case dwarf::DW_OP_plus: + case dwarf::DW_OP_plus_uconst: assert(LocationKind != Register); emitOp(dwarf::DW_OP_plus_uconst); emitUnsigned(Op->getArg(0)); break; + case dwarf::DW_OP_plus: case dwarf::DW_OP_minus: - assert(LocationKind != Register); - // There is no DW_OP_minus_uconst. - emitOp(dwarf::DW_OP_constu); - emitUnsigned(Op->getArg(0)); - emitOp(dwarf::DW_OP_minus); + emitOp(Op->getOp()); break; case dwarf::DW_OP_deref: { assert(LocationKind != Register); diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h index de8613200067..728f8ad9225b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -42,6 +42,9 @@ public: DIExpressionCursor(ArrayRef<uint64_t> Expr) : Start(Expr.begin()), End(Expr.end()) {} + DIExpressionCursor(const DIExpressionCursor &C) + : Start(C.Start), End(C.End) {} + /// Consume one operation. 
Optional<DIExpression::ExprOperand> take() { if (Start == End) diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 7f7d3e650e02..708f5f7536ff 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -475,7 +475,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, SmallVector<uint64_t, 9> Ops; if (Location.isIndirect() && Location.getOffset()) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Location.getOffset()); } // If we started with a pointer to the __Block_byref... struct, then @@ -487,7 +487,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in // adding the offset if it's 0. if (forwardingFieldOffset > 0) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(forwardingFieldOffset); } @@ -499,7 +499,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // for the variable's field to get to the location of the actual variable: // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. if (varFieldOffset > 0) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(varFieldOffset); } diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index c2037cb7f1ae..37e176099ea7 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -134,7 +134,7 @@ static cl::opt<bool> DisablePreheaderProtect( cl::desc("Disable protection against removing loop preheaders")); static cl::opt<bool> ProfileGuidedSectionPrefix( - "profile-guided-section-prefix", cl::Hidden, cl::init(true), + "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Use profile info to add section prefix for hot/cold functions")); static cl::opt<unsigned> FreqRatioToSkipMerge( diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index ef5818dabe23..1d0d3dffa4c5 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -82,6 +82,12 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { case TargetOpcode::G_UDIV: assert(Size == 32 && "Unsupported size"); return RTLIB::UDIV_I32; + case TargetOpcode::G_SREM: + assert(Size == 32 && "Unsupported size"); + return RTLIB::SREM_I32; + case TargetOpcode::G_UREM: + assert(Size == 32 && "Unsupported size"); + return RTLIB::UREM_I32; case TargetOpcode::G_FADD: assert((Size == 32 || Size == 64) && "Unsupported size"); return Size == 64 ? 
RTLIB::ADD_F64 : RTLIB::ADD_F32; @@ -93,43 +99,57 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { llvm_unreachable("Unknown libcall function"); } -static LegalizerHelper::LegalizeResult -simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, - Type *OpType) { +LegalizerHelper::LegalizeResult llvm::replaceWithLibcall( + MachineInstr &MI, MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, + const CallLowering::ArgInfo &Result, ArrayRef<CallLowering::ArgInfo> Args) { auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); - auto Libcall = getRTLibDesc(MI.getOpcode(), Size); const char *Name = TLI.getLibcallName(Libcall); MIRBuilder.getMF().getFrameInfo().setHasCalls(true); - CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), - MachineOperand::CreateES(Name), - {MI.getOperand(0).getReg(), OpType}, - {{MI.getOperand(1).getReg(), OpType}, - {MI.getOperand(2).getReg(), OpType}}); + MIRBuilder.setInstr(MI); + if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), + MachineOperand::CreateES(Name), Result, Args)) + return LegalizerHelper::UnableToLegalize; + + // We're about to remove MI, so move the insert point after it. + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), + std::next(MIRBuilder.getInsertPt())); + MI.eraseFromParent(); return LegalizerHelper::Legalized; } +static LegalizerHelper::LegalizeResult +simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, + Type *OpType) { + auto Libcall = getRTLibDesc(MI.getOpcode(), Size); + return replaceWithLibcall(MI, MIRBuilder, Libcall, + {MI.getOperand(0).getReg(), OpType}, + {{MI.getOperand(1).getReg(), OpType}, + {MI.getOperand(2).getReg(), OpType}}); +} + LegalizerHelper::LegalizeResult LegalizerHelper::libcall(MachineInstr &MI) { - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - unsigned Size = Ty.getSizeInBits(); + LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); + unsigned Size = LLTy.getSizeInBits(); auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); - MIRBuilder.setInstr(MI); switch (MI.getOpcode()) { default: return UnableToLegalize; case TargetOpcode::G_SDIV: - case TargetOpcode::G_UDIV: { - Type *Ty = Type::getInt32Ty(Ctx); - return simpleLibcall(MI, MIRBuilder, Size, Ty); + case TargetOpcode::G_UDIV: + case TargetOpcode::G_SREM: + case TargetOpcode::G_UREM: { + Type *HLTy = Type::getInt32Ty(Ctx); + return simpleLibcall(MI, MIRBuilder, Size, HLTy); } case TargetOpcode::G_FADD: case TargetOpcode::G_FPOW: case TargetOpcode::G_FREM: { - Type *Ty = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); - return simpleLibcall(MI, MIRBuilder, Size, Ty); + Type *HLTy = Size == 64 ? 
Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); + return simpleLibcall(MI, MIRBuilder, Size, HLTy); } } } @@ -237,17 +257,18 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned NarrowSize = NarrowTy.getSizeInBits(); int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; - LLT NarrowPtrTy = LLT::pointer( - MRI.getType(MI.getOperand(1).getReg()).getAddressSpace(), NarrowSize); + LLT OffsetTy = LLT::scalar( + MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); SmallVector<unsigned, 2> DstRegs; for (int i = 0; i < NumParts; ++i) { unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); - unsigned SrcReg = MRI.createGenericVirtualRegister(NarrowPtrTy); - unsigned Offset = MRI.createGenericVirtualRegister(LLT::scalar(64)); + unsigned SrcReg = 0; + unsigned Adjustment = i * NarrowSize / 8; + + MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy, + Adjustment); - MIRBuilder.buildConstant(Offset, i * NarrowSize / 8); - MIRBuilder.buildGEP(SrcReg, MI.getOperand(1).getReg(), Offset); // TODO: This is conservatively correct, but we probably want to split the // memory operands in the future. MIRBuilder.buildLoad(DstReg, SrcReg, **MI.memoperands_begin()); @@ -263,17 +284,19 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned NarrowSize = NarrowTy.getSizeInBits(); int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; - LLT NarrowPtrTy = LLT::pointer( - MRI.getType(MI.getOperand(1).getReg()).getAddressSpace(), NarrowSize); + LLT OffsetTy = LLT::scalar( + MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); SmallVector<unsigned, 2> SrcRegs; extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs); for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowPtrTy); - unsigned Offset = MRI.createGenericVirtualRegister(LLT::scalar(64)); - MIRBuilder.buildConstant(Offset, i * NarrowSize / 8); - MIRBuilder.buildGEP(DstReg, MI.getOperand(1).getReg(), Offset); + unsigned DstReg = 0; + unsigned Adjustment = i * NarrowSize / 8; + + MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy, + Adjustment); + // TODO: This is conservatively correct, but we probably want to split the // memory operands in the future. 
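// materializeGEP (added to MachineIRBuilder in this patch) hands back the
// base register unchanged when the adjustment is 0, so the common first
// iteration no longer emits a dead G_CONSTANT + G_GEP pair.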
MIRBuilder.buildStore(SrcRegs[i], DstReg, **MI.memoperands_begin()); diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 54ef7e5c5a1b..79d312fb52ca 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -191,6 +191,24 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0, .addUse(Op1); } +Optional<MachineInstrBuilder> +MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0, + const LLT &ValueTy, uint64_t Value) { + assert(Res == 0 && "Res is a result argument"); + assert(ValueTy.isScalar() && "invalid offset type"); + + if (Value == 0) { + Res = Op0; + return None; + } + + Res = MRI->createGenericVirtualRegister(MRI->getType(Op0)); + unsigned TmpReg = MRI->createGenericVirtualRegister(ValueTy); + + buildConstant(TmpReg, Value); + return buildGEP(Res, Op0, TmpReg); +} + MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0, uint32_t NumBits) { assert(MRI->getType(Res).isPointer() && diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index fc52b0da0d61..2d4b95974cc6 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -594,8 +594,8 @@ BranchProbability MachineBlockPlacement::collectViableSuccessors( // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after // A->C is chosen as a fall-through, D won't be selected as a successor of C // due to CFG constraint (the probability of C->D is not greater than - // HotProb to break top-order). If we exclude E that is not in BlockFilter - // when calculating the probability of C->D, D will be selected and we + // HotProb to break topo-order). If we exclude E that is not in BlockFilter + // when calculating the probability of C->D, D will be selected and we // will get A C D B as the layout of this loop. auto AdjustedSumProb = BranchProbability::getOne(); for (MachineBasicBlock *Succ : BB->successors()) { @@ -1156,7 +1156,7 @@ void MachineBlockPlacement::precomputeTriangleChains() { continue; // Now we have an interesting triangle. Insert it if it's not part of an - // existing chain + // existing chain. // Note: This cannot be replaced with a call insert() or emplace() because // the find key is BB, but the insert/emplace key is PDom. auto Found = TriangleChainMap.find(&BB); @@ -1298,9 +1298,9 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( // | | | | // ---BB | | BB // | | | | - // | pred-- | Succ-- + // | Pred-- | Succ-- // | | | | - // ---succ ---pred-- + // ---Succ ---Pred-- // // cost = freq(S->Pred) + freq(BB->Succ) cost = 2 * freq (S->Pred) // = freq(S->Pred) + freq(S->BB) diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 52d5819f8dbc..c7113f1fdc47 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -895,8 +895,11 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!MRI->isConstantPhysReg(Reg)) - return false; + // However, if the physreg is known to always be caller saved/restored + // then this use is safe to hoist. + if (!MRI->isConstantPhysReg(Reg) && + !(TRI->isCallerPreservedPhysReg(Reg, *I.getParent()->getParent()))) + return false; // Otherwise it's safe to move. 
continue; } else if (!MO.isDead()) { diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a0967f574006..2d4422d94a17 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2217,7 +2217,8 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N) { // Iff the flag result is dead: // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry) - if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::UADDO) && + if ((N0.getOpcode() == ISD::ADD || + (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) && isNullConstant(N1) && !N->hasAnyUseOfValue(1)) return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0.getOperand(0), N0.getOperand(1), CarryIn); @@ -12460,10 +12461,27 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores); - SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), - FirstInChain->getAlignment()); + + // make sure we use trunc store if it's necessary to be legal. + SDValue NewStore; + if (TLI.isTypeLegal(StoredVal.getValueType())) { + NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), + FirstInChain->getAlignment()); + } else { // Must be realized as a trunc store + EVT LegalizedStoredValueTy = + TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); + unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits(); + ConstantSDNode *C = cast<ConstantSDNode>(StoredVal); + SDValue ExtendedStoreVal = + DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL, + LegalizedStoredValueTy); + NewStore = DAG.getTruncStore( + NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, + FirstInChain->getAlignment(), + FirstInChain->getMemOperand()->getFlags()); + } // Replace all merged stores with the new store. for (unsigned i = 0; i < NumStores; ++i) @@ -12731,8 +12749,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { IsFast) { LastLegalType = i + 1; // Or check whether a truncstore is legal. 
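// e.g. two merged i16 constant stores can still be emitted as an i32 value
// truncated on store after type legalization, which is why the !LegalTypes
// guard is dropped below (matching the trunc-store path added to
// MergeStoresOfConstantsOrVecElts above).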
- } else if (!LegalTypes && - TLI.getTypeAction(Context, StoreTy) == + } else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); @@ -12947,8 +12964,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy); - if (!LegalTypes && - TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && @@ -12958,8 +12974,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, FirstStoreAS, FirstStoreAlign, &IsFastSt) && IsFastSt && - TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, - FirstLoadAS, FirstLoadAlign, &IsFastLd) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && IsFastLd) LastLegalIntegerType = i + 1; } @@ -13189,10 +13205,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { Chain = ST->getChain(); } - // Try transforming N to an indexed store. - if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) - return SDValue(N, 0); - // FIXME: is there such a thing as a truncating indexed store? if (ST->isTruncatingStore() && ST->isUnindexed() && Value.getValueType().isInteger()) { @@ -13287,6 +13299,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } } + // Try transforming N to an indexed store. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' // // Make sure to do this only after attempting to merge stores in order to @@ -14692,21 +14708,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset, VT.getStoreSize()); SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO); - - // The new load must have the same position as the old load in terms of memory - // dependency. Create a TokenFactor for Ld and NewLd and update uses of Ld's - // output chain to use that TokenFactor. - // TODO: This code is based on a similar sequence in x86 lowering. It should - // be moved to a helper function, so it can be shared and reused. 
- if (Ld->hasAnyUseOfValue(1)) { - SDValue OldChain = SDValue(Ld, 1); - SDValue NewChain = SDValue(NewLd.getNode(), 1); - SDValue TokenFactor = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - OldChain, NewChain); - DAG.ReplaceAllUsesOfValueWith(OldChain, TokenFactor); - DAG.UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain); - } - + DAG.makeEquivalentMemoryOrdering(Ld, NewLd); return NewLd; } diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 606b8952f3c1..b736037d71dd 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -523,3 +523,29 @@ void FunctionLoweringInfo::setCurrentSwiftErrorVReg( const MachineBasicBlock *MBB, const Value *Val, unsigned VReg) { SwiftErrorVRegDefMap[std::make_pair(MBB, Val)] = VReg; } + +std::pair<unsigned, bool> +FunctionLoweringInfo::getOrCreateSwiftErrorVRegDefAt(const Instruction *I) { + auto Key = PointerIntPair<const Instruction *, 1, bool>(I, true); + auto It = SwiftErrorVRegDefUses.find(Key); + if (It == SwiftErrorVRegDefUses.end()) { + auto &DL = MF->getDataLayout(); + const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); + unsigned VReg = MF->getRegInfo().createVirtualRegister(RC); + SwiftErrorVRegDefUses[Key] = VReg; + return std::make_pair(VReg, true); + } + return std::make_pair(It->second, false); +} + +std::pair<unsigned, bool> +FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) { + auto Key = PointerIntPair<const Instruction *, 1, bool>(I, false); + auto It = SwiftErrorVRegDefUses.find(Key); + if (It == SwiftErrorVRegDefUses.end()) { + unsigned VReg = getOrCreateSwiftErrorVReg(MBB, Val); + SwiftErrorVRegDefUses[Key] = VReg; + return std::make_pair(VReg, true); + } + return std::make_pair(It->second, false); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index e54eaa3b81be..15e87b7af18d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2192,19 +2192,6 @@ static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { return TLI.getLibcallName(LC) != nullptr; } -/// Return true if sincos libcall is available and can be used to combine sin -/// and cos. -static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, - const TargetMachine &TM) { - if (!isSinCosLibcallAvailable(Node, TLI)) - return false; - // GNU sin/cos functions set errno while sincos does not. Therefore - // combining sin and cos is only safe if unsafe-fpmath is enabled. - if (TM.getTargetTriple().isGNUEnvironment() && !TM.Options.UnsafeFPMath) - return false; - return true; -} - /// Only issue sincos libcall if both sin and cos are needed. static bool useSinCos(SDNode *Node) { unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN @@ -3247,7 +3234,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin / // fcos which share the same operand and both are used. 
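The two getOrCreateSwiftErrorVReg{Def,Use}At functions added to FunctionLoweringInfo above key a single table by (instruction, is-def) and report whether the lookup created a fresh register. A standalone sketch of that bookkeeping, with a plain std::map in place of the real map type and sequential integers in place of real vregs:

#include <cassert>
#include <map>
#include <utility>

using InstKey = std::pair<const void *, bool>; // (instruction, IsDef)

struct SwiftErrorVRegTable {
  std::map<InstKey, unsigned> DefUses;
  unsigned NextVReg = 1;

  std::pair<unsigned, bool> getOrCreateAt(const void *I, bool IsDef) {
    auto It = DefUses.find({I, IsDef});
    if (It == DefUses.end()) {
      unsigned VReg = NextVReg++;
      DefUses[{I, IsDef}] = VReg;
      return {VReg, true};        // freshly created at this def/use site
    }
    return {It->second, false};   // pre-assigned earlier (e.g. for FastISel)
  }
};

int main() {
  SwiftErrorVRegTable T;
  int Call = 0; // stands in for a call instruction
  auto Use = T.getOrCreateAt(&Call, /*IsDef=*/false);
  auto Def = T.getOrCreateAt(&Call, /*IsDef=*/true);
  assert(Use.second && Def.second && Use.first != Def.first);
  assert(!T.getOrCreateAt(&Call, /*IsDef=*/false).second); // reused
  return 0;
}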
if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) || - canCombineSinCosLibcall(Node, TLI, TM)) + isSinCosLibcallAvailable(Node, TLI)) && useSinCos(Node)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 0d5e07ded25c..a3ba52a148ee 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1828,10 +1828,11 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? ISD::UADDO : ISD::USUBO, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); + if (hasOVF) { EVT OvfVT = getSetCCResultType(NVT); SDVTList VTList = DAG.getVTList(NVT, OvfVT); - TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); int RevOpc; if (N->getOpcode() == ISD::ADD) { RevOpc = ISD::SUB; @@ -1864,6 +1865,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); + + if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) { + SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry); + return; + } + SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, DAG.getConstant(1, dl, NVT), DAG.getConstant(0, dl, NVT)); @@ -1878,9 +1886,14 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); - SDValue Borrow = DAG.getSelect(dl, NVT, Cmp, - DAG.getConstant(1, dl, NVT), - DAG.getConstant(0, dl, NVT)); + + SDValue Borrow; + if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) + Borrow = DAG.getZExtOrTrunc(Cmp, dl, NVT); + else + Borrow = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index dff8bd2ad37d..7abdc76cb004 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7244,6 +7244,24 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { AddDbgValue(I, ToNode, false); } +void SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, + SDValue NewMemOp) { + assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); + if (!OldLoad->hasAnyUseOfValue(1)) + return; + + // The new memory operation must have the same position as the old load in + // terms of memory dependency. Create a TokenFactor for the old load and new + // memory operation and update uses of the old load's output chain to use that + // TokenFactor. 
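The ExpandIntRes_ADDSUB change above exploits ZeroOrOneBooleanContent: when the target's setcc already yields 0 or 1, the select(cmp, 1, 0) producing the carry can be replaced by a plain zero-extension of the compare. A standalone model of the expanded 64-bit add on a 32-bit target:

#include <cassert>
#include <cstdint>

static uint64_t add64_via_32(uint32_t alo, uint32_t ahi,
                             uint32_t blo, uint32_t bhi) {
  uint32_t lo = alo + blo;
  uint32_t carry = (lo < alo) ? 1u : 0u; // zext of the SETULT, no select needed
  uint32_t hi = ahi + bhi + carry;
  return ((uint64_t)hi << 32) | lo;
}

int main() {
  assert(add64_via_32(0xFFFFFFFFu, 0, 1, 0) == 0x100000000ull);
  assert(add64_via_32(0x80000000u, 1, 0x80000000u, 2) == 0x400000000ull);
  return 0;
}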
+ SDValue OldChain = SDValue(OldLoad, 1); + SDValue NewChain = SDValue(NewMemOp.getNode(), 1); + SDValue TokenFactor = + getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain); + ReplaceAllUsesOfValueWith(OldChain, TokenFactor); + UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain); +} + //===----------------------------------------------------------------------===// // SDNode Class //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index d34ac40b9496..f9f431db55be 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1496,9 +1496,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { true /*isfixed*/, 1 /*origidx*/, 0 /*partOffs*/)); // Create SDNode for the swifterror virtual register. - OutVals.push_back(DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg( - FuncInfo.MBB, FuncInfo.SwiftErrorArg), - EVT(TLI.getPointerTy(DL)))); + OutVals.push_back( + DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt( + &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first, + EVT(TLI.getPointerTy(DL)))); } bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); @@ -3581,8 +3582,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - assert(TLI.supportSwiftError() && + assert(DAG.getTargetLoweringInfo().supportSwiftError() && "call visitStoreToSwiftError when backend supports swifterror"); SmallVector<EVT, 4> ValueVTs; @@ -3595,15 +3595,15 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { SDValue Src = getValue(SrcV); // Create a virtual register, then update the virtual register. - auto &DL = DAG.getDataLayout(); - const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); - unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + unsigned VReg; bool CreatedVReg; + std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I); // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue // Chain can be getRoot or getControlRoot. 
SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg, SDValue(Src.getNode(), Src.getResNo())); DAG.setRoot(CopyNode); - FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); + if (CreatedVReg) + FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); } void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { @@ -3633,7 +3633,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT SDValue L = DAG.getCopyFromReg( getRoot(), getCurSDLoc(), - FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, SV), ValueVTs[0]); + FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first, + ValueVTs[0]); setValue(&I, L); } @@ -4942,11 +4943,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { updateDAGForMaybeTailCall(MM); return nullptr; } - case Intrinsic::memcpy_element_atomic: { - SDValue Dst = getValue(I.getArgOperand(0)); - SDValue Src = getValue(I.getArgOperand(1)); - SDValue NumElements = getValue(I.getArgOperand(2)); - SDValue ElementSize = getValue(I.getArgOperand(3)); + case Intrinsic::memcpy_element_unordered_atomic: { + const ElementUnorderedAtomicMemCpyInst &MI = + cast<ElementUnorderedAtomicMemCpyInst>(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Src = getValue(MI.getRawSource()); + SDValue Length = getValue(MI.getLength()); // Emit a library call. TargetLowering::ArgListTy Args; @@ -4958,18 +4960,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Entry.Node = Src; Args.push_back(Entry); - Entry.Ty = I.getArgOperand(2)->getType(); - Entry.Node = NumElements; - Args.push_back(Entry); - - Entry.Ty = Type::getInt32Ty(*DAG.getContext()); - Entry.Node = ElementSize; + Entry.Ty = MI.getLength()->getType(); + Entry.Node = Length; Args.push_back(Entry); - uint64_t ElementSizeConstant = - cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); RTLIB::Libcall LibraryCall = - RTLIB::getMEMCPY_ELEMENT_ATOMIC(ElementSizeConstant); + RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) report_fatal_error("Unsupported element size"); @@ -6030,9 +6027,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SwiftErrorVal = V; // We find the virtual register for the actual swifterror argument. // Instead of using the Value, we use the virtual register instead. - Entry.Node = - DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, V), - EVT(TLI.getPointerTy(DL))); + Entry.Node = DAG.getRegister(FuncInfo + .getOrCreateSwiftErrorVRegUseAt( + CS.getInstruction(), FuncInfo.MBB, V) + .first, + EVT(TLI.getPointerTy(DL))); } Args.push_back(Entry); @@ -6073,11 +6072,13 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (SwiftErrorVal && TLI.supportSwiftError()) { // Get the last element of InVals. SDValue Src = CLI.InVals.back(); - const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); - unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + unsigned VReg; bool CreatedVReg; + std::tie(VReg, CreatedVReg) = + FuncInfo.getOrCreateSwiftErrorVRegDefAt(CS.getInstruction()); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); // We update the virtual register for the actual swifterror argument. 
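For llvm.memcpy.element.unordered.atomic, the element size is a constant in the intrinsic, and the lowering above picks a size-specialized runtime entry point, failing for unsupported sizes. A standalone model of that selection, using the entry-point names from the TargetLoweringBase hunk further below:

#include <cassert>
#include <cstdint>
#include <cstring>

static const char *getElementUnorderedAtomicMemcpyName(uint64_t ElementSize) {
  switch (ElementSize) {
  case 1:  return "__llvm_memcpy_element_unordered_atomic_1";
  case 2:  return "__llvm_memcpy_element_unordered_atomic_2";
  case 4:  return "__llvm_memcpy_element_unordered_atomic_4";
  case 8:  return "__llvm_memcpy_element_unordered_atomic_8";
  case 16: return "__llvm_memcpy_element_unordered_atomic_16";
  default: return nullptr; // UNKNOWN_LIBCALL -> "Unsupported element size"
  }
}

int main() {
  assert(!std::strcmp(getElementUnorderedAtomicMemcpyName(4),
                      "__llvm_memcpy_element_unordered_atomic_4"));
  assert(getElementUnorderedAtomicMemcpyName(3) == nullptr);
  return 0;
}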
- FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); + if (CreatedVReg) + FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); DAG.setRoot(CopyNode); } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index b67f11f85b70..dcccd17bb98e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1055,6 +1055,7 @@ static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI, FuncInfo->SwiftErrorVals.clear(); FuncInfo->SwiftErrorVRegDefMap.clear(); FuncInfo->SwiftErrorVRegUpwardsUse.clear(); + FuncInfo->SwiftErrorVRegDefUses.clear(); FuncInfo->SwiftErrorArg = nullptr; // Check if function has a swifterror argument. @@ -1278,6 +1279,80 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { } } +void preassignSwiftErrorRegs(const TargetLowering *TLI, + FunctionLoweringInfo *FuncInfo, + BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End) { + if (!TLI->supportSwiftError() || FuncInfo->SwiftErrorVals.empty()) + return; + + // Iterator over instructions and assign vregs to swifterror defs and uses. + for (auto It = Begin; It != End; ++It) { + ImmutableCallSite CS(&*It); + if (CS) { + // A call-site with a swifterror argument is both use and def. + const Value *SwiftErrorAddr = nullptr; + for (auto &Arg : CS.args()) { + if (!Arg->isSwiftError()) + continue; + // Use of swifterror. + assert(!SwiftErrorAddr && "Cannot have multiple swifterror arguments"); + SwiftErrorAddr = &*Arg; + assert(SwiftErrorAddr->isSwiftError() && + "Must have a swifterror value argument"); + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt( + &*It, FuncInfo->MBB, SwiftErrorAddr); + assert(CreatedReg); + } + if (!SwiftErrorAddr) + continue; + + // Def of swifterror. + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = + FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It); + assert(CreatedReg); + FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg); + + // A load is a use. + } else if (const LoadInst *LI = dyn_cast<const LoadInst>(&*It)) { + const Value *V = LI->getOperand(0); + if (!V->isSwiftError()) + continue; + + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = + FuncInfo->getOrCreateSwiftErrorVRegUseAt(LI, FuncInfo->MBB, V); + assert(CreatedReg); + + // A store is a def. + } else if (const StoreInst *SI = dyn_cast<const StoreInst>(&*It)) { + const Value *SwiftErrorAddr = SI->getOperand(1); + if (!SwiftErrorAddr->isSwiftError()) + continue; + + // Def of swifterror. + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = + FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It); + assert(CreatedReg); + FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg); + + // A return in a swiferror returning function is a use. + } else if (const ReturnInst *R = dyn_cast<const ReturnInst>(&*It)) { + const Function *F = R->getParent()->getParent(); + if(!F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + continue; + + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt( + R, FuncInfo->MBB, FuncInfo->SwiftErrorArg); + assert(CreatedReg); + } + } +} + void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastISelFailed = false; // Initialize the Fast-ISel state, if needed. 
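preassignSwiftErrorRegs above classifies each instruction touching a swifterror value before FastISel runs: a call site is both a use and a def, a load is a use, a store is a def, and a return in a swifterror function is a use. The same classification as a standalone table (illustrative enums, not LLVM code):

#include <cassert>

enum Role { None = 0, Use = 1, Def = 2, UseAndDef = Use | Def };

enum InstKind { CallWithSwiftError, LoadFromSwiftError, StoreToSwiftError,
                RetInSwiftErrorFn, Other };

static Role classify(InstKind K) {
  switch (K) {
  case CallWithSwiftError: return UseAndDef; // reads, then redefines the vreg
  case LoadFromSwiftError: return Use;
  case StoreToSwiftError:  return Def;
  case RetInSwiftErrorFn:  return Use;       // returns the current value
  case Other:              return None;
  }
  return None;
}

int main() {
  assert(classify(CallWithSwiftError) == UseAndDef);
  assert(classify(StoreToSwiftError) == Def);
  return 0;
}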
@@ -1384,6 +1459,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->startNewBlock(); unsigned NumFastIselRemaining = std::distance(Begin, End); + + // Pre-assign swifterror vregs. + preassignSwiftErrorRegs(TLI, FuncInfo, Begin, End); + // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { const Instruction *Inst = &*std::prev(BI); diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 3a50aaa69985..008b984dd961 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -569,8 +569,7 @@ SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg, // Greedy heuristic: Keep iterating keeping the best covering subreg index // each time. - LaneBitmask LanesLeft = - LaneMask & ~(TRI.getSubRegIndexLaneMask(BestCover)); + LaneBitmask LanesLeft = LaneMask & ~(TRI.getSubRegIndexLaneMask(BestIdx)); while (LanesLeft.any()) { unsigned BestIdx = 0; int BestCover = INT_MIN; diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index acb3676fdd71..6bac39c7ee77 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -86,10 +86,134 @@ STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots."); STATISTIC(StackSlotMerged, "Number of stack slot merged."); STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); +//===----------------------------------------------------------------------===// +// StackColoring Pass +//===----------------------------------------------------------------------===// +// +// Stack Coloring reduces stack usage by merging stack slots when they +// can't be used together. For example, consider the following C program: +// +// void bar(char *, int); +// void foo(bool var) { +// A: { +// char z[4096]; +// bar(z, 0); +// } +// +// char *p; +// char x[4096]; +// char y[4096]; +// if (var) { +// p = x; +// } else { +// bar(y, 1); +// p = y + 1024; +// } +// B: +// bar(p, 2); +// } +// +// Naively-compiled, this program would use 12k of stack space. However, the +// stack slot corresponding to `z` is always destroyed before either of the +// stack slots for `x` or `y` are used, and then `x` is only used if `var` +// is true, while `y` is only used if `var` is false. So in no time are 2 +// of the stack slots used together, and therefore we can merge them, +// compiling the function using only a single 4k alloca: +// +// void foo(bool var) { // equivalent +// char x[4096]; +// char *p; +// bar(x, 0); +// if (var) { +// p = x; +// } else { +// bar(x, 1); +// p = x + 1024; +// } +// bar(p, 2); +// } +// +// This is an important optimization if we want stack space to be under +// control in large functions, both open-coded ones and ones created by +// inlining. // // Implementation Notes: // --------------------- // +// An important part of the above reasoning is that `z` can't be accessed +// while the latter 2 calls to `bar` are running. This is justified because +// `z`'s lifetime is over after we exit from block `A:`, so any further +// accesses to it would be UB. The way we represent this information +// in LLVM is by having frontends delimit blocks with `lifetime.start` +// and `lifetime.end` intrinsics. +// +// The effect of these intrinsics seems to be as follows (maybe I should +// specify this in the reference?): +// +// L1) at start, each stack-slot is marked as *out-of-scope*, unless no +// lifetime intrinsic refers to that stack slot, in which case +// it is marked as *in-scope*. 
+// L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and +// the stack slot is overwritten with `undef`. +// L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*. +// L4) on function exit, all stack slots are marked as *out-of-scope*. +// L5) `lifetime.end` is a no-op when called on a slot that is already +// *out-of-scope*. +// L6) memory accesses to *out-of-scope* stack slots are UB. +// L7) when a stack-slot is marked as *out-of-scope*, all pointers to it +// are invalidated, unless the slot is "degenerate". This is used to +// justify not marking slots as in-use until the pointer to them is +// used, but feels a bit hacky in the presence of things like LICM. See +// the "Degenerate Slots" section for more details. +// +// Now, let's ground stack coloring on these rules. We'll define a slot +// as *in-use* at a (dynamic) point in execution if it either can be +// written to at that point, or if it has a live and non-undef content +// at that point. +// +// Obviously, slots that are never *in-use* together can be merged, and +// in our example `foo`, the slots for `x`, `y` and `z` are never +// in-use together (of course, sometimes slots that *are* in-use together +// might still be mergable, but we don't care about that here). +// +// In this implementation, we successively merge pairs of slots that are +// not *in-use* together. We could be smarter - for example, we could merge +// a single large slot with 2 small slots, or we could construct the +// interference graph and run a "smart" graph coloring algorithm, but with +// that aside, how do we find out whether a pair of slots might be *in-use* +// together? +// +// From our rules, we see that *out-of-scope* slots are never *in-use*, +// and from (L7) we see that "non-degenerate" slots remain non-*in-use* +// until their address is taken. Therefore, we can approximate slot activity +// using dataflow. +// +// A subtle point: naively, we might try to figure out which pairs of +// stack-slots interfere by propagating `S in-use` through the CFG for every +// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in +// which they are both *in-use*. +// +// That is sound, but overly conservative in some cases: in our (artificial) +// example `foo`, either `x` or `y` might be in use at the label `B:`, but +// as `x` is only in use if we came in from the `var` edge and `y` only +// if we came from the `!var` edge, they still can't be in use together. +// See PR32488 for an important real-life case. +// +// If we wanted to find all points of interference precisely, we could +// propagate `S in-use` and `S&T in-use` predicates through the CFG. That +// would be precise, but requires propagating `O(n^2)` dataflow facts. +// +// However, we aren't interested in the *set* of points of interference +// between 2 stack slots, only *whether* there *is* such a point. So we +// can rely on a little trick: for `S` and `T` to be in-use together, +// one of them needs to become in-use while the other is in-use (or +// they might both become in use simultaneously). We can check this +// by also keeping track of the points at which a stack slot might *start* +// being in-use. +// +// Exact first use: +// ---------------- +// // Consider the following motivating example: // // int foo() { @@ -158,6 +282,9 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024 // byte stack (better). 
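The "one slot starts while the other is live" criterion described above can be checked with plain intervals. A standalone model, where each slot carries its live segments plus the points at which it may start being in-use (the analogue of the LiveStarts vectors introduced by this patch):

#include <cassert>
#include <vector>

struct Interval { int Begin, End; }; // half-open [Begin, End)

static bool liveAt(const std::vector<Interval> &Live, int Point) {
  for (const auto &I : Live)
    if (I.Begin <= Point && Point < I.End)
      return true;
  return false;
}

static bool interfere(const std::vector<Interval> &A,
                      const std::vector<int> &AStarts,
                      const std::vector<Interval> &B,
                      const std::vector<int> &BStarts) {
  for (int S : AStarts)  // does A start while B is live?
    if (liveAt(B, S))
      return true;
  for (int S : BStarts)  // does B start while A is live?
    if (liveAt(A, S))
      return true;
  return false;
}

int main() {
  // x live on [10,20) starting at 10; y live on [20,30) starting at 20.
  std::vector<Interval> X{{10, 20}}, Y{{20, 30}};
  assert(!interfere(X, {10}, Y, {20})); // disjoint: mergeable
  // z live on [15,25) starts inside x's range, so the two interfere.
  std::vector<Interval> Z{{15, 25}};
  assert(interfere(X, {10}, Z, {15}));
  return 0;
}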
// +// Degenerate Slots: +// ----------------- +// // Relying entirely on first-use of stack slots is problematic, // however, due to the fact that optimizations can sometimes migrate // uses of a variable outside of its lifetime start/end region. Here @@ -237,10 +364,6 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); // for "b" then it will appear that 'b' has a degenerate lifetime. // -//===----------------------------------------------------------------------===// -// StackColoring Pass -//===----------------------------------------------------------------------===// - namespace { /// StackColoring - A machine pass for merging disjoint stack allocations, /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions. @@ -271,8 +394,11 @@ class StackColoring : public MachineFunctionPass { /// Maps basic blocks to a serial number. SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering; - /// Maps liveness intervals for each slot. + /// Maps slots to their use interval. Outside of this interval, slots + /// values are either dead or `undef` and they will not be written to. SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals; + /// Maps slots to the points where they can become in-use. + SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts; /// VNInfo is used for the construction of LiveIntervals. VNInfo::Allocator VNInfoAllocator; /// SlotIndex analysis object. @@ -672,15 +798,22 @@ void StackColoring::calculateLocalLiveness() void StackColoring::calculateLiveIntervals(unsigned NumSlots) { SmallVector<SlotIndex, 16> Starts; - SmallVector<SlotIndex, 16> Finishes; + SmallVector<bool, 16> DefinitelyInUse; // For each block, find which slots are active within this block // and update the live intervals. for (const MachineBasicBlock &MBB : *MF) { Starts.clear(); Starts.resize(NumSlots); - Finishes.clear(); - Finishes.resize(NumSlots); + DefinitelyInUse.clear(); + DefinitelyInUse.resize(NumSlots); + + // Start the interval of the slots that we previously found to be 'in-use'. + BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB]; + for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1; + pos = MBBLiveness.LiveIn.find_next(pos)) { + Starts[pos] = Indexes->getMBBStartIdx(&MBB); + } // Create the interval for the basic blocks containing lifetime begin/end. for (const MachineInstr &MI : MBB) { @@ -692,66 +825,35 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { SlotIndex ThisIndex = Indexes->getInstructionIndex(MI); for (auto Slot : slots) { if (IsStart) { - if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex) + // If a slot is already definitely in use, we don't have to emit + // a new start marker because there is already a pre-existing + // one. + if (!DefinitelyInUse[Slot]) { + LiveStarts[Slot].push_back(ThisIndex); + DefinitelyInUse[Slot] = true; + } + if (!Starts[Slot].isValid()) Starts[Slot] = ThisIndex; } else { - if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex) - Finishes[Slot] = ThisIndex; + if (Starts[Slot].isValid()) { + VNInfo *VNI = Intervals[Slot]->getValNumInfo(0); + Intervals[Slot]->addSegment( + LiveInterval::Segment(Starts[Slot], ThisIndex, VNI)); + Starts[Slot] = SlotIndex(); // Invalidate the start index + DefinitelyInUse[Slot] = false; + } } } } - // Create the interval of the blocks that we previously found to be 'alive'. 
- BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB]; - for (unsigned pos : MBBLiveness.LiveIn.set_bits()) { - Starts[pos] = Indexes->getMBBStartIdx(&MBB); - } - for (unsigned pos : MBBLiveness.LiveOut.set_bits()) { - Finishes[pos] = Indexes->getMBBEndIdx(&MBB); - } - + // Finish up started segments for (unsigned i = 0; i < NumSlots; ++i) { - // - // When LifetimeStartOnFirstUse is turned on, data flow analysis - // is forward (from starts to ends), not bidirectional. A - // consequence of this is that we can wind up in situations - // where Starts[i] is invalid but Finishes[i] is valid and vice - // versa. Example: - // - // LIFETIME_START x - // if (...) { - // <use of x> - // throw ...; - // } - // LIFETIME_END x - // return 2; - // - // - // Here the slot for "x" will not be live into the block - // containing the "return 2" (since lifetimes start with first - // use, not at the dominating LIFETIME_START marker). - // - if (Starts[i].isValid() && !Finishes[i].isValid()) { - Finishes[i] = Indexes->getMBBEndIdx(&MBB); - } if (!Starts[i].isValid()) continue; - assert(Starts[i] && Finishes[i] && "Invalid interval"); - VNInfo *ValNum = Intervals[i]->getValNumInfo(0); - SlotIndex S = Starts[i]; - SlotIndex F = Finishes[i]; - if (S < F) { - // We have a single consecutive region. - Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum)); - } else { - // We have two non-consecutive regions. This happens when - // LIFETIME_START appears after the LIFETIME_END marker. - SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB); - SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB); - Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum)); - Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum)); - } + SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB); + VNInfo *VNI = Intervals[i]->getValNumInfo(0); + Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI)); } } } @@ -981,6 +1083,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { BasicBlockNumbering.clear(); Markers.clear(); Intervals.clear(); + LiveStarts.clear(); VNInfoAllocator.Reset(); unsigned NumSlots = MFI->getObjectIndexEnd(); @@ -992,6 +1095,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { SmallVector<int, 8> SortedSlots; SortedSlots.reserve(NumSlots); Intervals.reserve(NumSlots); + LiveStarts.resize(NumSlots); unsigned NumMarkers = collectMarkers(NumSlots); @@ -1063,6 +1167,9 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS); }); + for (auto &s : LiveStarts) + std::sort(s.begin(), s.end()); + bool Changed = true; while (Changed) { Changed = false; @@ -1078,12 +1185,22 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { int SecondSlot = SortedSlots[J]; LiveInterval *First = &*Intervals[FirstSlot]; LiveInterval *Second = &*Intervals[SecondSlot]; + auto &FirstS = LiveStarts[FirstSlot]; + auto &SecondS = LiveStarts[SecondSlot]; assert (!First->empty() && !Second->empty() && "Found an empty range"); - // Merge disjoint slots. - if (!First->overlaps(*Second)) { + // Merge disjoint slots. This is a little bit tricky - see the + // Implementation Notes section for an explanation. 
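When two slots are merged below, both per-slot start lists are already sorted, so the second list is appended and std::inplace_merge restores the sorted invariant in linear time. A minimal standalone illustration of that bookkeeping:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> FirstS{2, 8, 15};
  std::vector<int> SecondS{5, 9};
  auto OldSize = FirstS.size();
  FirstS.insert(FirstS.end(), SecondS.begin(), SecondS.end());
  std::inplace_merge(FirstS.begin(), FirstS.begin() + OldSize, FirstS.end());
  assert(std::is_sorted(FirstS.begin(), FirstS.end()));
  return 0;
}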
+ if (!First->isLiveAtIndexes(SecondS) && + !Second->isLiveAtIndexes(FirstS)) { Changed = true; First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0)); + + int OldSize = FirstS.size(); + FirstS.append(SecondS.begin(), SecondS.end()); + auto Mid = FirstS.begin() + OldSize; + std::inplace_merge(FirstS.begin(), Mid, FirstS.end()); + SlotRemap[SecondSlot] = FirstSlot; SortedSlots[J] = -1; DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<< diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 581cfaf60755..e9d38c10c860 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -374,11 +374,16 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::MEMCPY] = "memcpy"; Names[RTLIB::MEMMOVE] = "memmove"; Names[RTLIB::MEMSET] = "memset"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_1] = "__llvm_memcpy_element_atomic_1"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_2] = "__llvm_memcpy_element_atomic_2"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_4] = "__llvm_memcpy_element_atomic_4"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_8] = "__llvm_memcpy_element_atomic_8"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_16] = "__llvm_memcpy_element_atomic_16"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_1] = + "__llvm_memcpy_element_unordered_atomic_1"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_2] = + "__llvm_memcpy_element_unordered_atomic_2"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_4] = + "__llvm_memcpy_element_unordered_atomic_4"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_8] = + "__llvm_memcpy_element_unordered_atomic_8"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16] = + "__llvm_memcpy_element_unordered_atomic_16"; Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; @@ -781,22 +786,21 @@ RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) { return UNKNOWN_LIBCALL; } -RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_ATOMIC(uint64_t ElementSize) { +RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { switch (ElementSize) { case 1: - return MEMCPY_ELEMENT_ATOMIC_1; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_1; case 2: - return MEMCPY_ELEMENT_ATOMIC_2; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_2; case 4: - return MEMCPY_ELEMENT_ATOMIC_4; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_4; case 8: - return MEMCPY_ELEMENT_ATOMIC_8; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_8; case 16: - return MEMCPY_ELEMENT_ATOMIC_16; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_16; default: return UNKNOWN_LIBCALL; } - } /// InitCmpLibcallCCs - Set default comparison libcall CC. diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index a0c68e1dcce8..6922e33c8d6c 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -61,9 +61,11 @@ using namespace llvm; using namespace dwarf; -static void GetObjCImageInfo(ArrayRef<Module::ModuleFlagEntry> ModuleFlags, - unsigned &Version, unsigned &Flags, +static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags, StringRef &Section) { + SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; + M.getModuleFlagsMetadata(ModuleFlags); + for (const auto &MFE: ModuleFlags) { // Ignore flags with 'Require' behaviour. 
if (MFE.Behavior == Module::Require) @@ -88,14 +90,13 @@ static void GetObjCImageInfo(ArrayRef<Module::ModuleFlagEntry> ModuleFlags, // ELF //===----------------------------------------------------------------------===// -void TargetLoweringObjectFileELF::emitModuleFlags( - MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags, - const TargetMachine &TM) const { +void TargetLoweringObjectFileELF::emitModuleMetadata( + MCStreamer &Streamer, Module &M, const TargetMachine &TM) const { unsigned Version = 0; unsigned Flags = 0; StringRef Section; - GetObjCImageInfo(ModuleFlags, Version, Flags, Section); + GetObjCImageInfo(M, Version, Flags, Section); if (Section.empty()) return; @@ -618,20 +619,10 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, } } -/// emitModuleFlags - Perform code emission for module flags. -void TargetLoweringObjectFileMachO::emitModuleFlags( - MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags, - const TargetMachine &TM) const { - MDNode *LinkerOptions = nullptr; - - for (const auto &MFE : ModuleFlags) { - StringRef Key = MFE.Key->getString(); - if (Key == "Linker Options") - LinkerOptions = cast<MDNode>(MFE.Val); - } - +void TargetLoweringObjectFileMachO::emitModuleMetadata( + MCStreamer &Streamer, Module &M, const TargetMachine &TM) const { // Emit the linker options if present. - if (LinkerOptions) { + if (auto *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) { for (const auto &Option : LinkerOptions->operands()) { SmallVector<std::string, 4> StrOptions; for (const auto &Piece : cast<MDNode>(Option)->operands()) @@ -643,7 +634,8 @@ void TargetLoweringObjectFileMachO::emitModuleFlags( unsigned VersionVal = 0; unsigned ImageInfoFlags = 0; StringRef SectionVal; - GetObjCImageInfo(ModuleFlags, VersionVal, ImageInfoFlags, SectionVal); + + GetObjCImageInfo(M, VersionVal, ImageInfoFlags, SectionVal); // The section is mandatory. If we don't have it, then we don't have GC info. if (SectionVal.empty()) @@ -1159,18 +1151,9 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable( COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID); } -void TargetLoweringObjectFileCOFF::emitModuleFlags( - MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags, - const TargetMachine &TM) const { - MDNode *LinkerOptions = nullptr; - - for (const auto &MFE : ModuleFlags) { - StringRef Key = MFE.Key->getString(); - if (Key == "Linker Options") - LinkerOptions = cast<MDNode>(MFE.Val); - } - - if (LinkerOptions) { +void TargetLoweringObjectFileCOFF::emitModuleMetadata( + MCStreamer &Streamer, Module &M, const TargetMachine &TM) const { + if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) { // Emit the linker options to the linker .drectve section. According to the // spec, this section is a space-separated string containing flags for // linker. 
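The emitModuleMetadata overloads now read the llvm.linker.options named metadata directly off the Module instead of receiving pre-collected module flags. A hedged sketch of the shared walk; the MDString extraction line is elided in the diff above and is assumed here:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include <string>

using namespace llvm;

static void collectLinkerOptions(Module &M,
                                 SmallVectorImpl<std::string> &Out) {
  NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options");
  if (!LinkerOptions)
    return; // module carries no linker options
  for (const auto *Option : LinkerOptions->operands())
    for (const auto &Piece : Option->operands())
      Out.push_back(cast<MDString>(Piece)->getString().str());
}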
@@ -1190,7 +1173,7 @@ void TargetLoweringObjectFileCOFF::emitModuleFlags( unsigned Flags = 0; StringRef Section; - GetObjCImageInfo(ModuleFlags, Version, Flags, Section); + GetObjCImageInfo(M, Version, Flags, Section); if (Section.empty()) return; diff --git a/lib/DebugInfo/CodeView/CMakeLists.txt b/lib/DebugInfo/CodeView/CMakeLists.txt index 2f9e8981b698..f916695a8439 100644 --- a/lib/DebugInfo/CodeView/CMakeLists.txt +++ b/lib/DebugInfo/CodeView/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_library(LLVMDebugInfoCodeView LazyRandomTypeCollection.cpp Line.cpp RecordSerialization.cpp + StringsAndChecksums.cpp SymbolRecordMapping.cpp SymbolDumper.cpp SymbolSerializer.cpp @@ -32,7 +33,7 @@ add_llvm_library(LLVMDebugInfoCodeView TypeSerializer.cpp TypeStreamMerger.cpp TypeTableCollection.cpp - + ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/CodeView ) diff --git a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp index 6e647c4b976b..de02525270c4 100644 --- a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp +++ b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp @@ -58,6 +58,10 @@ Error DebugStringTableSubsection::commit(BinaryStreamWriter &Writer) const { uint32_t Begin = Writer.getOffset(); uint32_t End = Begin + StringSize; + // Write a null string at the beginning. + if (auto EC = Writer.writeCString(StringRef())) + return EC; + for (auto &Pair : Strings) { StringRef S = Pair.getKey(); uint32_t Offset = Begin + Pair.getValue(); @@ -68,6 +72,7 @@ Error DebugStringTableSubsection::commit(BinaryStreamWriter &Writer) const { } Writer.setOffset(End); + assert((End - Begin) == StringSize); return Error::success(); } diff --git a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp index e9124e68fe82..334c5e002bbc 100644 --- a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp +++ b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp @@ -50,7 +50,7 @@ DebugSubsectionKind DebugSubsectionRecord::kind() const { return Kind; } BinaryStreamRef DebugSubsectionRecord::getRecordData() const { return Data; } DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder( - std::unique_ptr<DebugSubsection> Subsection, CodeViewContainer Container) + std::shared_ptr<DebugSubsection> Subsection, CodeViewContainer Container) : Subsection(std::move(Subsection)), Container(Container) {} uint32_t DebugSubsectionRecordBuilder::calculateSerializedLength() { diff --git a/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp b/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp index 8550107741ce..9b824333369b 100644 --- a/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp +++ b/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp @@ -26,40 +26,9 @@ using namespace llvm; using namespace llvm::codeview; -DebugSubsectionState::DebugSubsectionState() {} - -DebugSubsectionState::DebugSubsectionState( - const DebugStringTableSubsectionRef &Strings) - : Strings(&Strings) {} - -DebugSubsectionState::DebugSubsectionState( - const DebugStringTableSubsectionRef &Strings, - const DebugChecksumsSubsectionRef &Checksums) - : Strings(&Strings), Checksums(&Checksums) {} - -void DebugSubsectionState::initializeStrings(const DebugSubsectionRecord &SR) { - assert(SR.kind() == DebugSubsectionKind::StringTable); - assert(!Strings && "Found a string table even though we already have one!"); - - OwnedStrings = llvm::make_unique<DebugStringTableSubsectionRef>(); - consumeError(OwnedStrings->initialize(SR.getRecordData())); - Strings 
= OwnedStrings.get(); -} - -void DebugSubsectionState::initializeChecksums( - const DebugSubsectionRecord &FCR) { - assert(FCR.kind() == DebugSubsectionKind::FileChecksums); - if (Checksums) - return; - - OwnedChecksums = llvm::make_unique<DebugChecksumsSubsectionRef>(); - consumeError(OwnedChecksums->initialize(FCR.getRecordData())); - Checksums = OwnedChecksums.get(); -} - -Error llvm::codeview::visitDebugSubsection(const DebugSubsectionRecord &R, - DebugSubsectionVisitor &V, - const DebugSubsectionState &State) { +Error llvm::codeview::visitDebugSubsection( + const DebugSubsectionRecord &R, DebugSubsectionVisitor &V, + const StringsAndChecksumsRef &State) { BinaryStreamReader Reader(R.getRecordData()); switch (R.kind()) { case DebugSubsectionKind::Lines: { diff --git a/lib/DebugInfo/CodeView/StringsAndChecksums.cpp b/lib/DebugInfo/CodeView/StringsAndChecksums.cpp new file mode 100644 index 000000000000..928bf8c94f73 --- /dev/null +++ b/lib/DebugInfo/CodeView/StringsAndChecksums.cpp @@ -0,0 +1,55 @@ +//===- StringsAndChecksums.cpp ----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" + +using namespace llvm; +using namespace llvm::codeview; + +StringsAndChecksumsRef::StringsAndChecksumsRef() {} + +StringsAndChecksumsRef::StringsAndChecksumsRef( + const DebugStringTableSubsectionRef &Strings) + : Strings(&Strings) {} + +StringsAndChecksumsRef::StringsAndChecksumsRef( + const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums) + : Strings(&Strings), Checksums(&Checksums) {} + +void StringsAndChecksumsRef::initializeStrings( + const DebugSubsectionRecord &SR) { + assert(SR.kind() == DebugSubsectionKind::StringTable); + assert(!Strings && "Found a string table even though we already have one!"); + + OwnedStrings = llvm::make_unique<DebugStringTableSubsectionRef>(); + consumeError(OwnedStrings->initialize(SR.getRecordData())); + Strings = OwnedStrings.get(); +} + +void StringsAndChecksumsRef::setChecksums( + const DebugChecksumsSubsectionRef &CS) { + OwnedChecksums = llvm::make_unique<DebugChecksumsSubsectionRef>(); + *OwnedChecksums = CS; + Checksums = OwnedChecksums.get(); +} + +void StringsAndChecksumsRef::initializeChecksums( + const DebugSubsectionRecord &FCR) { + assert(FCR.kind() == DebugSubsectionKind::FileChecksums); + if (Checksums) + return; + + OwnedChecksums = llvm::make_unique<DebugChecksumsSubsectionRef>(); + consumeError(OwnedChecksums->initialize(FCR.getRecordData())); + Checksums = OwnedChecksums.get(); +} diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp index 66045933ce9b..36abafc079ed 100644 --- a/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -212,7 +212,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, FileStaticSym &FileStatic) { DictScope S(W, "FileStatic"); - W.printNumber("Index", FileStatic.Index); + printTypeIndex("Index", FileStatic.Index); W.printNumber("ModFilenameOffset", 
FileStatic.ModFilenameOffset); W.printFlags("Flags", uint16_t(FileStatic.Flags), getLocalFlagNames()); W.printString("Name", FileStatic.Name); @@ -516,7 +516,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, RegisterSym &Register) { DictScope S(W, "RegisterSym"); - W.printNumber("Type", Register.Index); + printTypeIndex("Type", Register.Index); W.printEnum("Seg", uint16_t(Register.Register), getRegisterNames()); W.printString("Name", Register.Name); return Error::success(); @@ -524,7 +524,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, PublicSym32 &Public) { DictScope S(W, "PublicSym"); - W.printNumber("Type", Public.Index); + printTypeIndex("Type", Public.Index); W.printNumber("Seg", Public.Segment); W.printNumber("Off", Public.Offset); W.printString("Name", Public.Name); @@ -631,7 +631,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, W.printHex("Offset", RegRel.Offset); printTypeIndex("Type", RegRel.Type); - W.printHex("Register", RegRel.Register); + W.printEnum("Register", uint16_t(RegRel.Register), getRegisterNames()); W.printString("VarName", RegRel.Name); return Error::success(); } diff --git a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp index ea46841a70f6..d731dc1b0a37 100644 --- a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp +++ b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp @@ -307,7 +307,7 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, error(IO.mapInteger(FrameCookie.CodeOffset)); error(IO.mapInteger(FrameCookie.Register)); - error(IO.mapInteger(FrameCookie.CookieKind)); + error(IO.mapEnum(FrameCookie.CookieKind)); error(IO.mapInteger(FrameCookie.Flags)); return Error::success(); @@ -439,7 +439,7 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, error(IO.mapInteger(RegRel.Offset)); error(IO.mapInteger(RegRel.Type)); - error(IO.mapInteger(RegRel.Register)); + error(IO.mapEnum(RegRel.Register)); error(IO.mapStringZ(RegRel.Name)); return Error::success(); diff --git a/lib/DebugInfo/CodeView/TypeDatabase.cpp b/lib/DebugInfo/CodeView/TypeDatabase.cpp index af05d2dc294b..08f848b36a9d 100644 --- a/lib/DebugInfo/CodeView/TypeDatabase.cpp +++ b/lib/DebugInfo/CodeView/TypeDatabase.cpp @@ -12,59 +12,6 @@ using namespace llvm; using namespace llvm::codeview; -namespace { -struct SimpleTypeEntry { - StringRef Name; - SimpleTypeKind Kind; -}; -} - -/// The names here all end in "*". If the simple type is a pointer type, we -/// return the whole name. Otherwise we lop off the last character in our -/// StringRef. 
-static const SimpleTypeEntry SimpleTypeNames[] = { - {"void*", SimpleTypeKind::Void}, - {"<not translated>*", SimpleTypeKind::NotTranslated}, - {"HRESULT*", SimpleTypeKind::HResult}, - {"signed char*", SimpleTypeKind::SignedCharacter}, - {"unsigned char*", SimpleTypeKind::UnsignedCharacter}, - {"char*", SimpleTypeKind::NarrowCharacter}, - {"wchar_t*", SimpleTypeKind::WideCharacter}, - {"char16_t*", SimpleTypeKind::Character16}, - {"char32_t*", SimpleTypeKind::Character32}, - {"__int8*", SimpleTypeKind::SByte}, - {"unsigned __int8*", SimpleTypeKind::Byte}, - {"short*", SimpleTypeKind::Int16Short}, - {"unsigned short*", SimpleTypeKind::UInt16Short}, - {"__int16*", SimpleTypeKind::Int16}, - {"unsigned __int16*", SimpleTypeKind::UInt16}, - {"long*", SimpleTypeKind::Int32Long}, - {"unsigned long*", SimpleTypeKind::UInt32Long}, - {"int*", SimpleTypeKind::Int32}, - {"unsigned*", SimpleTypeKind::UInt32}, - {"__int64*", SimpleTypeKind::Int64Quad}, - {"unsigned __int64*", SimpleTypeKind::UInt64Quad}, - {"__int64*", SimpleTypeKind::Int64}, - {"unsigned __int64*", SimpleTypeKind::UInt64}, - {"__int128*", SimpleTypeKind::Int128}, - {"unsigned __int128*", SimpleTypeKind::UInt128}, - {"__half*", SimpleTypeKind::Float16}, - {"float*", SimpleTypeKind::Float32}, - {"float*", SimpleTypeKind::Float32PartialPrecision}, - {"__float48*", SimpleTypeKind::Float48}, - {"double*", SimpleTypeKind::Float64}, - {"long double*", SimpleTypeKind::Float80}, - {"__float128*", SimpleTypeKind::Float128}, - {"_Complex float*", SimpleTypeKind::Complex32}, - {"_Complex double*", SimpleTypeKind::Complex64}, - {"_Complex long double*", SimpleTypeKind::Complex80}, - {"_Complex __float128*", SimpleTypeKind::Complex128}, - {"bool*", SimpleTypeKind::Boolean8}, - {"__bool16*", SimpleTypeKind::Boolean16}, - {"__bool32*", SimpleTypeKind::Boolean32}, - {"__bool64*", SimpleTypeKind::Boolean64}, -}; - TypeDatabase::TypeDatabase(uint32_t Capacity) : TypeNameStorage(Allocator) { CVUDTNames.resize(Capacity); TypeRecords.resize(Capacity); @@ -103,22 +50,8 @@ StringRef TypeDatabase::saveTypeName(StringRef TypeName) { } StringRef TypeDatabase::getTypeName(TypeIndex Index) const { - if (Index.isNoneType()) - return "<no type>"; - - if (Index.isSimple()) { - // This is a simple type. - for (const auto &SimpleTypeName : SimpleTypeNames) { - if (SimpleTypeName.Kind == Index.getSimpleKind()) { - if (Index.getSimpleMode() == SimpleTypeMode::Direct) - return SimpleTypeName.Name.drop_back(1); - // Otherwise, this is a pointer type. We gloss over the distinction - // between near, far, 64, 32, etc, and just give a pointer type. - return SimpleTypeName.Name; - } - } - return "<unknown simple type>"; - } + if (Index.isNoneType() || Index.isSimple()) + return TypeIndex::simpleTypeName(Index); if (contains(Index)) return CVUDTNames[Index.toArrayIndex()]; diff --git a/lib/DebugInfo/CodeView/TypeIndex.cpp b/lib/DebugInfo/CodeView/TypeIndex.cpp index 20ba6470cd5b..24fe5fcb28d4 100644 --- a/lib/DebugInfo/CodeView/TypeIndex.cpp +++ b/lib/DebugInfo/CodeView/TypeIndex.cpp @@ -15,11 +15,88 @@ using namespace llvm; using namespace llvm::codeview; +namespace { +struct SimpleTypeEntry { + StringRef Name; + SimpleTypeKind Kind; +}; + +/// The names here all end in "*". If the simple type is a pointer type, we +/// return the whole name. Otherwise we lop off the last character in our +/// StringRef. 
+static const SimpleTypeEntry SimpleTypeNames[] = { + {"void*", SimpleTypeKind::Void}, + {"<not translated>*", SimpleTypeKind::NotTranslated}, + {"HRESULT*", SimpleTypeKind::HResult}, + {"signed char*", SimpleTypeKind::SignedCharacter}, + {"unsigned char*", SimpleTypeKind::UnsignedCharacter}, + {"char*", SimpleTypeKind::NarrowCharacter}, + {"wchar_t*", SimpleTypeKind::WideCharacter}, + {"char16_t*", SimpleTypeKind::Character16}, + {"char32_t*", SimpleTypeKind::Character32}, + {"__int8*", SimpleTypeKind::SByte}, + {"unsigned __int8*", SimpleTypeKind::Byte}, + {"short*", SimpleTypeKind::Int16Short}, + {"unsigned short*", SimpleTypeKind::UInt16Short}, + {"__int16*", SimpleTypeKind::Int16}, + {"unsigned __int16*", SimpleTypeKind::UInt16}, + {"long*", SimpleTypeKind::Int32Long}, + {"unsigned long*", SimpleTypeKind::UInt32Long}, + {"int*", SimpleTypeKind::Int32}, + {"unsigned*", SimpleTypeKind::UInt32}, + {"__int64*", SimpleTypeKind::Int64Quad}, + {"unsigned __int64*", SimpleTypeKind::UInt64Quad}, + {"__int64*", SimpleTypeKind::Int64}, + {"unsigned __int64*", SimpleTypeKind::UInt64}, + {"__int128*", SimpleTypeKind::Int128}, + {"unsigned __int128*", SimpleTypeKind::UInt128}, + {"__half*", SimpleTypeKind::Float16}, + {"float*", SimpleTypeKind::Float32}, + {"float*", SimpleTypeKind::Float32PartialPrecision}, + {"__float48*", SimpleTypeKind::Float48}, + {"double*", SimpleTypeKind::Float64}, + {"long double*", SimpleTypeKind::Float80}, + {"__float128*", SimpleTypeKind::Float128}, + {"_Complex float*", SimpleTypeKind::Complex32}, + {"_Complex double*", SimpleTypeKind::Complex64}, + {"_Complex long double*", SimpleTypeKind::Complex80}, + {"_Complex __float128*", SimpleTypeKind::Complex128}, + {"bool*", SimpleTypeKind::Boolean8}, + {"__bool16*", SimpleTypeKind::Boolean16}, + {"__bool32*", SimpleTypeKind::Boolean32}, + {"__bool64*", SimpleTypeKind::Boolean64}, +}; +} // namespace + +StringRef TypeIndex::simpleTypeName(TypeIndex TI) { + assert(TI.isNoneType() || TI.isSimple()); + + if (TI.isNoneType()) + return "<no type>"; + + // This is a simple type. + for (const auto &SimpleTypeName : SimpleTypeNames) { + if (SimpleTypeName.Kind == TI.getSimpleKind()) { + if (TI.getSimpleMode() == SimpleTypeMode::Direct) + return SimpleTypeName.Name.drop_back(1); + // Otherwise, this is a pointer type. We gloss over the distinction + // between near, far, 64, 32, etc, and just give a pointer type. 
+ return SimpleTypeName.Name; + } + } + return "<unknown simple type>"; +} + void llvm::codeview::printTypeIndex(ScopedPrinter &Printer, StringRef FieldName, TypeIndex TI, TypeCollection &Types) { StringRef TypeName; - if (!TI.isNoneType()) - TypeName = Types.getTypeName(TI); + if (!TI.isNoneType()) { + if (TI.isSimple()) + TypeName = TypeIndex::simpleTypeName(TI); + else + TypeName = Types.getTypeName(TI); + } + if (!TypeName.empty()) Printer.printHex(FieldName, TypeName, TI.getIndex()); else diff --git a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp index 11e2e215303c..8704cea60786 100644 --- a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp +++ b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp @@ -34,7 +34,7 @@ static inline PointerMode getPointerMode(uint32_t Attrs) { static inline bool isMemberPointer(uint32_t Attrs) { PointerMode Mode = getPointerMode(Attrs); return Mode == PointerMode::PointerToDataMember || - Mode == PointerMode::PointerToDataMember; + Mode == PointerMode::PointerToMemberFunction; } static inline uint32_t getEncodedIntegerLength(ArrayRef<uint8_t> Data) { diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp index 97b52f0fbdd6..87009bf1b6a1 100644 --- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -55,6 +55,13 @@ bool DWARFAcceleratorTable::extract() { return true; } +uint32_t DWARFAcceleratorTable::getNumBuckets() { return Hdr.NumBuckets; } +uint32_t DWARFAcceleratorTable::getNumHashes() { return Hdr.NumHashes; } +uint32_t DWARFAcceleratorTable::getSizeHdr() { return sizeof(Hdr); } +uint32_t DWARFAcceleratorTable::getHeaderDataLength() { + return Hdr.HeaderDataLength; +} + LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const { // Dump the header. OS << "Magic = " << format("0x%08x", Hdr.Magic) << '\n' diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index 42ab48808f9a..9bafcde57f0a 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -425,248 +425,6 @@ DWARFDie DWARFContext::getDIEForOffset(uint32_t Offset) { return DWARFDie(); } -namespace { - -class Verifier { - raw_ostream &OS; - DWARFContext &DCtx; -public: - Verifier(raw_ostream &S, DWARFContext &D) : OS(S), DCtx(D) {} - - bool HandleDebugInfo() { - bool Success = true; - // A map that tracks all references (converted absolute references) so we - // can verify each reference points to a valid DIE and not an offset that - // lies between to valid DIEs. - std::map<uint64_t, std::set<uint32_t>> ReferenceToDIEOffsets; - - OS << "Verifying .debug_info...\n"; - for (const auto &CU : DCtx.compile_units()) { - unsigned NumDies = CU->getNumDIEs(); - for (unsigned I = 0; I < NumDies; ++I) { - auto Die = CU->getDIEAtIndex(I); - const auto Tag = Die.getTag(); - if (Tag == DW_TAG_null) - continue; - for (auto AttrValue : Die.attributes()) { - const auto Attr = AttrValue.Attr; - const auto Form = AttrValue.Value.getForm(); - switch (Attr) { - case DW_AT_ranges: - // Make sure the offset in the DW_AT_ranges attribute is valid. 
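The simple-type naming scheme moved into TypeIndex::simpleTypeName above relies on every table entry ending in '*': direct modes drop the trailing character, while any pointer mode keeps the whole string. A standalone illustration of that convention:

#include <cassert>
#include <string>

static std::string simpleName(const std::string &Entry, bool IsPointerMode) {
  // Table entries all end in '*'; direct mode lops off the last character.
  return IsPointerMode ? Entry : Entry.substr(0, Entry.size() - 1);
}

int main() {
  assert(simpleName("wchar_t*", /*IsPointerMode=*/false) == "wchar_t");
  assert(simpleName("wchar_t*", /*IsPointerMode=*/true) == "wchar_t*");
  return 0;
}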
- if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { - if (*SectionOffset >= DCtx.getRangeSection().Data.size()) { - Success = false; - OS << "error: DW_AT_ranges offset is beyond .debug_ranges " - "bounds:\n"; - Die.dump(OS, 0); - OS << "\n"; - } - } else { - Success = false; - OS << "error: DIE has invalid DW_AT_ranges encoding:\n"; - Die.dump(OS, 0); - OS << "\n"; - } - break; - case DW_AT_stmt_list: - // Make sure the offset in the DW_AT_stmt_list attribute is valid. - if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { - if (*SectionOffset >= DCtx.getLineSection().Data.size()) { - Success = false; - OS << "error: DW_AT_stmt_list offset is beyond .debug_line " - "bounds: " - << format("0x%08" PRIx32, *SectionOffset) << "\n"; - CU->getUnitDIE().dump(OS, 0); - OS << "\n"; - } - } else { - Success = false; - OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n"; - Die.dump(OS, 0); - OS << "\n"; - } - break; - - default: - break; - } - switch (Form) { - case DW_FORM_ref1: - case DW_FORM_ref2: - case DW_FORM_ref4: - case DW_FORM_ref8: - case DW_FORM_ref_udata: { - // Verify all CU relative references are valid CU offsets. - Optional<uint64_t> RefVal = AttrValue.Value.getAsReference(); - assert(RefVal); - if (RefVal) { - auto DieCU = Die.getDwarfUnit(); - auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset(); - auto CUOffset = AttrValue.Value.getRawUValue(); - if (CUOffset >= CUSize) { - Success = false; - OS << "error: " << FormEncodingString(Form) << " CU offset " - << format("0x%08" PRIx32, CUOffset) - << " is invalid (must be less than CU size of " - << format("0x%08" PRIx32, CUSize) << "):\n"; - Die.dump(OS, 0); - OS << "\n"; - } else { - // Valid reference, but we will verify it points to an actual - // DIE later. - ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset()); - } - } - break; - } - case DW_FORM_ref_addr: { - // Verify all absolute DIE references have valid offsets in the - // .debug_info section. - Optional<uint64_t> RefVal = AttrValue.Value.getAsReference(); - assert(RefVal); - if (RefVal) { - if(*RefVal >= DCtx.getInfoSection().Data.size()) { - Success = false; - OS << "error: DW_FORM_ref_addr offset beyond .debug_info " - "bounds:\n"; - Die.dump(OS, 0); - OS << "\n"; - } else { - // Valid reference, but we will verify it points to an actual - // DIE later. - ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset()); - } - } - break; - } - case DW_FORM_strp: { - auto SecOffset = AttrValue.Value.getAsSectionOffset(); - assert(SecOffset); // DW_FORM_strp is a section offset. - if (SecOffset && *SecOffset >= DCtx.getStringSection().size()) { - Success = false; - OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n"; - Die.dump(OS, 0); - OS << "\n"; - } - break; - } - default: - break; - } - } - } - } - - // Take all references and make sure they point to an actual DIE by - // getting the DIE by offset and emitting an error - OS << "Verifying .debug_info references...\n"; - for (auto Pair: ReferenceToDIEOffsets) { - auto Die = DCtx.getDIEForOffset(Pair.first); - if (Die) - continue; - Success = false; - OS << "error: invalid DIE reference " << format("0x%08" PRIx64, Pair.first) - << ". 
Offset is in between DIEs:\n"; - for (auto Offset: Pair.second) { - auto ReferencingDie = DCtx.getDIEForOffset(Offset); - ReferencingDie.dump(OS, 0); - OS << "\n"; - } - OS << "\n"; - } - return Success; - } - - bool HandleDebugLine() { - std::map<uint64_t, DWARFDie> StmtListToDie; - bool Success = true; - OS << "Verifying .debug_line...\n"; - for (const auto &CU : DCtx.compile_units()) { - uint32_t LineTableOffset = 0; - auto CUDie = CU->getUnitDIE(); - auto StmtFormValue = CUDie.find(DW_AT_stmt_list); - if (!StmtFormValue) { - // No line table for this compile unit. - continue; - } - // Get the attribute value as a section offset. No need to produce an - // error here if the encoding isn't correct because we validate this in - // the .debug_info verifier. - if (auto StmtSectionOffset = toSectionOffset(StmtFormValue)) { - LineTableOffset = *StmtSectionOffset; - if (LineTableOffset >= DCtx.getLineSection().Data.size()) { - // Make sure we don't get a valid line table back if the offset - // is wrong. - assert(DCtx.getLineTableForUnit(CU.get()) == nullptr); - // Skip this line table as it isn't valid. No need to create an error - // here because we validate this in the .debug_info verifier. - continue; - } else { - auto Iter = StmtListToDie.find(LineTableOffset); - if (Iter != StmtListToDie.end()) { - Success = false; - OS << "error: two compile unit DIEs, " - << format("0x%08" PRIx32, Iter->second.getOffset()) << " and " - << format("0x%08" PRIx32, CUDie.getOffset()) - << ", have the same DW_AT_stmt_list section offset:\n"; - Iter->second.dump(OS, 0); - CUDie.dump(OS, 0); - OS << '\n'; - // Already verified this line table before, no need to do it again. - continue; - } - StmtListToDie[LineTableOffset] = CUDie; - } - } - auto LineTable = DCtx.getLineTableForUnit(CU.get()); - if (!LineTable) { - Success = false; - OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) - << "] was not able to be parsed for CU:\n"; - CUDie.dump(OS, 0); - OS << '\n'; - continue; - } - uint32_t MaxFileIndex = LineTable->Prologue.FileNames.size(); - uint64_t PrevAddress = 0; - uint32_t RowIndex = 0; - for (const auto &Row : LineTable->Rows) { - if (Row.Address < PrevAddress) { - Success = false; - OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) - << "] row[" << RowIndex - << "] decreases in address from previous row:\n"; - - DWARFDebugLine::Row::dumpTableHeader(OS); - if (RowIndex > 0) - LineTable->Rows[RowIndex - 1].dump(OS); - Row.dump(OS); - OS << '\n'; - } - - if (Row.File > MaxFileIndex) { - Success = false; - OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) - << "][" << RowIndex << "] has invalid file index " << Row.File - << " (valid values are [1," << MaxFileIndex << "]):\n"; - DWARFDebugLine::Row::dumpTableHeader(OS); - Row.dump(OS); - OS << '\n'; - } - if (Row.EndSequence) - PrevAddress = 0; - else - PrevAddress = Row.Address; - ++RowIndex; - } - } - return Success; - } -}; - -} // anonymous namespace - bool DWARFContext::verify(raw_ostream &OS, DIDumpType DumpType) { bool Success = true; DWARFVerifier verifier(OS, *this); @@ -678,8 +436,13 @@ bool DWARFContext::verify(raw_ostream &OS, DIDumpType DumpType) { if (!verifier.handleDebugLine()) Success = false; } + if (DumpType == DIDT_All || DumpType == DIDT_AppleNames) { + if (!verifier.handleAppleNames()) + Success = false; + } return Success; } + const DWARFUnitIndex &DWARFContext::getCUIndex() { if (CUIndex) return *CUIndex; @@ -1250,7 +1013,7 @@ 
DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, continue; RelSecName = RelSecName.substr( - RelSecName.find_first_not_of("._")); // Skip . and _ prefixes. + RelSecName.find_first_not_of("._z")); // Skip . and _ prefixes. // TODO: Add support for relocations in other sections as needed. // Record relocations for the debug_info and debug_line sections. diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index e6e007896cc8..cf9fec2b3254 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -514,6 +514,20 @@ static uint64_t readPointer(const DataExtractor &Data, uint32_t &Offset, } } +// This is a workaround for old compilers which do not allow +// noreturn attribute usage in lambdas. Once the support for those +// compilers are phased out, we can remove this and return back to +// a ReportError lambda: [StartOffset](const char *ErrorMsg). +#define ReportError(ErrorMsg) ReportErrorImpl(StartOffset,ErrorMsg) +static void LLVM_ATTRIBUTE_NORETURN +ReportErrorImpl(uint32_t StartOffset, const char *ErrorMsg) { + std::string Str; + raw_string_ostream OS(Str); + OS << format(ErrorMsg, StartOffset); + OS.flush(); + report_fatal_error(Str); +} + void DWARFDebugFrame::parse(DataExtractor Data) { uint32_t Offset = 0; DenseMap<uint32_t, CIE *> CIEs; @@ -521,14 +535,6 @@ void DWARFDebugFrame::parse(DataExtractor Data) { while (Data.isValidOffset(Offset)) { uint32_t StartOffset = Offset; - auto ReportError = [StartOffset](const char *ErrorMsg) { - std::string Str; - raw_string_ostream OS(Str); - OS << format(ErrorMsg, StartOffset); - OS.flush(); - report_fatal_error(Str); - }; - bool IsDWARF64 = false; uint64_t Length = Data.getU32(&Offset); uint64_t Id; @@ -585,7 +591,6 @@ void DWARFDebugFrame::parse(DataExtractor Data) { switch (AugmentationString[i]) { default: ReportError("Unknown augmentation character in entry at %lx"); - llvm_unreachable("ReportError should not return."); case 'L': LSDAPointerEncoding = Data.getU8(&Offset); break; diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp index 8a544296f65c..a6240fb60143 100644 --- a/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -14,6 +14,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFSection.h" +#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/Support/raw_ostream.h" #include <map> #include <set> @@ -275,3 +276,36 @@ bool DWARFVerifier::handleDebugLine() { verifyDebugLineRows(); return NumDebugLineErrors == 0; } + +bool DWARFVerifier::handleAppleNames() { + NumAppleNamesErrors = 0; + OS << "Verifying .apple_names...\n"; + + DataExtractor AppleNamesSection(DCtx.getAppleNamesSection().Data, + DCtx.isLittleEndian(), 0); + DataExtractor StrData(DCtx.getStringSection(), DCtx.isLittleEndian(), 0); + DWARFAcceleratorTable AppleNames(AppleNamesSection, StrData, + DCtx.getAppleNamesSection().Relocs); + + if (!AppleNames.extract()) { + OS << "error: cannot extract .apple_names accelerator table\n"; + return false; + } + + // Verify that all buckets have a valid hash index or are empty + uint32_t NumBuckets = AppleNames.getNumBuckets(); + uint32_t NumHashes = AppleNames.getNumHashes(); + + uint32_t BucketsOffset = + AppleNames.getSizeHdr() + AppleNames.getHeaderDataLength(); + + for (uint32_t BucketIdx = 0; BucketIdx < NumBuckets; ++BucketIdx) { + uint32_t HashIdx = 
AppleNamesSection.getU32(&BucketsOffset); + if (HashIdx >= NumHashes && HashIdx != UINT32_MAX) { + OS << format("error: Bucket[%d] has invalid hash index: [%d]\n", + BucketIdx, HashIdx); + ++NumAppleNamesErrors; + } + } + return NumAppleNamesErrors == 0; +} diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp index 396dffaa68b1..81a9d3eeec61 100644 --- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -177,7 +177,7 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter, } void DbiModuleDescriptorBuilder::addDebugSubsection( - std::unique_ptr<DebugSubsection> Subsection) { + std::shared_ptr<DebugSubsection> Subsection) { assert(Subsection); C13Builders.push_back(llvm::make_unique<DebugSubsectionRecordBuilder>( std::move(Subsection), CodeViewContainer::Pdb)); diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp index 355c7b57f4d1..e7304b444f23 100644 --- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp @@ -45,10 +45,6 @@ void DbiStreamBuilder::setFlags(uint16_t F) { Flags = F; } void DbiStreamBuilder::setMachineType(PDB_Machine M) { MachineType = M; } -void DbiStreamBuilder::setSectionContribs(ArrayRef<SectionContrib> Arr) { - SectionContribs = Arr; -} - void DbiStreamBuilder::setSectionMap(ArrayRef<SecMapEntry> SecMap) { SectionMap = SecMap; } @@ -293,23 +289,17 @@ static uint16_t toSecMapFlags(uint32_t Flags) { return Ret; } -// A utility function to create Section Contributions -// for a given input sections. -std::vector<SectionContrib> DbiStreamBuilder::createSectionContribs( - ArrayRef<object::coff_section> SecHdrs) { - std::vector<SectionContrib> Ret; - - // Create a SectionContrib for each input section. - for (auto &Sec : SecHdrs) { - Ret.emplace_back(); - auto &Entry = Ret.back(); - memset(&Entry, 0, sizeof(Entry)); - - Entry.Off = Sec.PointerToRawData; - Entry.Size = Sec.SizeOfRawData; - Entry.Characteristics = Sec.Characteristics; - } - return Ret; +void DbiStreamBuilder::addSectionContrib(DbiModuleDescriptorBuilder *ModuleDbi, + const object::coff_section *SecHdr) { + SectionContrib SC; + memset(&SC, 0, sizeof(SC)); + SC.ISect = (uint16_t)~0U; // This represents nil. + SC.Off = SecHdr->PointerToRawData; + SC.Size = SecHdr->SizeOfRawData; + SC.Characteristics = SecHdr->Characteristics; + // Use the module index in the module dbi stream or nil (-1). + SC.Imod = ModuleDbi ? ModuleDbi->getModuleIndex() : (uint16_t)~0U; + SectionContribs.emplace_back(SC); } // A utility function to create a Section Map for a given list of COFF sections. 
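As a standalone illustration of the .apple_names bucket check introduced above: every bucket must hold either a hash index below the hash count or the empty-bucket marker UINT32_MAX. The sketch below uses plain standard-library types instead of the real DataExtractor-based reader, so the names and layout are illustrative only.

#include <cstdint>
#include <cstdio>
#include <vector>

// Return the number of invalid buckets, printing one error per offender,
// mirroring the handleAppleNames() loop above.
static unsigned verifyBuckets(const std::vector<uint32_t> &Buckets,
                              uint32_t NumHashes) {
  unsigned Errors = 0;
  for (size_t BucketIdx = 0; BucketIdx < Buckets.size(); ++BucketIdx) {
    uint32_t HashIdx = Buckets[BucketIdx];
    // UINT32_MAX marks an empty bucket; anything else must index a hash.
    if (HashIdx >= NumHashes && HashIdx != UINT32_MAX) {
      std::printf("error: Bucket[%zu] has invalid hash index: [%u]\n",
                  BucketIdx, HashIdx);
      ++Errors;
    }
  }
  return Errors;
}

int main() {
  // Two valid buckets, one empty bucket, one corrupt bucket (7 >= 4).
  std::vector<uint32_t> Buckets = {0, 2, UINT32_MAX, 7};
  return verifyBuckets(Buckets, /*NumHashes=*/4) == 1 ? 0 : 1;
}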
@@ -372,7 +362,7 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout, if (!SectionContribs.empty()) { if (auto EC = Writer.writeEnum(DbiSecContribVer60)) return EC; - if (auto EC = Writer.writeArray(SectionContribs)) + if (auto EC = Writer.writeArray(makeArrayRef(SectionContribs))) return EC; } diff --git a/lib/DebugInfo/PDB/Native/InfoStream.cpp b/lib/DebugInfo/PDB/Native/InfoStream.cpp index 7c6069652da6..a3979d480bf4 100644 --- a/lib/DebugInfo/PDB/Native/InfoStream.cpp +++ b/lib/DebugInfo/PDB/Native/InfoStream.cpp @@ -102,6 +102,10 @@ InfoStream::named_streams() const { return NamedStreams.entries(); } +bool InfoStream::containsIdStream() const { + return !!(Features & PdbFeatureContainsIdStream); +} + PdbRaw_ImplVer InfoStream::getVersion() const { return static_cast<PdbRaw_ImplVer>(Version); } diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp index 1254e23c73eb..a9597cdf4c4d 100644 --- a/lib/DebugInfo/PDB/Native/PDBFile.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp @@ -363,6 +363,16 @@ Expected<PDBStringTable &> PDBFile::getStringTable() { return *Strings; } +uint32_t PDBFile::getPointerSize() { + auto DbiS = getPDBDbiStream(); + if (!DbiS) + return 0; + PDB_Machine Machine = DbiS->getMachineType(); + if (Machine == PDB_Machine::Amd64) + return 8; + return 4; +} + bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); } bool PDBFile::hasPDBGlobalsStream() { diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index 2c6465e6fb2a..12b0c3b36c1d 100644 --- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -80,6 +80,16 @@ Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) { } Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() { + + if (Ipi && Ipi->getRecordCount() > 0) { + // In theory newer PDBs always have an ID stream, but by saying that we're + // only going to *really* have an ID stream if there is at least one ID + // record, we leave open the opportunity to test older PDBs such as those + // that don't have an ID stream. 
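The addSectionContrib() hunk a little further up replaces the bulk createSectionContribs()/setSectionContribs() interface with per-section records that use (uint16_t)~0U as a nil sentinel for ISect, and for Imod when no module is attached. A reduced sketch of that convention, with a stand-in struct rather than the real SectionContrib layout:

#include <cstdint>
#include <cstring>

// Stand-in for the on-disk record; field names follow the diff, the
// layout does not claim to match the real SectionContrib.
struct Contrib {
  uint16_t ISect, Imod;
  uint32_t Off, Size, Characteristics;
};

// ModIndex == nullptr means "no module", encoded as the nil value ~0.
Contrib makeContrib(uint32_t Off, uint32_t Size, uint32_t Characteristics,
                    const uint16_t *ModIndex) {
  Contrib SC;
  std::memset(&SC, 0, sizeof(SC)); // unset fields stay zero
  SC.ISect = (uint16_t)~0U;        // nil section index
  SC.Off = Off;
  SC.Size = Size;
  SC.Characteristics = Characteristics;
  SC.Imod = ModIndex ? *ModIndex : (uint16_t)~0U; // nil module index
  return SC;
}

Accumulating one such record per input section is what lets the builder track which module contributed which section, instead of taking a pre-built array.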
+ auto &Info = getInfoBuilder(); + Info.addFeature(PdbRaw_FeatureSig::VC140); + } + uint32_t StringsLen = Strings.calculateSerializedSize(); if (auto EC = addNamedStream("/names", StringsLen)) diff --git a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp index 6013c342cf02..f9f8ac219d35 100644 --- a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp +++ b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp @@ -56,7 +56,8 @@ Error PDBStringTable::readStrings(BinaryStreamReader &Reader) { return Error::success(); } -codeview::DebugStringTableSubsectionRef PDBStringTable::getStringTable() const { +const codeview::DebugStringTableSubsectionRef & +PDBStringTable::getStringTable() const { return Strings; } diff --git a/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp index a472181a4895..90acfadd311f 100644 --- a/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp @@ -52,6 +52,11 @@ uint32_t PDBStringTableBuilder::calculateSerializedSize() const { return Size; } +void PDBStringTableBuilder::setStrings( + const codeview::DebugStringTableSubsection &Strings) { + this->Strings = Strings; +} + Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const { // Write a header PDBStringTableHeader H; diff --git a/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/lib/DebugInfo/PDB/Native/PublicsStream.cpp index 091ac67035dc..8f3474b9ce19 100644 --- a/lib/DebugInfo/PDB/Native/PublicsStream.cpp +++ b/lib/DebugInfo/PDB/Native/PublicsStream.cpp @@ -130,4 +130,13 @@ PublicsStream::getSymbols(bool *HadError) const { return SS.getSymbols(HadError); } +Expected<const codeview::CVSymbolArray &> +PublicsStream::getSymbolArray() const { + auto SymbolS = Pdb.getPDBSymbolStream(); + if (!SymbolS) + return SymbolS.takeError(); + + return SymbolS->getSymbolArray(); +} + Error PublicsStream::commit() { return Error::success(); } diff --git a/lib/DebugInfo/PDB/Native/TpiHashing.cpp b/lib/DebugInfo/PDB/Native/TpiHashing.cpp index 16904a5a27ed..91b8d648fcf9 100644 --- a/lib/DebugInfo/PDB/Native/TpiHashing.cpp +++ b/lib/DebugInfo/PDB/Native/TpiHashing.cpp @@ -9,6 +9,7 @@ #include "llvm/DebugInfo/PDB/Native/TpiHashing.h" +#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/PDB/Native/Hash.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" diff --git a/lib/DebugInfo/PDB/UDTLayout.cpp b/lib/DebugInfo/PDB/UDTLayout.cpp index aacefae80c3a..da353cb6977c 100644 --- a/lib/DebugInfo/PDB/UDTLayout.cpp +++ b/lib/DebugInfo/PDB/UDTLayout.cpp @@ -181,13 +181,14 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { if (Data->getDataKind() == PDB_DataKind::Member) Members.push_back(std::move(Data)); else - Other.push_back(std::move(Child)); + Other.push_back(std::move(Data)); } else if (auto VT = unique_dyn_cast<PDBSymbolTypeVTable>(Child)) VTables.push_back(std::move(VT)); else if (auto Func = unique_dyn_cast<PDBSymbolFunc>(Child)) Funcs.push_back(std::move(Func)); - else + else { Other.push_back(std::move(Child)); + } } // We don't want to have any re-allocations in the list of bases, so make diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp index 9aad3771784d..0453a7f443b5 100644 --- a/lib/Fuzzer/FuzzerDriver.cpp +++ b/lib/Fuzzer/FuzzerDriver.cpp @@ -553,12 +553,12 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { return RunInMultipleProcesses(Args, Flags.workers, Flags.jobs); const size_t kMaxSaneLen = 1 << 20; - 
const size_t kMinDefaultLen = 64; + const size_t kMinDefaultLen = 4096; FuzzingOptions Options; Options.Verbosity = Flags.verbosity; Options.MaxLen = Flags.max_len; Options.ExperimentalLenControl = Flags.experimental_len_control; - if (Flags.experimental_len_control && Flags.max_len == 64) + if (Flags.experimental_len_control && Flags.max_len == kMinDefaultLen) Options.MaxLen = 1 << 20; Options.UnitTimeoutSec = Flags.timeout; Options.ErrorExitCode = Flags.error_exitcode; diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp index f6083282ab61..fbf18357ede6 100644 --- a/lib/Fuzzer/FuzzerLoop.cpp +++ b/lib/Fuzzer/FuzzerLoop.cpp @@ -301,7 +301,9 @@ void Fuzzer::SetMaxInputLen(size_t MaxInputLen) { this->MaxInputLen = MaxInputLen; this->MaxMutationLen = MaxInputLen; AllocateCurrentUnitData(); - Printf("INFO: -max_len is not provided, using %zd\n", MaxInputLen); + Printf("INFO: -max_len is not provided; " + "libFuzzer will not generate inputs larger than %zd bytes\n", + MaxInputLen); } void Fuzzer::SetMaxMutationLen(size_t MaxMutationLen) { diff --git a/lib/Fuzzer/FuzzerTracePC.cpp b/lib/Fuzzer/FuzzerTracePC.cpp index ea93468ea0ed..6f5c7be41062 100644 --- a/lib/Fuzzer/FuzzerTracePC.cpp +++ b/lib/Fuzzer/FuzzerTracePC.cpp @@ -53,6 +53,17 @@ size_t TracePC::GetTotalPCCoverage() { return Res; } + +void TracePC::HandleInline8bitCountersInit(uint8_t *Start, uint8_t *Stop) { + if (Start == Stop) return; + if (NumModulesWithInline8bitCounters && + ModuleCounters[NumModulesWithInline8bitCounters-1].Start == Start) return; + assert(NumModulesWithInline8bitCounters < + sizeof(ModuleCounters) / sizeof(ModuleCounters[0])); + ModuleCounters[NumModulesWithInline8bitCounters++] = {Start, Stop}; + NumInline8bitCounters += Stop - Start; +} + void TracePC::HandleInit(uint32_t *Start, uint32_t *Stop) { if (Start == Stop || *Start) return; assert(NumModules < sizeof(Modules) / sizeof(Modules[0])); @@ -76,6 +87,13 @@ void TracePC::PrintModuleInfo() { for (size_t i = 0; i < NumModules; i++) Printf("[%p, %p), ", Modules[i].Start, Modules[i].Stop); Printf("\n"); + if (NumModulesWithInline8bitCounters) { + Printf("INFO: Loaded %zd modules with %zd inline 8-bit counters\n", + NumModulesWithInline8bitCounters, NumInline8bitCounters); + for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++) + Printf("[%p, %p), ", ModuleCounters[i].Start, ModuleCounters[i].Stop); + Printf("\n"); + } } ATTRIBUTE_NO_SANITIZE_ALL @@ -304,6 +322,11 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *Start, uint32_t *Stop) { } ATTRIBUTE_INTERFACE +void __sanitizer_cov_8bit_counters_init(uint8_t *Start, uint8_t *Stop) { + fuzzer::TPC.HandleInline8bitCountersInit(Start, Stop); +} + +ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_ALL void __sanitizer_cov_trace_pc_indir(uintptr_t Callee) { uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0)); diff --git a/lib/Fuzzer/FuzzerTracePC.h b/lib/Fuzzer/FuzzerTracePC.h index 6523fa06005c..5ec8c590b4df 100644 --- a/lib/Fuzzer/FuzzerTracePC.h +++ b/lib/Fuzzer/FuzzerTracePC.h @@ -51,7 +51,8 @@ class TracePC { // How many bits of PC are used from __sanitizer_cov_trace_pc. 
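The FuzzerDriver change above raises the default input length from 64 to 4096 bytes and keys the experimental length-control special case off the same constant instead of a hard-coded 64. A minimal model of that flag handling, under the assumption that only these two options affect the cap:

#include <cstddef>

// Effective cap on generated input size, given the -max_len flag value.
static size_t effectiveMaxLen(size_t FlagMaxLen, bool ExperimentalLenControl) {
  const size_t kMaxSaneLen = 1 << 20;
  const size_t kMinDefaultLen = 4096;
  // When the flag is still at its default and length control is on,
  // let the mutator grow inputs up to the sane maximum.
  if (ExperimentalLenControl && FlagMaxLen == kMinDefaultLen)
    return kMaxSaneLen;
  return FlagMaxLen;
}

int main() { return effectiveMaxLen(4096, true) == (1 << 20) ? 0 : 1; }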
static const size_t kTracePcBits = 18; - void HandleInit(uint32_t *start, uint32_t *stop); + void HandleInit(uint32_t *Start, uint32_t *Stop); + void HandleInline8bitCountersInit(uint8_t *Start, uint8_t *Stop); void HandleCallerCallee(uintptr_t Caller, uintptr_t Callee); template <class T> void HandleCmp(uintptr_t PC, T Arg1, T Arg2); size_t GetTotalPCCoverage(); @@ -104,6 +105,10 @@ private: size_t NumModules; // linker-initialized. size_t NumGuards; // linker-initialized. + struct { uint8_t *Start, *Stop; } ModuleCounters[4096]; + size_t NumModulesWithInline8bitCounters; // linker-initialized. + size_t NumInline8bitCounters; + uint8_t *Counters() const; uintptr_t *PCs() const; @@ -118,12 +123,24 @@ void ForEachNonZeroByte(const uint8_t *Begin, const uint8_t *End, size_t FirstFeature, Callback Handle8bitCounter) { typedef uintptr_t LargeType; const size_t Step = sizeof(LargeType) / sizeof(uint8_t); - assert(!(reinterpret_cast<uintptr_t>(Begin) % 64)); - for (auto P = Begin; P < End; P += Step) + const size_t StepMask = Step - 1; + auto P = Begin; + // Iterate by 1 byte until either the alignment boundary or the end. + for (; reinterpret_cast<uintptr_t>(P) & StepMask && P < End; P++) + if (uint8_t V = *P) + Handle8bitCounter(FirstFeature + P - Begin, V); + + // Iterate by Step bytes at a time. + for (; P < End; P += Step) if (LargeType Bundle = *reinterpret_cast<const LargeType *>(P)) for (size_t I = 0; I < Step; I++, Bundle >>= 8) if (uint8_t V = Bundle & 0xff) Handle8bitCounter(FirstFeature + P - Begin + I, V); + + // Iterate by 1 byte until the end. + for (; P < End; P++) + if (uint8_t V = *P) + Handle8bitCounter(FirstFeature + P - Begin, V); } template <class Callback> // bool Callback(size_t Feature) @@ -145,8 +162,16 @@ void TracePC::CollectFeatures(Callback HandleFeature) const { HandleFeature(Idx * 8 + Bit); }; - ForEachNonZeroByte(Counters, Counters + N, 0, Handle8bitCounter); - ForEachNonZeroByte(ExtraCountersBegin(), ExtraCountersEnd(), N * 8, + size_t FirstFeature = 0; + ForEachNonZeroByte(Counters, Counters + N, FirstFeature, Handle8bitCounter); + FirstFeature += N * 8; + for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++) { + ForEachNonZeroByte(ModuleCounters[i].Start, ModuleCounters[i].Stop, + FirstFeature, Handle8bitCounter); + FirstFeature += 8 * (ModuleCounters[i].Stop - ModuleCounters[i].Start); + } + + ForEachNonZeroByte(ExtraCountersBegin(), ExtraCountersEnd(), FirstFeature, Handle8bitCounter); if (UseValueProfile) diff --git a/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp b/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp index dfb6007b7970..b5a61ddca715 100644 --- a/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp +++ b/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp @@ -9,7 +9,7 @@ #include <cstring> extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { - if (Size < 16) return 0; + if (Size < 16 || Size > 64) return 0; int64_t x; uint64_t y; memcpy(&x, Data, sizeof(x)); diff --git a/lib/Fuzzer/test/CMakeLists.txt b/lib/Fuzzer/test/CMakeLists.txt index b39938a705f6..1cf6c9502a2b 100644 --- a/lib/Fuzzer/test/CMakeLists.txt +++ b/lib/Fuzzer/test/CMakeLists.txt @@ -206,6 +206,9 @@ include_directories(..) 
add_subdirectory(no-coverage) add_subdirectory(trace-pc) add_subdirectory(ubsan) +if (NOT MSVC) + add_subdirectory(inline-8bit-counters) +endif() add_library(LLVMFuzzer-DSO1 SHARED DSO1.cpp) add_library(LLVMFuzzer-DSO2 SHARED DSO2.cpp) diff --git a/lib/Fuzzer/test/FourIndependentBranchesTest.cpp b/lib/Fuzzer/test/FourIndependentBranchesTest.cpp index bbf5ea235c7a..ba963d9b1de8 100644 --- a/lib/Fuzzer/test/FourIndependentBranchesTest.cpp +++ b/lib/Fuzzer/test/FourIndependentBranchesTest.cpp @@ -8,6 +8,7 @@ #include <iostream> extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size > 64) return 0; int bits = 0; if (Size > 0 && Data[0] == 'F') bits |= 1; if (Size > 1 && Data[1] == 'U') bits |= 2; diff --git a/lib/Fuzzer/test/FuzzerUnittest.cpp b/lib/Fuzzer/test/FuzzerUnittest.cpp index c8beb4331bfa..812894fd947f 100644 --- a/lib/Fuzzer/test/FuzzerUnittest.cpp +++ b/lib/Fuzzer/test/FuzzerUnittest.cpp @@ -772,4 +772,16 @@ TEST(Fuzzer, ForEachNonZeroByte) { Expected = {{108, 1}, {109, 2}, {118, 3}, {120, 4}, {135, 5}, {137, 6}, {146, 7}, {163, 8}}; EXPECT_EQ(Res, Expected); + + Res.clear(); + ForEachNonZeroByte(Ar + 9, Ar + N, 109, CB); + Expected = { {109, 2}, {118, 3}, {120, 4}, + {135, 5}, {137, 6}, {146, 7}, {163, 8}}; + EXPECT_EQ(Res, Expected); + + Res.clear(); + ForEachNonZeroByte(Ar + 9, Ar + N - 9, 109, CB); + Expected = { {109, 2}, {118, 3}, {120, 4}, + {135, 5}, {137, 6}, {146, 7}}; + EXPECT_EQ(Res, Expected); } diff --git a/lib/Fuzzer/test/ShrinkControlFlowTest.cpp b/lib/Fuzzer/test/ShrinkControlFlowTest.cpp index d09542963626..37eeede7cbff 100644 --- a/lib/Fuzzer/test/ShrinkControlFlowTest.cpp +++ b/lib/Fuzzer/test/ShrinkControlFlowTest.cpp @@ -11,6 +11,7 @@ static volatile int Sink; extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size > 64) return 0; int8_t Ids[256]; memset(Ids, -1, sizeof(Ids)); for (size_t i = 0; i < Size; i++) diff --git a/lib/Fuzzer/test/SimpleHashTest.cpp b/lib/Fuzzer/test/SimpleHashTest.cpp index 99e96cb25dcd..a3f4211ebeef 100644 --- a/lib/Fuzzer/test/SimpleHashTest.cpp +++ b/lib/Fuzzer/test/SimpleHashTest.cpp @@ -26,7 +26,7 @@ static uint32_t simple_hash(const uint8_t *Data, size_t Size) { } extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { - if (Size < 14) + if (Size < 14 || Size > 64) return 0; uint32_t Hash = simple_hash(&Data[0], Size - 4); diff --git a/lib/Fuzzer/test/SingleStrncmpTest.cpp b/lib/Fuzzer/test/SingleStrncmpTest.cpp index b302670fb743..b38c7995d8ff 100644 --- a/lib/Fuzzer/test/SingleStrncmpTest.cpp +++ b/lib/Fuzzer/test/SingleStrncmpTest.cpp @@ -8,6 +8,7 @@ #include <cstring> extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size > 64) return 0; char *S = (char*)Data; volatile auto Strncmp = &(strncmp); // Make sure strncmp is not inlined. if (Size >= 6 && !Strncmp(S, "qwerty", 6)) { diff --git a/lib/Fuzzer/test/TableLookupTest.cpp b/lib/Fuzzer/test/TableLookupTest.cpp index 8126eeabaf42..4d8ab0611cde 100644 --- a/lib/Fuzzer/test/TableLookupTest.cpp +++ b/lib/Fuzzer/test/TableLookupTest.cpp @@ -15,7 +15,6 @@ const size_t N = 1 << 12; // Define an array of counters that will be understood by libFuzzer // as extra coverage signal. The array must be: // * uint8_t -// * aligned by 64 // * in the section named __libfuzzer_extra_counters. // The target code may declare more than one such array. 
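Dropping the alignas(64) requirement on extra counter arrays (seen in the TableLookupTest hunk below) is possible because of the ForEachNonZeroByte rework above: the scan now tolerates arbitrarily aligned buffers by walking bytes up to the word boundary, then whole words, then a byte-wise tail. A self-contained sketch of that three-phase scan; note the word loop here is bounded by P + Step <= End so the tail loop genuinely runs, a small safety tweak over a literal transcription:

#include <cstdint>
#include <cstdio>

template <class Callback>
void forEachNonZeroByte(const uint8_t *Begin, const uint8_t *End,
                        size_t FirstFeature, Callback Handle8bitCounter) {
  typedef uintptr_t LargeType;
  const size_t Step = sizeof(LargeType);
  const size_t StepMask = Step - 1;
  auto P = Begin;
  // Head: byte-by-byte until P reaches a word-aligned address.
  for (; reinterpret_cast<uintptr_t>(P) & StepMask && P < End; P++)
    if (uint8_t V = *P)
      Handle8bitCounter(FirstFeature + (P - Begin), V);
  // Body: a machine word at a time, skipping all-zero bundles cheaply.
  for (; P + Step <= End; P += Step)
    if (LargeType Bundle = *reinterpret_cast<const LargeType *>(P))
      for (size_t I = 0; I < Step; I++, Bundle >>= 8)
        if (uint8_t V = Bundle & 0xff)
          Handle8bitCounter(FirstFeature + (P - Begin) + I, V);
  // Tail: byte-by-byte over the final partial word.
  for (; P < End; P++)
    if (uint8_t V = *P)
      Handle8bitCounter(FirstFeature + (P - Begin), V);
}

int main() {
  alignas(8) uint8_t Counters[32] = {};
  Counters[3] = 7;
  Counters[17] = 1;
  // Deliberately unaligned start to exercise the head loop.
  forEachNonZeroByte(Counters + 1, Counters + 32, 100,
                     [](size_t Feature, uint8_t V) {
                       std::printf("feature %zu value %u\n", Feature, V);
                     });
}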
// @@ -23,7 +22,7 @@ const size_t N = 1 << 12; // depending on whether multiple occurrences of the event 'Idx' // is important to distinguish from one occurrence. #ifdef __linux__ -alignas(64) __attribute__((section("__libfuzzer_extra_counters"))) +__attribute__((section("__libfuzzer_extra_counters"))) #endif static uint8_t Counters[N]; diff --git a/lib/Fuzzer/test/fuzzer-dirs.test b/lib/Fuzzer/test/fuzzer-dirs.test index 3de64f278f5d..622ff5da3a29 100644 --- a/lib/Fuzzer/test/fuzzer-dirs.test +++ b/lib/Fuzzer/test/fuzzer-dirs.test @@ -5,9 +5,13 @@ RUN: echo b > %t/SUB1/SUB2/b RUN: echo c > %t/SUB1/SUB2/SUB3/c RUN: LLVMFuzzer-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=SUBDIRS SUBDIRS: READ units: 3 -RUN: echo -n zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz > %t/SUB1/long +RUN: echo -n zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz > %t/SUB1/f64 +RUN: cat %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 > %t/SUB1/f256 +RUN: cat %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 > %t/SUB1/f1024 +RUN: cat %t/SUB1/f1024 %t/SUB1/f1024 %t/SUB1/f1024 %t/SUB1/f1024 > %t/SUB1/f4096 +RUN: cat %t/SUB1/f4096 %t/SUB1/f4096 > %t/SUB1/f8192 RUN: LLVMFuzzer-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=LONG -LONG: INFO: -max_len is not provided, using 93 +LONG: INFO: -max_len is not provided; libFuzzer will not generate inputs larger than 8192 bytes RUN: rm -rf %t/SUB1 RUN: not LLVMFuzzer-SimpleTest NONEXISTENT_DIR 2>&1 | FileCheck %s --check-prefix=NONEXISTENT_DIR diff --git a/lib/Fuzzer/test/inline-8bit-counters.test b/lib/Fuzzer/test/inline-8bit-counters.test new file mode 100644 index 000000000000..8747af81451f --- /dev/null +++ b/lib/Fuzzer/test/inline-8bit-counters.test @@ -0,0 +1,4 @@ +REQUIRES: linux +CHECK: INFO: Loaded 1 modules with {{.*}} inline 8-bit counters +CHECK: BINGO +RUN: LLVMFuzzer-SimpleTest-Inline8bitCounters -runs=1000000 -seed=1 2>&1 | FileCheck %s diff --git a/lib/Fuzzer/test/inline-8bit-counters/CMakeLists.txt b/lib/Fuzzer/test/inline-8bit-counters/CMakeLists.txt new file mode 100644 index 000000000000..088ab04fe6a0 --- /dev/null +++ b/lib/Fuzzer/test/inline-8bit-counters/CMakeLists.txt @@ -0,0 +1,12 @@ +# These tests are instrumented with -fsanitize-coverage=inline-8bit-counters + +set(CMAKE_CXX_FLAGS + "${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=trace-pc-guard -fsanitize-coverage=inline-8bit-counters") + +set(Inline8bitCounterTests + SimpleTest + ) + +foreach(Test ${Inline8bitCounterTests}) + add_libfuzzer_test(${Test}-Inline8bitCounters SOURCES ../${Test}.cpp) +endforeach() diff --git a/lib/Fuzzer/test/trace-pc/CMakeLists.txt b/lib/Fuzzer/test/trace-pc/CMakeLists.txt index e800f82cc5dc..572fcc983654 100644 --- a/lib/Fuzzer/test/trace-pc/CMakeLists.txt +++ b/lib/Fuzzer/test/trace-pc/CMakeLists.txt @@ -1,5 +1,4 @@ -# These tests are not instrumented with coverage and don't -# have coverage rt in the binary. +# These tests are instrumented with -fsanitize-coverage=trace-pc set(CMAKE_CXX_FLAGS "${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=edge,trace-cmp,indirect-calls,8bit-counters,trace-pc-guard -fsanitize-coverage=trace-pc") diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index a20f3f811c8d..3469026ad7ed 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -348,8 +348,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, /// factors factored out. 
If Folded is false, return null if no factoring was /// possible, to avoid endlessly bouncing an unfoldable expression back into the /// top-level folder. -static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, - bool Folded) { +static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded) { if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { Constant *N = ConstantInt::get(DestTy, ATy->getNumElements()); Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true); @@ -404,8 +403,7 @@ static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, /// factors factored out. If Folded is false, return null if no factoring was /// possible, to avoid endlessly bouncing an unfoldable expression back into the /// top-level folder. -static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, - bool Folded) { +static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, bool Folded) { // The alignment of an array is equal to the alignment of the // array element. Note that this is not always true for vectors. if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { @@ -469,8 +467,7 @@ static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, /// any known factors factored out. If Folded is false, return null if no /// factoring was possible, to avoid endlessly bouncing an unfoldable expression /// back into the top-level folder. -static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo, - Type *DestTy, +static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo, Type *DestTy, bool Folded) { if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false, diff --git a/lib/IR/ConstantsContext.h b/lib/IR/ConstantsContext.h index 6c189cf656de..6585304e7674 100644 --- a/lib/IR/ConstantsContext.h +++ b/lib/IR/ConstantsContext.h @@ -55,8 +55,6 @@ public: return User::operator new(s, 1); } - void *operator new(size_t, unsigned) = delete; - DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; @@ -77,8 +75,6 @@ public: return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; @@ -99,8 +95,6 @@ public: return User::operator new(s, 3); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; @@ -122,8 +116,6 @@ public: return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; @@ -146,8 +138,6 @@ public: return User::operator new(s, 3); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; @@ -173,8 +163,6 @@ public: return User::operator new(s, 3); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; @@ -196,8 +184,6 @@ public: return User::operator new(s, 1); } - void *operator new(size_t, unsigned) = delete; - /// Indices - These identify which value to extract. const SmallVector<unsigned, 4> Indices; @@ -230,8 +216,6 @@ public: return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - /// Indices - These identify the position for the insertion. 
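For the DIExpression change in the DebugInfoMetadata.cpp hunk below, the new offset encoding is worth seeing in isolation: a positive offset becomes DW_OP_plus_uconst <n>, and a negative offset becomes the three-element sequence DW_OP_constu <n> DW_OP_minus. The sketch uses the standard DWARF opcode values; only the vector-of-uint64 representation is shared with the real DIExpression:

#include <cstdint>
#include <vector>

// Standard DWARF opcode values (DWARF v4/v5).
enum : uint64_t {
  DW_OP_plus_uconst = 0x23,
  DW_OP_constu = 0x10,
  DW_OP_minus = 0x1c
};

// Mirror of the new DIExpression::appendOffset logic.
void appendOffset(std::vector<uint64_t> &Ops, int64_t Offset) {
  if (Offset > 0) {
    Ops.push_back(DW_OP_plus_uconst); // one unsigned operand follows
    Ops.push_back(static_cast<uint64_t>(Offset));
  } else if (Offset < 0) {
    Ops.push_back(DW_OP_constu); // push the magnitude...
    Ops.push_back(static_cast<uint64_t>(-Offset));
    Ops.push_back(DW_OP_minus); // ...then subtract it
  }
}

int main() {
  std::vector<uint64_t> Ops;
  appendOffset(Ops, -8); // expect: DW_OP_constu 8 DW_OP_minus
  return Ops.size() == 3 ? 0 : 1;
}

The matching extractIfOffset() change in the same hunk recognizes both of these shapes when reading an expression back.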
const SmallVector<unsigned, 4> Indices; @@ -297,8 +281,6 @@ public: return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp index e6c49cad0722..0bf68b4c53bb 100644 --- a/lib/IR/DebugInfoMetadata.cpp +++ b/lib/IR/DebugInfoMetadata.cpp @@ -598,8 +598,7 @@ unsigned DIExpression::ExprOperand::getSize() const { case dwarf::DW_OP_LLVM_fragment: return 3; case dwarf::DW_OP_constu: - case dwarf::DW_OP_plus: - case dwarf::DW_OP_minus: + case dwarf::DW_OP_plus_uconst: return 2; default: return 1; @@ -641,6 +640,7 @@ bool DIExpression::isValid() const { break; } case dwarf::DW_OP_constu: + case dwarf::DW_OP_plus_uconst: case dwarf::DW_OP_plus: case dwarf::DW_OP_minus: case dwarf::DW_OP_deref: @@ -664,11 +664,12 @@ DIExpression::getFragmentInfo(expr_op_iterator Start, expr_op_iterator End) { void DIExpression::appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset) { if (Offset > 0) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Offset); } else if (Offset < 0) { - Ops.push_back(dwarf::DW_OP_minus); + Ops.push_back(dwarf::DW_OP_constu); Ops.push_back(-Offset); + Ops.push_back(dwarf::DW_OP_minus); } } @@ -677,16 +678,23 @@ bool DIExpression::extractIfOffset(int64_t &Offset) const { Offset = 0; return true; } - if (getNumElements() != 2) - return false; - if (Elements[0] == dwarf::DW_OP_plus) { + + if (getNumElements() == 2 && Elements[0] == dwarf::DW_OP_plus_uconst) { Offset = Elements[1]; return true; } - if (Elements[0] == dwarf::DW_OP_minus) { - Offset = -Elements[1]; - return true; + + if (getNumElements() == 3 && Elements[0] == dwarf::DW_OP_constu) { + if (Elements[2] == dwarf::DW_OP_plus) { + Offset = Elements[1]; + return true; + } + if (Elements[2] == dwarf::DW_OP_minus) { + Offset = -Elements[1]; + return true; + } } + return false; } diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp index 81b02946e1d5..b7fa07c6ffac 100644 --- a/lib/IR/IRBuilder.cpp +++ b/lib/IR/IRBuilder.cpp @@ -134,18 +134,17 @@ CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align, return CI; } -CallInst *IRBuilderBase::CreateElementAtomicMemCpy( - Value *Dst, Value *Src, Value *NumElements, uint32_t ElementSize, - MDNode *TBAATag, MDNode *TBAAStructTag, MDNode *ScopeTag, - MDNode *NoAliasTag) { +CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy( + Value *Dst, Value *Src, Value *Size, uint32_t ElementSize, MDNode *TBAATag, + MDNode *TBAAStructTag, MDNode *ScopeTag, MDNode *NoAliasTag) { Dst = getCastedInt8PtrValue(Dst); Src = getCastedInt8PtrValue(Src); - Value *Ops[] = {Dst, Src, NumElements, getInt32(ElementSize)}; - Type *Tys[] = {Dst->getType(), Src->getType()}; + Value *Ops[] = {Dst, Src, Size, getInt32(ElementSize)}; + Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()}; Module *M = BB->getParent()->getParent(); - Value *TheFn = - Intrinsic::getDeclaration(M, Intrinsic::memcpy_element_atomic, Tys); + Value *TheFn = Intrinsic::getDeclaration( + M, Intrinsic::memcpy_element_unordered_atomic, Tys); CallInst *CI = createCallHelper(TheFn, Ops, this); diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp index 0b1bc9a8c270..92e5798dcf21 100644 --- a/lib/IR/Metadata.cpp +++ b/lib/IR/Metadata.cpp @@ -1470,7 +1470,7 @@ void GlobalObject::copyMetadata(const GlobalObject *Other, unsigned Offset) { if (E) OrigElements = 
E->getElements(); std::vector<uint64_t> Elements(OrigElements.size() + 2); - Elements[0] = dwarf::DW_OP_plus; + Elements[0] = dwarf::DW_OP_plus_uconst; Elements[1] = Offset; std::copy(OrigElements.begin(), OrigElements.end(), Elements.begin() + 2); E = DIExpression::get(getContext(), Elements); diff --git a/lib/IR/ModuleSummaryIndex.cpp b/lib/IR/ModuleSummaryIndex.cpp index 9dd712f9ca13..51c4bae3332e 100644 --- a/lib/IR/ModuleSummaryIndex.cpp +++ b/lib/IR/ModuleSummaryIndex.cpp @@ -56,3 +56,16 @@ ModuleSummaryIndex::getGlobalValueSummary(uint64_t ValueGUID, auto &Summary = VI.getSummaryList()[0]; return Summary.get(); } + +bool ModuleSummaryIndex::isGUIDLive(GlobalValue::GUID GUID) const { + auto VI = getValueInfo(GUID); + if (!VI) + return true; + const auto &SummaryList = VI.getSummaryList(); + if (SummaryList.empty()) + return true; + for (auto &I : SummaryList) + if (isGlobalValueLive(I.get())) + return true; + return false; +} diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 5c1b3412840d..819f63520c74 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -1330,6 +1330,14 @@ Verifier::visitModuleFlag(const MDNode *Op, = mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2)); Assert(Value, "wchar_size metadata requires constant integer argument"); } + + if (ID->getString() == "Linker Options") { + // If the llvm.linker.options named metadata exists, we assume that the + // bitcode reader has upgraded the module flag. Otherwise the flag might + // have been created by a client directly. + Assert(M.getNamedMetadata("llvm.linker.options"), + "'Linker Options' named metadata no longer supported"); + } } /// Return true if this attribute kind only applies to functions. @@ -4004,10 +4012,16 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { CS); break; } - case Intrinsic::memcpy_element_atomic: { - ConstantInt *ElementSizeCI = dyn_cast<ConstantInt>(CS.getArgOperand(3)); - Assert(ElementSizeCI, "element size of the element-wise atomic memory " - "intrinsic must be a constant int", + case Intrinsic::memcpy_element_unordered_atomic: { + const ElementUnorderedAtomicMemCpyInst *MI = + cast<ElementUnorderedAtomicMemCpyInst>(CS.getInstruction()); + ; + + ConstantInt *ElementSizeCI = + dyn_cast<ConstantInt>(MI->getRawElementSizeInBytes()); + Assert(ElementSizeCI, + "element size of the element-wise unordered atomic memory " + "intrinsic must be a constant int", CS); const APInt &ElementSizeVal = ElementSizeCI->getValue(); Assert(ElementSizeVal.isPowerOf2(), @@ -4015,19 +4029,24 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { "must be a power of 2", CS); + if (auto *LengthCI = dyn_cast<ConstantInt>(MI->getLength())) { + uint64_t Length = LengthCI->getZExtValue(); + uint64_t ElementSize = MI->getElementSizeInBytes(); + Assert((Length % ElementSize) == 0, + "constant length must be a multiple of the element size in the " + "element-wise atomic memory intrinsic", + CS); + } + auto IsValidAlignment = [&](uint64_t Alignment) { return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment); }; - uint64_t DstAlignment = CS.getParamAlignment(0), SrcAlignment = CS.getParamAlignment(1); - Assert(IsValidAlignment(DstAlignment), - "incorrect alignment of the destination argument", - CS); + "incorrect alignment of the destination argument", CS); Assert(IsValidAlignment(SrcAlignment), - "incorrect alignment of the source argument", - CS); + "incorrect alignment of the source argument", CS); break; } case Intrinsic::gcroot: diff --git 
a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt index 9e586465025e..1d22c2a11f13 100644 --- a/lib/LLVMBuild.txt +++ b/lib/LLVMBuild.txt @@ -39,6 +39,7 @@ subdirectories = Support TableGen Target + Testing ToolDrivers Transforms diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp index 9d2a44045d6a..35032fdd33e1 100644 --- a/lib/LTO/LTO.cpp +++ b/lib/LTO/LTO.cpp @@ -364,31 +364,40 @@ LTO::LTO(Config Conf, ThinBackend Backend, // Requires a destructor for MapVector<BitcodeModule>. LTO::~LTO() = default; -// Add the given symbol to the GlobalResolutions map, and resolve its partition. -void LTO::addSymbolToGlobalRes(const InputFile::Symbol &Sym, - SymbolResolution Res, unsigned Partition) { - auto &GlobalRes = GlobalResolutions[Sym.getName()]; - GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr(); - if (Res.Prevailing) - GlobalRes.IRName = Sym.getIRName(); - - // Set the partition to external if we know it is re-defined by the linker - // with -defsym or -wrap options, used elsewhere, e.g. it is visible to a - // regular object, is referenced from llvm.compiler_used, or was already - // recorded as being referenced from a different partition. - if (Res.LinkerRedefined || Res.VisibleToRegularObj || Sym.isUsed() || - (GlobalRes.Partition != GlobalResolution::Unknown && - GlobalRes.Partition != Partition)) { - GlobalRes.Partition = GlobalResolution::External; - } else - // First recorded reference, save the current partition. - GlobalRes.Partition = Partition; - - // Flag as visible outside of ThinLTO if visible from a regular object or - // if this is a reference in the regular LTO partition. - GlobalRes.VisibleOutsideThinLTO |= - (Res.VisibleToRegularObj || Sym.isUsed() || - Partition == GlobalResolution::RegularLTO); +// Add the symbols in the given module to the GlobalResolutions map, and resolve +// their partitions. +void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms, + ArrayRef<SymbolResolution> Res, + unsigned Partition, bool InSummary) { + auto *ResI = Res.begin(); + auto *ResE = Res.end(); + (void)ResE; + for (const InputFile::Symbol &Sym : Syms) { + assert(ResI != ResE); + SymbolResolution Res = *ResI++; + + auto &GlobalRes = GlobalResolutions[Sym.getName()]; + GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr(); + if (Res.Prevailing) + GlobalRes.IRName = Sym.getIRName(); + + // Set the partition to external if we know it is re-defined by the linker + // with -defsym or -wrap options, used elsewhere, e.g. it is visible to a + // regular object, is referenced from llvm.compiler_used, or was already + // recorded as being referenced from a different partition. + if (Res.LinkerRedefined || Res.VisibleToRegularObj || Sym.isUsed() || + (GlobalRes.Partition != GlobalResolution::Unknown && + GlobalRes.Partition != Partition)) { + GlobalRes.Partition = GlobalResolution::External; + } else + // First recorded reference, save the current partition. + GlobalRes.Partition = Partition; + + // Flag as visible outside of summary if visible from a regular object or + // from a module that does not have a summary. 
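Restating the verifier rules added above for the element-wise unordered atomic memcpy intrinsic as a plain predicate: the element size must be a constant power of two, a constant length must be a multiple of it, and each pointer's alignment must be a power of two no smaller than the element size. The function below is an illustrative model, not the Verifier API:

#include <cstdint>

static bool isPowerOf2(uint64_t X) { return X && (X & (X - 1)) == 0; }

// True when a call with these constants would pass the new checks.
static bool checkElementUnorderedAtomicMemCpy(uint64_t ElementSize,
                                              uint64_t ConstantLength,
                                              uint64_t DstAlign,
                                              uint64_t SrcAlign) {
  if (!isPowerOf2(ElementSize))
    return false; // element size must be a power of 2
  if (ConstantLength % ElementSize != 0)
    return false; // constant length must be a multiple of the element size
  auto ValidAlign = [&](uint64_t A) {
    return isPowerOf2(A) && A >= ElementSize;
  };
  return ValidAlign(DstAlign) && ValidAlign(SrcAlign);
}

int main() { return checkElementUnorderedAtomicMemCpy(4, 16, 4, 8) ? 0 : 1; }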
+ GlobalRes.VisibleOutsideSummary |= + (Res.VisibleToRegularObj || Sym.isUsed() || !InSummary); + } } static void writeToResolutionFile(raw_ostream &OS, InputFile *Input, @@ -434,46 +443,61 @@ Error LTO::add(std::unique_ptr<InputFile> Input, Error LTO::addModule(InputFile &Input, unsigned ModI, const SymbolResolution *&ResI, const SymbolResolution *ResE) { - Expected<bool> HasThinLTOSummary = Input.Mods[ModI].hasSummary(); - if (!HasThinLTOSummary) - return HasThinLTOSummary.takeError(); + Expected<BitcodeLTOInfo> LTOInfo = Input.Mods[ModI].getLTOInfo(); + if (!LTOInfo) + return LTOInfo.takeError(); + BitcodeModule BM = Input.Mods[ModI]; auto ModSyms = Input.module_symbols(ModI); - if (*HasThinLTOSummary) - return addThinLTO(Input.Mods[ModI], ModSyms, ResI, ResE); - else - return addRegularLTO(Input.Mods[ModI], ModSyms, ResI, ResE); + addModuleToGlobalRes(ModSyms, {ResI, ResE}, + LTOInfo->IsThinLTO ? ThinLTO.ModuleMap.size() + 1 : 0, + LTOInfo->HasSummary); + + if (LTOInfo->IsThinLTO) + return addThinLTO(BM, ModSyms, ResI, ResE); + + Expected<RegularLTOState::AddedModule> ModOrErr = + addRegularLTO(BM, ModSyms, ResI, ResE); + if (!ModOrErr) + return ModOrErr.takeError(); + + if (!LTOInfo->HasSummary) + return linkRegularLTO(std::move(*ModOrErr), /*LivenessFromIndex=*/false); + + // Regular LTO module summaries are added to a dummy module that represents + // the combined regular LTO module. + if (Error Err = BM.readSummary(ThinLTO.CombinedIndex, "", -1ull)) + return Err; + RegularLTO.ModsWithSummaries.push_back(std::move(*ModOrErr)); + return Error::success(); } // Add a regular LTO object to the link. -Error LTO::addRegularLTO(BitcodeModule BM, - ArrayRef<InputFile::Symbol> Syms, - const SymbolResolution *&ResI, - const SymbolResolution *ResE) { - if (!RegularLTO.CombinedModule) { - RegularLTO.CombinedModule = - llvm::make_unique<Module>("ld-temp.o", RegularLTO.Ctx); - RegularLTO.Mover = llvm::make_unique<IRMover>(*RegularLTO.CombinedModule); - } +// The resulting module needs to be linked into the combined LTO module with +// linkRegularLTO. +Expected<LTO::RegularLTOState::AddedModule> +LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, + const SymbolResolution *&ResI, + const SymbolResolution *ResE) { + RegularLTOState::AddedModule Mod; Expected<std::unique_ptr<Module>> MOrErr = BM.getLazyModule(RegularLTO.Ctx, /*ShouldLazyLoadMetadata*/ true, /*IsImporting*/ false); if (!MOrErr) return MOrErr.takeError(); - Module &M = **MOrErr; + Mod.M = std::move(*MOrErr); + if (Error Err = M.materializeMetadata()) - return Err; + return std::move(Err); UpgradeDebugInfo(M); ModuleSymbolTable SymTab; SymTab.addModule(&M); - std::vector<GlobalValue *> Keep; - for (GlobalVariable &GV : M.globals()) if (GV.hasAppendingLinkage()) - Keep.push_back(&GV); + Mod.Keep.push_back(&GV); DenseSet<GlobalObject *> AliasedGlobals; for (auto &GA : M.aliases()) @@ -502,7 +526,6 @@ Error LTO::addRegularLTO(BitcodeModule BM, for (const InputFile::Symbol &Sym : Syms) { assert(ResI != ResE); SymbolResolution Res = *ResI++; - addSymbolToGlobalRes(Sym, Res, 0); assert(MsymI != MsymE); ModuleSymbolTable::Symbol Msym = *MsymI++; @@ -512,7 +535,7 @@ Error LTO::addRegularLTO(BitcodeModule BM, if (Res.Prevailing) { if (Sym.isUndefined()) continue; - Keep.push_back(GV); + Mod.Keep.push_back(GV); // For symbols re-defined with linker -wrap and -defsym options, // set the linkage to weak to inhibit IPO. The linkage will be // restored by the linker. 
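A toy model of the partition resolution that addModuleToGlobalRes() performs above: a symbol keeps the first partition that referenced it, and collapses to External once it is linker-redefined, visible to a regular object, used, or seen from a second partition. Enum values and types here are stand-ins for the real GlobalResolution:

// Illustrative sentinels; the real GlobalResolution defines its own.
struct GlobalRes {
  enum : unsigned { Unknown = ~0u, External = ~0u - 1 };
  unsigned Partition = Unknown;
};

void resolvePartition(GlobalRes &GR, unsigned Partition, bool LinkerRedefined,
                      bool VisibleToRegularObj, bool Used) {
  if (LinkerRedefined || VisibleToRegularObj || Used ||
      (GR.Partition != GlobalRes::Unknown && GR.Partition != Partition))
    GR.Partition = GlobalRes::External; // re-defined or multiply referenced
  else
    GR.Partition = Partition; // first recorded reference
}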
@@ -527,17 +550,14 @@ Error LTO::addRegularLTO(BitcodeModule BM, (GV->hasLinkOnceODRLinkage() || GV->hasWeakODRLinkage() || GV->hasAvailableExternallyLinkage()) && !AliasedGlobals.count(cast<GlobalObject>(GV))) { - // Either of the above three types of linkage indicates that the + // Any of the above three types of linkage indicates that the // chosen prevailing symbol will have the same semantics as this copy of - // the symbol, so we can link it with available_externally linkage. We - // only need to do this if the symbol is undefined. - GlobalValue *CombinedGV = - RegularLTO.CombinedModule->getNamedValue(GV->getName()); - if (!CombinedGV || CombinedGV->isDeclaration()) { - Keep.push_back(GV); - GV->setLinkage(GlobalValue::AvailableExternallyLinkage); - cast<GlobalObject>(GV)->setComdat(nullptr); - } + // the symbol, so we may be able to link it with available_externally + // linkage. We will decide later whether to do that when we link this + // module (in linkRegularLTO), based on whether it is undefined. + Mod.Keep.push_back(GV); + GV->setLinkage(GlobalValue::AvailableExternallyLinkage); + cast<GlobalObject>(GV)->setComdat(nullptr); } } // Common resolution: collect the maximum size/alignment over all commons. @@ -555,25 +575,54 @@ Error LTO::addRegularLTO(BitcodeModule BM, // FIXME: use proposed local attribute for FinalDefinitionInLinkageUnit. } assert(MsymI == MsymE); + return std::move(Mod); +} + +Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod, + bool LivenessFromIndex) { + if (!RegularLTO.CombinedModule) { + RegularLTO.CombinedModule = + llvm::make_unique<Module>("ld-temp.o", RegularLTO.Ctx); + RegularLTO.Mover = llvm::make_unique<IRMover>(*RegularLTO.CombinedModule); + } + + std::vector<GlobalValue *> Keep; + for (GlobalValue *GV : Mod.Keep) { + if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GV->getGUID())) + continue; - return RegularLTO.Mover->move(std::move(*MOrErr), Keep, + if (!GV->hasAvailableExternallyLinkage()) { + Keep.push_back(GV); + continue; + } + + // Only link available_externally definitions if we don't already have a + // definition. + GlobalValue *CombinedGV = + RegularLTO.CombinedModule->getNamedValue(GV->getName()); + if (CombinedGV && !CombinedGV->isDeclaration()) + continue; + + Keep.push_back(GV); + } + + return RegularLTO.Mover->move(std::move(Mod.M), Keep, [](GlobalValue &, IRMover::ValueAdder) {}, /* IsPerformingImport */ false); } -// Add a ThinLTO object to the link. -Error LTO::addThinLTO(BitcodeModule BM, - ArrayRef<InputFile::Symbol> Syms, +// Add a ThinLTO module to the link. +Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, const SymbolResolution *&ResI, const SymbolResolution *ResE) { if (Error Err = - BM.readSummary(ThinLTO.CombinedIndex, ThinLTO.ModuleMap.size())) + BM.readSummary(ThinLTO.CombinedIndex, BM.getModuleIdentifier(), + ThinLTO.ModuleMap.size())) return Err; for (const InputFile::Symbol &Sym : Syms) { assert(ResI != ResE); SymbolResolution Res = *ResI++; - addSymbolToGlobalRes(Sym, Res, ThinLTO.ModuleMap.size() + 1); if (Res.Prevailing) { if (!Sym.getIRName().empty()) { @@ -601,7 +650,7 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) { // Compute "dead" symbols, we don't want to import/export these! DenseSet<GlobalValue::GUID> GUIDPreservedSymbols; for (auto &Res : GlobalResolutions) { - if (Res.second.VisibleOutsideThinLTO && + if (Res.second.VisibleOutsideSummary && // IRName will be defined if we have seen the prevailing copy of // this value. 
If not, no need to preserve any ThinLTO copies. !Res.second.IRName.empty()) @@ -614,7 +663,8 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) { // Save the status of having a regularLTO combined module, as // this is needed for generating the ThinLTO Task ID, and // the CombinedModule will be moved at the end of runRegularLTO. - bool HasRegularLTO = RegularLTO.CombinedModule != nullptr; + bool HasRegularLTO = RegularLTO.CombinedModule != nullptr || + !RegularLTO.ModsWithSummaries.empty(); // Invoke regular LTO if there was a regular LTO module to start with. if (HasRegularLTO) if (auto E = runRegularLTO(AddStream)) @@ -623,6 +673,11 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) { } Error LTO::runRegularLTO(AddStreamFn AddStream) { + for (auto &M : RegularLTO.ModsWithSummaries) + if (Error Err = linkRegularLTO(std::move(M), + /*LivenessFromIndex=*/true)) + return Err; + // Make sure commons have the right size/alignment: we kept the largest from // all the prevailing when adding the inputs, and we apply it here. const DataLayout &DL = RegularLTO.CombinedModule->getDataLayout(); @@ -920,17 +975,6 @@ ThinBackend lto::createWriteIndexesThinBackend(std::string OldPrefix, }; } -static bool IsLiveByGUID(const ModuleSummaryIndex &Index, - GlobalValue::GUID GUID) { - auto VI = Index.getValueInfo(GUID); - if (!VI) - return false; - for (auto &I : VI.getSummaryList()) - if (Index.isGlobalValueLive(I.get())) - return true; - return false; -} - Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, bool HasRegularLTO) { if (ThinLTO.ModuleMap.empty()) @@ -979,7 +1023,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, auto GUID = GlobalValue::getGUID( GlobalValue::dropLLVMManglingEscape(Res.second.IRName)); // Mark exported unless index-based analysis determined it to be dead. - if (IsLiveByGUID(ThinLTO.CombinedIndex, GUID)) + if (ThinLTO.CombinedIndex.isGUIDLive(GUID)) ExportedGUIDs.insert(GUID); } diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 11f0982c6a60..3cc8b7d0e770 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -77,14 +77,12 @@ bool LTOModule::isBitcodeFile(StringRef Path) { } bool LTOModule::isThinLTO() { - // Right now the detection is only based on the summary presence. We may want - // to add a dedicated flag at some point. 
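The runThinLTO change above swaps the file-local IsLiveByGUID helper for ModuleSummaryIndex::isGUIDLive, whose definition appears earlier in this diff and is also more conservative: a GUID with no value info or an empty summary list now counts as live. A simplified restatement with standard containers in place of the real index types:

#include <cstdint>
#include <unordered_map>
#include <vector>

using GUID = uint64_t;

struct IndexModel {
  // GUID -> liveness of each summary in its summary list.
  std::unordered_map<GUID, std::vector<bool>> SummaryLiveness;

  bool isGUIDLive(GUID G) const {
    auto It = SummaryLiveness.find(G);
    // No value info or no summaries: treat as live (conservative). This
    // is the behavioral difference from the removed IsLiveByGUID, which
    // returned false for an unknown GUID.
    if (It == SummaryLiveness.end() || It->second.empty())
      return true;
    for (bool Live : It->second)
      if (Live)
        return true;
    return false;
  }
};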
- Expected<bool> Result = hasGlobalValueSummary(MBRef); + Expected<BitcodeLTOInfo> Result = getBitcodeLTOInfo(MBRef); if (!Result) { logAllUnhandledErrors(Result.takeError(), errs(), ""); return false; } - return *Result; + return Result->IsThinLTO; } bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer, @@ -637,10 +635,10 @@ void LTOModule::parseMetadata() { raw_string_ostream OS(LinkerOpts); // Linker Options - if (Metadata *Val = getModule().getModuleFlag("Linker Options")) { - MDNode *LinkerOptions = cast<MDNode>(Val); + if (NamedMDNode *LinkerOptions = + getModule().getNamedMetadata("llvm.linker.options")) { for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { - MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i)); + MDNode *MDOptions = LinkerOptions->getOperand(i); for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii)); OS << " " << MDOption->getString(); diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index f1dfb91aafbb..a407691b0bd1 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -603,6 +603,8 @@ EndStmt: Type = ELF::SHT_NOTE; else if (TypeName == "unwind") Type = ELF::SHT_X86_64_UNWIND; + else if (TypeName == "llvm_odrtab") + Type = ELF::SHT_LLVM_ODRTAB; else if (TypeName.getAsInteger(0, Type)) return TokError("unknown section type"); } diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp index a75068ebf05a..2f4f61aa4d50 100644 --- a/lib/MC/MCSectionELF.cpp +++ b/lib/MC/MCSectionELF.cpp @@ -147,6 +147,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, // Print hex value of the flag while we do not have // any standard symbolic representation of the flag. OS << "0x7000001e"; + else if (Type == ELF::SHT_LLVM_ODRTAB) + OS << "llvm_odrtab"; else report_fatal_error("unsupported type 0x" + Twine::utohexstr(Type) + " for section " + getSectionName()); diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp index 4b3dc6e0c211..db304c027f99 100644 --- a/lib/MC/WasmObjectWriter.cpp +++ b/lib/MC/WasmObjectWriter.cpp @@ -181,7 +181,10 @@ class WasmObjectWriter : public MCObjectWriter { // Index values to use for fixing up call_indirect type indices. // Maps function symbols to the index of the type of the function DenseMap<const MCSymbolWasm *, uint32_t> TypeIndices; - + // Maps function symbols to the table element index space. Used + // for TABLE_INDEX relocation types (i.e. address taken functions). + DenseMap<const MCSymbolWasm *, uint32_t> IndirectSymbolIndices; + // Maps function/global symbols to the function/global index space. DenseMap<const MCSymbolWasm *, uint32_t> SymbolIndices; DenseMap<WasmFunctionType, int32_t, WasmFunctionTypeDenseMapInfo> @@ -189,9 +192,8 @@ class WasmObjectWriter : public MCObjectWriter { // TargetObjectWriter wrappers. 
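The two MC hunks above teach both the assembler parser and the section printer the new llvm_odrtab section type, so the name round-trips through assembly. A reduced sketch of that round trip; the numeric constant below is assumed for illustration, and real code should take SHT_LLVM_ODRTAB from LLVM's ELF definitions:

#include <cstdint>
#include <string>

// Assumed value, for illustration only.
const uint32_t SHT_LLVM_ODRTAB = 0x6fff4c01;

// Parser direction: section type name -> numeric type.
bool parseSectionType(const std::string &Name, uint32_t &Type) {
  if (Name == "llvm_odrtab") {
    Type = SHT_LLVM_ODRTAB;
    return true;
  }
  return false; // fall through to integer parsing, as the parser does
}

// Printer direction: numeric type -> section type name.
std::string printSectionType(uint32_t Type) {
  return Type == SHT_LLVM_ODRTAB ? "llvm_odrtab" : "<unknown>";
}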
bool is64Bit() const { return TargetObjectWriter->is64Bit(); } - unsigned getRelocType(MCContext &Ctx, const MCValue &Target, - const MCFixup &Fixup, bool IsPCRel) const { - return TargetObjectWriter->getRelocType(Ctx, Target, Fixup, IsPCRel); + unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup) const { + return TargetObjectWriter->getRelocType(Target, Fixup); } void startSection(SectionBookkeeping &Section, unsigned SectionId, @@ -210,6 +212,7 @@ private: DataRelocations.clear(); TypeIndices.clear(); SymbolIndices.clear(); + IndirectSymbolIndices.clear(); FunctionTypeIndices.clear(); MCObjectWriter::reset(); } @@ -233,7 +236,7 @@ private: void writeTypeSection(const SmallVector<WasmFunctionType, 4> &FunctionTypes); void writeImportSection(const SmallVector<WasmImport, 4> &Imports); void writeFunctionSection(const SmallVector<WasmFunction, 4> &Functions); - void writeTableSection(const SmallVector<uint32_t, 4> &TableElems); + void writeTableSection(uint32_t NumElements); void writeMemorySection(const SmallVector<char, 0> &DataBytes); void writeGlobalSection(const SmallVector<WasmGlobal, 4> &Globals); void writeExportSection(const SmallVector<WasmExport, 4> &Exports); @@ -402,7 +405,9 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, SymA->setUsedInReloc(); } - unsigned Type = getRelocType(Ctx, Target, Fixup, IsPCRel); + assert(!IsPCRel); + unsigned Type = getRelocType(Target, Fixup); + WasmRelocationEntry Rec(FixupOffset, SymA, C, Type, &FixupSection); if (FixupSection.hasInstructions()) @@ -464,9 +469,11 @@ static uint32_t ProvisionalValue(const WasmRelocationEntry &RelEntry) { uint32_t WasmObjectWriter::getRelocationIndexValue( const WasmRelocationEntry &RelEntry) { switch (RelEntry.Type) { - case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: + assert(IndirectSymbolIndices.count(RelEntry.Symbol)); + return IndirectSymbolIndices[RelEntry.Symbol]; + case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: @@ -617,21 +624,19 @@ void WasmObjectWriter::writeFunctionSection( endSection(Section); } -void WasmObjectWriter::writeTableSection( - const SmallVector<uint32_t, 4> &TableElems) { +void WasmObjectWriter::writeTableSection(uint32_t NumElements) { // For now, always emit the table section, since indirect calls are not // valid without it. In the future, we could perhaps be more clever and omit // it if there are no indirect calls. + SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_TABLE); - // The number of tables, fixed to 1 for now. - encodeULEB128(1, getStream()); - - encodeSLEB128(wasm::WASM_TYPE_ANYFUNC, getStream()); - - encodeULEB128(0, getStream()); // flags - encodeULEB128(TableElems.size(), getStream()); // initial + encodeULEB128(1, getStream()); // The number of tables. + // Fixed to 1 for now. + encodeSLEB128(wasm::WASM_TYPE_ANYFUNC, getStream()); // Type of table + encodeULEB128(0, getStream()); // flags + encodeULEB128(NumElements, getStream()); // initial endSection(Section); } @@ -1072,8 +1077,10 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, } // If needed, prepare the function to be called indirectly. 
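Tying together the getRelocationIndexValue() change above and the hunk that follows: an address-taken function gets a slot in the wasm indirect-function table, and TABLE_INDEX relocations then resolve through that slot rather than through the function index. A sketch of the bookkeeping with simplified containers:

#include <cstdint>
#include <unordered_map>
#include <vector>

struct Sym; // stand-in for MCSymbolWasm

struct TableBuilder {
  // Function symbol -> slot in the indirect-function table.
  std::unordered_map<const Sym *, uint32_t> IndirectSymbolIndices;
  // Table contents: the function index stored in each slot.
  std::vector<uint32_t> TableElems;

  void noteAddressTaken(const Sym *S, uint32_t FunctionIndex) {
    IndirectSymbolIndices[S] = static_cast<uint32_t>(TableElems.size());
    TableElems.push_back(FunctionIndex);
  }

  // TABLE_INDEX_{SLEB,I32} relocations resolve to the table slot, not
  // the function index space.
  uint32_t relocationIndexValue(const Sym *S) const {
    return IndirectSymbolIndices.at(S);
  }
};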
- if (IsAddressTaken.count(&WS)) + if (IsAddressTaken.count(&WS)) { + IndirectSymbolIndices[&WS] = TableElems.size(); TableElems.push_back(Index); + } } else { if (WS.isTemporary() && !WS.getSize()) continue; @@ -1180,7 +1187,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, writeTypeSection(FunctionTypes); writeImportSection(Imports); writeFunctionSection(Functions); - writeTableSection(TableElems); + writeTableSection(TableElems.size()); writeMemorySection(DataBytes); writeGlobalSection(Globals); writeExportSection(Exports); diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp index e1c35ed6a6a0..4034f9039dda 100644 --- a/lib/Object/ArchiveWriter.cpp +++ b/lib/Object/ArchiveWriter.cpp @@ -36,7 +36,8 @@ using namespace llvm; NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) - : Buf(MemoryBuffer::getMemBuffer(BufRef, false)) {} + : Buf(MemoryBuffer::getMemBuffer(BufRef, false)), + MemberName(BufRef.getBufferIdentifier()) {} Expected<NewArchiveMember> NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, @@ -48,6 +49,7 @@ NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, NewArchiveMember M; assert(M.IsNew == false); M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false); + M.MemberName = M.Buf->getBufferIdentifier(); if (!Deterministic) { auto ModTimeOrErr = OldMember.getLastModified(); if (!ModTimeOrErr) @@ -97,6 +99,7 @@ Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName, NewArchiveMember M; M.IsNew = true; M.Buf = std::move(*MemberBufferOrErr); + M.MemberName = M.Buf->getBufferIdentifier(); if (!Deterministic) { M.ModTime = std::chrono::time_point_cast<std::chrono::seconds>( Status.getLastModificationTime()); @@ -185,7 +188,7 @@ printBSDMemberHeader(raw_fd_ostream &Out, StringRef Name, } static bool useStringTable(bool Thin, StringRef Name) { - return Thin || Name.size() >= 16; + return Thin || Name.size() >= 16 || Name.contains('/'); } static void @@ -239,7 +242,7 @@ static void writeStringTable(raw_fd_ostream &Out, StringRef ArcName, unsigned StartOffset = 0; for (const NewArchiveMember &M : Members) { StringRef Path = M.Buf->getBufferIdentifier(); - StringRef Name = sys::path::filename(Path); + StringRef Name = M.MemberName; if (!useStringTable(Thin, Name)) continue; if (StartOffset == 0) { @@ -423,9 +426,8 @@ llvm::writeArchive(StringRef ArcName, if (Kind == object::Archive::K_DARWIN) Padding = OffsetToAlignment(M.Buf->getBufferSize(), 8); - printMemberHeader(Out, Kind, Thin, - sys::path::filename(M.Buf->getBufferIdentifier()), - StringMapIndexIter, M.ModTime, M.UID, M.GID, M.Perms, + printMemberHeader(Out, Kind, Thin, M.MemberName, StringMapIndexIter, + M.ModTime, M.UID, M.GID, M.Perms, M.Buf->getBufferSize() + Padding); if (!Thin) diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp index 9bc28dc14a29..448fb1bd6b56 100644 --- a/lib/Object/ELF.cpp +++ b/lib/Object/ELF.cpp @@ -192,6 +192,7 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { STRINGIFY_ENUM_CASE(ELF, SHT_PREINIT_ARRAY); STRINGIFY_ENUM_CASE(ELF, SHT_GROUP); STRINGIFY_ENUM_CASE(ELF, SHT_SYMTAB_SHNDX); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_ODRTAB); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef); diff --git a/lib/Object/IRSymtab.cpp b/lib/Object/IRSymtab.cpp index d21acdb1d556..a6cd5dda12d3 100644 --- a/lib/Object/IRSymtab.cpp +++ b/lib/Object/IRSymtab.cpp @@ -109,9 +109,9 @@ Error Builder::addModule(Module *M) { if 
(TT.isOSBinFormatCOFF()) {
     if (auto E = M->materializeMetadata())
       return E;
-    if (Metadata *Val = M->getModuleFlag("Linker Options")) {
-      MDNode *LinkerOptions = cast<MDNode>(Val);
-      for (const MDOperand &MDOptions : LinkerOptions->operands())
+    if (NamedMDNode *LinkerOptions =
+            M->getNamedMetadata("llvm.linker.options")) {
+      for (MDNode *MDOptions : LinkerOptions->operands())
         for (const MDOperand &MDOption : cast<MDNode>(MDOptions)->operands())
           COFFLinkerOptsOS << " " << cast<MDString>(MDOption)->getString();
     }
diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp
index 041659e7aa23..3f6080d48f9d 100644
--- a/lib/Object/WindowsResource.cpp
+++ b/lib/Object/WindowsResource.cpp
@@ -30,6 +30,10 @@ namespace object {
 const uint32_t MIN_HEADER_SIZE = 7 * sizeof(uint32_t) + 2 * sizeof(uint16_t);
+// COFF files seem to be inconsistent with alignment between sections; just use
+// 8-byte alignment because it makes everyone happy.
+const uint32_t SECTION_ALIGNMENT = sizeof(uint64_t);
+
 static const size_t ResourceMagicSize = 16;
 static const size_t NullEntrySize = 16;
@@ -66,7 +70,7 @@ ResourceEntryRef::ResourceEntryRef(BinaryStreamRef Ref,
                                    const WindowsResource *Owner, Error &Err)
     : Reader(Ref), OwningRes(Owner) {
   if (loadNext())
-    Err = make_error<GenericBinaryError>("Could not read first entry.",
+    Err = make_error<GenericBinaryError>("Could not read first entry.\n",
                                          object_error::unexpected_eof);
 }
@@ -133,31 +137,35 @@ Error WindowsResourceParser::parse(WindowsResource *WR) {
   ResourceEntryRef Entry = EntryOrErr.get();
   bool End = false;
   while (!End) {
-    Data.push_back(Entry.getData());
-    if (Entry.checkTypeString())
+    bool IsNewTypeString = false;
+    bool IsNewNameString = false;
+
+    Root.addEntry(Entry, IsNewTypeString, IsNewNameString);
+
+    if (IsNewTypeString)
       StringTable.push_back(Entry.getTypeString());
-    if (Entry.checkNameString())
+    if (IsNewNameString)
       StringTable.push_back(Entry.getNameString());
-    Root.addEntry(Entry);
-
     RETURN_IF_ERROR(Entry.moveNext(End));
   }
   return Error::success();
 }
-void WindowsResourceParser::printTree() const {
-  ScopedPrinter Writer(outs());
+void WindowsResourceParser::printTree(raw_ostream &OS) const {
+  ScopedPrinter Writer(OS);
   Root.print(Writer, "Resource Tree");
 }
-void WindowsResourceParser::TreeNode::addEntry(const ResourceEntryRef &Entry) {
-  TreeNode &TypeNode = addTypeNode(Entry);
-  TreeNode &NameNode = TypeNode.addNameNode(Entry);
+void WindowsResourceParser::TreeNode::addEntry(const ResourceEntryRef &Entry,
+                                               bool &IsNewTypeString,
+                                               bool &IsNewNameString) {
+  TreeNode &TypeNode = addTypeNode(Entry, IsNewTypeString);
+  TreeNode &NameNode = TypeNode.addNameNode(Entry, IsNewNameString);
   NameNode.addLanguageNode(Entry);
 }
@@ -171,7 +179,6 @@ WindowsResourceParser::TreeNode::TreeNode(uint16_t MajorVersion,
                                           uint32_t Characteristics)
     : IsDataNode(true), MajorVersion(MajorVersion), MinorVersion(MinorVersion),
       Characteristics(Characteristics) {
-
   if (IsDataNode)
     DataIndex = DataCount++;
 }
@@ -194,17 +201,19 @@ WindowsResourceParser::TreeNode::createDataNode(uint16_t MajorVersion,
 }
 WindowsResourceParser::TreeNode &
-WindowsResourceParser::TreeNode::addTypeNode(const ResourceEntryRef &Entry) {
+WindowsResourceParser::TreeNode::addTypeNode(const ResourceEntryRef &Entry,
+                                             bool &IsNewTypeString) {
   if (Entry.checkTypeString())
-    return addChild(Entry.getTypeString());
+    return addChild(Entry.getTypeString(), IsNewTypeString);
   else
     return addChild(Entry.getTypeID());
 }
 WindowsResourceParser::TreeNode &
-WindowsResourceParser::TreeNode::addNameNode(const ResourceEntryRef &Entry) { +WindowsResourceParser::TreeNode::addNameNode(const ResourceEntryRef &Entry, + bool &IsNewNameString) { if (Entry.checkNameString()) - return addChild(Entry.getNameString()); + return addChild(Entry.getNameString(), IsNewNameString); else return addChild(Entry.getNameID()); } @@ -232,7 +241,8 @@ WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addChild( } WindowsResourceParser::TreeNode & -WindowsResourceParser::TreeNode::addChild(ArrayRef<UTF16> NameRef) { +WindowsResourceParser::TreeNode::addChild(ArrayRef<UTF16> NameRef, + bool &IsNewString) { std::string NameString; ArrayRef<UTF16> CorrectedName; std::vector<UTF16> EndianCorrectedName; @@ -248,6 +258,7 @@ WindowsResourceParser::TreeNode::addChild(ArrayRef<UTF16> NameRef) { auto Child = StringChildren.find(NameString); if (Child == StringChildren.end()) { auto NewChild = createStringNode(); + IsNewString = true; WindowsResourceParser::TreeNode &Node = *NewChild; StringChildren.emplace(NameString, std::move(NewChild)); return Node; @@ -296,7 +307,6 @@ class WindowsResourceCOFFWriter { public: WindowsResourceCOFFWriter(StringRef OutputFile, Machine MachineType, const WindowsResourceParser &Parser, Error &E); - Error write(); private: @@ -314,7 +324,8 @@ private: void writeDirectoryStringTable(); void writeFirstSectionRelocations(); std::unique_ptr<FileOutputBuffer> Buffer; - uint8_t *Current; + uint8_t *BufferStart; + uint64_t CurrentOffset = 0; Machine MachineType; const WindowsResourceParser::TreeNode &Resources; const ArrayRef<std::vector<uint8_t>> Data; @@ -386,6 +397,7 @@ void WindowsResourceCOFFWriter::performSectionOneLayout() { FileSize += SectionOneSize; FileSize += Data.size() * llvm::COFF::RelocationSize; // one relocation for each resource. + FileSize = alignTo(FileSize, SECTION_ALIGNMENT); } void WindowsResourceCOFFWriter::performSectionTwoLayout() { @@ -398,6 +410,7 @@ void WindowsResourceCOFFWriter::performSectionTwoLayout() { SectionTwoSize += llvm::alignTo(Entry.size(), sizeof(uint64_t)); } FileSize += SectionTwoSize; + FileSize = alignTo(FileSize, SECTION_ALIGNMENT); } static std::time_t getTime() { @@ -408,7 +421,7 @@ static std::time_t getTime() { } Error WindowsResourceCOFFWriter::write() { - Current = Buffer->getBufferStart(); + BufferStart = Buffer->getBufferStart(); writeCOFFHeader(); writeFirstSectionHeader(); @@ -427,7 +440,8 @@ Error WindowsResourceCOFFWriter::write() { void WindowsResourceCOFFWriter::writeCOFFHeader() { // Write the COFF header. - auto *Header = reinterpret_cast<llvm::object::coff_file_header *>(Current); + auto *Header = + reinterpret_cast<llvm::object::coff_file_header *>(BufferStart); switch (MachineType) { case Machine::ARM: Header->Machine = llvm::COFF::IMAGE_FILE_MACHINE_ARMNT; @@ -452,9 +466,9 @@ void WindowsResourceCOFFWriter::writeCOFFHeader() { void WindowsResourceCOFFWriter::writeFirstSectionHeader() { // Write the first section header. 
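// The hunks below all apply the same refactoring: the raw 'Current' write
// pointer becomes an explicit 'CurrentOffset' from 'BufferStart'. A hedged
// sketch of the recurring pattern:
//
//   auto *Hdr = reinterpret_cast<llvm::object::coff_section *>(
//       BufferStart + CurrentOffset);
//   CurrentOffset += sizeof(llvm::object::coff_section);
//
// Keeping the position as a single integer offset is what makes the new
// alignTo(CurrentOffset, SECTION_ALIGNMENT) padding between sections easy
// to apply.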
- Current += sizeof(llvm::object::coff_file_header); - auto *SectionOneHeader = - reinterpret_cast<llvm::object::coff_section *>(Current); + CurrentOffset += sizeof(llvm::object::coff_file_header); + auto *SectionOneHeader = reinterpret_cast<llvm::object::coff_section *>( + BufferStart + CurrentOffset); strncpy(SectionOneHeader->Name, ".rsrc$01", (size_t)llvm::COFF::NameSize); SectionOneHeader->VirtualSize = 0; SectionOneHeader->VirtualAddress = 0; @@ -473,9 +487,9 @@ void WindowsResourceCOFFWriter::writeFirstSectionHeader() { void WindowsResourceCOFFWriter::writeSecondSectionHeader() { // Write the second section header. - Current += sizeof(llvm::object::coff_section); - auto *SectionTwoHeader = - reinterpret_cast<llvm::object::coff_section *>(Current); + CurrentOffset += sizeof(llvm::object::coff_section); + auto *SectionTwoHeader = reinterpret_cast<llvm::object::coff_section *>( + BufferStart + CurrentOffset); strncpy(SectionTwoHeader->Name, ".rsrc$02", (size_t)llvm::COFF::NameSize); SectionTwoHeader->VirtualSize = 0; SectionTwoHeader->VirtualAddress = 0; @@ -492,75 +506,85 @@ void WindowsResourceCOFFWriter::writeSecondSectionHeader() { void WindowsResourceCOFFWriter::writeFirstSection() { // Write section one. - Current += sizeof(llvm::object::coff_section); + CurrentOffset += sizeof(llvm::object::coff_section); writeDirectoryTree(); writeDirectoryStringTable(); writeFirstSectionRelocations(); + + CurrentOffset = alignTo(CurrentOffset, SECTION_ALIGNMENT); } void WindowsResourceCOFFWriter::writeSecondSection() { // Now write the .rsrc$02 section. for (auto const &RawDataEntry : Data) { - std::copy(RawDataEntry.begin(), RawDataEntry.end(), Current); - Current += alignTo(RawDataEntry.size(), sizeof(uint64_t)); + std::copy(RawDataEntry.begin(), RawDataEntry.end(), + BufferStart + CurrentOffset); + CurrentOffset += alignTo(RawDataEntry.size(), sizeof(uint64_t)); } + + CurrentOffset = alignTo(CurrentOffset, SECTION_ALIGNMENT); } void WindowsResourceCOFFWriter::writeSymbolTable() { // Now write the symbol table. // First, the feat symbol. - auto *Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(Current); + auto *Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(BufferStart + + CurrentOffset); strncpy(Symbol->Name.ShortName, "@feat.00", (size_t)llvm::COFF::NameSize); Symbol->Value = 0x11; Symbol->SectionNumber = 0xffff; Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL; Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC; Symbol->NumberOfAuxSymbols = 0; - Current += sizeof(llvm::object::coff_symbol16); + CurrentOffset += sizeof(llvm::object::coff_symbol16); // Now write the .rsrc1 symbol + aux. 
- Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(Current); + Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(BufferStart + + CurrentOffset); strncpy(Symbol->Name.ShortName, ".rsrc$01", (size_t)llvm::COFF::NameSize); Symbol->Value = 0; Symbol->SectionNumber = 1; Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL; Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC; Symbol->NumberOfAuxSymbols = 1; - Current += sizeof(llvm::object::coff_symbol16); - auto *Aux = - reinterpret_cast<llvm::object::coff_aux_section_definition *>(Current); + CurrentOffset += sizeof(llvm::object::coff_symbol16); + auto *Aux = reinterpret_cast<llvm::object::coff_aux_section_definition *>( + BufferStart + CurrentOffset); Aux->Length = SectionOneSize; Aux->NumberOfRelocations = Data.size(); Aux->NumberOfLinenumbers = 0; Aux->CheckSum = 0; Aux->NumberLowPart = 0; Aux->Selection = 0; - Current += sizeof(llvm::object::coff_aux_section_definition); + CurrentOffset += sizeof(llvm::object::coff_aux_section_definition); // Now write the .rsrc2 symbol + aux. - Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(Current); + Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(BufferStart + + CurrentOffset); strncpy(Symbol->Name.ShortName, ".rsrc$02", (size_t)llvm::COFF::NameSize); Symbol->Value = 0; Symbol->SectionNumber = 2; Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL; Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC; Symbol->NumberOfAuxSymbols = 1; - Current += sizeof(llvm::object::coff_symbol16); - Aux = reinterpret_cast<llvm::object::coff_aux_section_definition *>(Current); + CurrentOffset += sizeof(llvm::object::coff_symbol16); + Aux = reinterpret_cast<llvm::object::coff_aux_section_definition *>( + BufferStart + CurrentOffset); Aux->Length = SectionTwoSize; Aux->NumberOfRelocations = 0; Aux->NumberOfLinenumbers = 0; Aux->CheckSum = 0; Aux->NumberLowPart = 0; Aux->Selection = 0; - Current += sizeof(llvm::object::coff_aux_section_definition); + CurrentOffset += sizeof(llvm::object::coff_aux_section_definition); // Now write a symbol for each relocation. for (unsigned i = 0; i < Data.size(); i++) { char RelocationName[9]; sprintf(RelocationName, "$R%06X", DataOffsets[i]); - Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(Current); + Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(BufferStart + + CurrentOffset); strncpy(Symbol->Name.ShortName, RelocationName, (size_t)llvm::COFF::NameSize); Symbol->Value = DataOffsets[i]; @@ -568,14 +592,14 @@ void WindowsResourceCOFFWriter::writeSymbolTable() { Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL; Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC; Symbol->NumberOfAuxSymbols = 0; - Current += sizeof(llvm::object::coff_symbol16); + CurrentOffset += sizeof(llvm::object::coff_symbol16); } } void WindowsResourceCOFFWriter::writeStringTable() { // Just 4 null bytes for the string table. 
- auto COFFStringTable = reinterpret_cast<uint32_t *>(Current); - *COFFStringTable = 0; + auto COFFStringTable = reinterpret_cast<void *>(BufferStart + CurrentOffset); + memset(COFFStringTable, 0, 4); } void WindowsResourceCOFFWriter::writeDirectoryTree() { @@ -593,8 +617,8 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() { while (!Queue.empty()) { auto CurrentNode = Queue.front(); Queue.pop(); - auto *Table = - reinterpret_cast<llvm::object::coff_resource_dir_table *>(Current); + auto *Table = reinterpret_cast<llvm::object::coff_resource_dir_table *>( + BufferStart + CurrentOffset); Table->Characteristics = CurrentNode->getCharacteristics(); Table->TimeDateStamp = 0; Table->MajorVersion = CurrentNode->getMajorVersion(); @@ -603,13 +627,13 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() { auto &StringChildren = CurrentNode->getStringChildren(); Table->NumberOfNameEntries = StringChildren.size(); Table->NumberOfIDEntries = IDChildren.size(); - Current += sizeof(llvm::object::coff_resource_dir_table); + CurrentOffset += sizeof(llvm::object::coff_resource_dir_table); CurrentRelativeOffset += sizeof(llvm::object::coff_resource_dir_table); // Write the directory entries immediately following each directory table. for (auto const &Child : StringChildren) { - auto *Entry = - reinterpret_cast<llvm::object::coff_resource_dir_entry *>(Current); + auto *Entry = reinterpret_cast<llvm::object::coff_resource_dir_entry *>( + BufferStart + CurrentOffset); Entry->Identifier.NameOffset = StringTableOffsets[Child.second->getStringIndex()]; if (Child.second->checkIsDataNode()) { @@ -624,12 +648,12 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() { sizeof(llvm::object::coff_resource_dir_entry); Queue.push(Child.second.get()); } - Current += sizeof(llvm::object::coff_resource_dir_entry); + CurrentOffset += sizeof(llvm::object::coff_resource_dir_entry); CurrentRelativeOffset += sizeof(llvm::object::coff_resource_dir_entry); } for (auto const &Child : IDChildren) { - auto *Entry = - reinterpret_cast<llvm::object::coff_resource_dir_entry *>(Current); + auto *Entry = reinterpret_cast<llvm::object::coff_resource_dir_entry *>( + BufferStart + CurrentOffset); Entry->Identifier.ID = Child.first; if (Child.second->checkIsDataNode()) { Entry->Offset.DataEntryOffset = NextLevelOffset; @@ -643,7 +667,7 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() { sizeof(llvm::object::coff_resource_dir_entry); Queue.push(Child.second.get()); } - Current += sizeof(llvm::object::coff_resource_dir_entry); + CurrentOffset += sizeof(llvm::object::coff_resource_dir_entry); CurrentRelativeOffset += sizeof(llvm::object::coff_resource_dir_entry); } } @@ -651,14 +675,14 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() { RelocationAddresses.resize(Data.size()); // Now write all the resource data entries. for (auto DataNodes : DataEntriesTreeOrder) { - auto *Entry = - reinterpret_cast<llvm::object::coff_resource_data_entry *>(Current); + auto *Entry = reinterpret_cast<llvm::object::coff_resource_data_entry *>( + BufferStart + CurrentOffset); RelocationAddresses[DataNodes->getDataIndex()] = CurrentRelativeOffset; Entry->DataRVA = 0; // Set to zero because it is a relocation. 
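// (The IMAGE_REL_*_ADDR32NB relocations emitted later in
// writeFirstSectionRelocations() target exactly these DataRVA fields, so the
// linker supplies the real image-relative address; zero is a placeholder.)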
Entry->DataSize = Data[DataNodes->getDataIndex()].size(); Entry->Codepage = 0; Entry->Reserved = 0; - Current += sizeof(llvm::object::coff_resource_data_entry); + CurrentOffset += sizeof(llvm::object::coff_resource_data_entry); CurrentRelativeOffset += sizeof(llvm::object::coff_resource_data_entry); } } @@ -666,17 +690,16 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() { void WindowsResourceCOFFWriter::writeDirectoryStringTable() { // Now write the directory string table for .rsrc$01 uint32_t TotalStringTableSize = 0; - for (auto String : StringTable) { - auto *LengthField = reinterpret_cast<uint16_t *>(Current); + for (auto &String : StringTable) { uint16_t Length = String.size(); - *LengthField = Length; - Current += sizeof(uint16_t); - auto *Start = reinterpret_cast<UTF16 *>(Current); + support::endian::write16le(BufferStart + CurrentOffset, Length); + CurrentOffset += sizeof(uint16_t); + auto *Start = reinterpret_cast<UTF16 *>(BufferStart + CurrentOffset); std::copy(String.begin(), String.end(), Start); - Current += Length * sizeof(UTF16); + CurrentOffset += Length * sizeof(UTF16); TotalStringTableSize += Length * sizeof(UTF16) + sizeof(uint16_t); } - Current += + CurrentOffset += alignTo(TotalStringTableSize, sizeof(uint32_t)) - TotalStringTableSize; } @@ -687,7 +710,8 @@ void WindowsResourceCOFFWriter::writeFirstSectionRelocations() { // .rsrc section. uint32_t NextSymbolIndex = 5; for (unsigned i = 0; i < Data.size(); i++) { - auto *Reloc = reinterpret_cast<llvm::object::coff_relocation *>(Current); + auto *Reloc = reinterpret_cast<llvm::object::coff_relocation *>( + BufferStart + CurrentOffset); Reloc->VirtualAddress = RelocationAddresses[i]; Reloc->SymbolTableIndex = NextSymbolIndex++; switch (MachineType) { @@ -703,7 +727,7 @@ void WindowsResourceCOFFWriter::writeFirstSectionRelocations() { default: Reloc->Type = 0; } - Current += sizeof(llvm::object::coff_relocation); + CurrentOffset += sizeof(llvm::object::coff_relocation); } } diff --git a/lib/ObjectYAML/COFFYAML.cpp b/lib/ObjectYAML/COFFYAML.cpp index 7f9f4c1f8c2c..c8cbea1490f6 100644 --- a/lib/ObjectYAML/COFFYAML.cpp +++ b/lib/ObjectYAML/COFFYAML.cpp @@ -488,7 +488,16 @@ void MappingTraits<COFFYAML::Section>::mapping(IO &IO, COFFYAML::Section &Sec) { IO.mapOptional("VirtualAddress", Sec.Header.VirtualAddress, 0U); IO.mapOptional("VirtualSize", Sec.Header.VirtualSize, 0U); IO.mapOptional("Alignment", Sec.Alignment, 0U); - IO.mapRequired("SectionData", Sec.SectionData); + + // If this is a .debug$S or .debug$T section parse the semantic representation + // of the symbols/types. If it is any other kind of section, just deal in raw + // bytes. 
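// For illustration, with this change a .debug$S section entry in the YAML
// document can look roughly like the sketch below, while every other section
// keeps round-tripping through raw 'SectionData' bytes. (The tag and key
// names here are illustrative of the CodeView YAML conventions, not copied
// from this patch.)
//
//   - Name:            '.debug$S'
//     Subsections:
//       - !StringTable
//         Strings:
//           - 'a.cpp'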
+ IO.mapOptional("SectionData", Sec.SectionData); + if (Sec.Name == ".debug$S") + IO.mapOptional("Subsections", Sec.DebugS); + else if (Sec.Name == ".debug$T") + IO.mapOptional("Types", Sec.DebugT); + IO.mapOptional("Relocations", Sec.Relocations); } diff --git a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp index 08a4bb715fac..d194420d5ef4 100644 --- a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp +++ b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp @@ -28,6 +28,7 @@ #include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h" #include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h" #include "llvm/DebugInfo/CodeView/EnumTables.h" +#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/CodeView/SymbolSerializer.h" #include "llvm/ObjectYAML/CodeViewYAMLSymbols.h" @@ -75,10 +76,9 @@ struct YAMLSubsectionBase { virtual ~YAMLSubsectionBase() {} virtual void map(IO &IO) = 0; - virtual std::unique_ptr<DebugSubsection> + virtual std::shared_ptr<DebugSubsection> toCodeViewSubsection(BumpPtrAllocator &Allocator, - DebugStringTableSubsection *UseStrings, - DebugChecksumsSubsection *UseChecksums) const = 0; + const codeview::StringsAndChecksums &SC) const = 0; }; } } @@ -90,10 +90,9 @@ struct YAMLChecksumsSubsection : public YAMLSubsectionBase { : YAMLSubsectionBase(DebugSubsectionKind::FileChecksums) {} void map(IO &IO) override; - std::unique_ptr<DebugSubsection> + std::shared_ptr<DebugSubsection> toCodeViewSubsection(BumpPtrAllocator &Allocator, - DebugStringTableSubsection *Strings, - DebugChecksumsSubsection *Checksums) const override; + const codeview::StringsAndChecksums &SC) const override; static Expected<std::shared_ptr<YAMLChecksumsSubsection>> fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, const DebugChecksumsSubsectionRef &FC); @@ -105,10 +104,9 @@ struct YAMLLinesSubsection : public YAMLSubsectionBase { YAMLLinesSubsection() : YAMLSubsectionBase(DebugSubsectionKind::Lines) {} void map(IO &IO) override; - std::unique_ptr<DebugSubsection> + std::shared_ptr<DebugSubsection> toCodeViewSubsection(BumpPtrAllocator &Allocator, - DebugStringTableSubsection *Strings, - DebugChecksumsSubsection *Checksums) const override; + const codeview::StringsAndChecksums &SC) const override; static Expected<std::shared_ptr<YAMLLinesSubsection>> fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, const DebugChecksumsSubsectionRef &Checksums, @@ -122,10 +120,9 @@ struct YAMLInlineeLinesSubsection : public YAMLSubsectionBase { : YAMLSubsectionBase(DebugSubsectionKind::InlineeLines) {} void map(IO &IO) override; - std::unique_ptr<DebugSubsection> + std::shared_ptr<DebugSubsection> toCodeViewSubsection(BumpPtrAllocator &Allocator, - DebugStringTableSubsection *Strings, - DebugChecksumsSubsection *Checksums) const override; + const codeview::StringsAndChecksums &SC) const override; static Expected<std::shared_ptr<YAMLInlineeLinesSubsection>> fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, const DebugChecksumsSubsectionRef &Checksums, @@ -139,10 +136,9 @@ struct YAMLCrossModuleExportsSubsection : public YAMLSubsectionBase { : YAMLSubsectionBase(DebugSubsectionKind::CrossScopeExports) {} void map(IO &IO) override; - std::unique_ptr<DebugSubsection> + std::shared_ptr<DebugSubsection> toCodeViewSubsection(BumpPtrAllocator &Allocator, - DebugStringTableSubsection *Strings, - DebugChecksumsSubsection *Checksums) const override; + const 
codeview::StringsAndChecksums &SC) const override; static Expected<std::shared_ptr<YAMLCrossModuleExportsSubsection>> fromCodeViewSubsection(const DebugCrossModuleExportsSubsectionRef &Exports); @@ -154,10 +150,9 @@ struct YAMLCrossModuleImportsSubsection : public YAMLSubsectionBase { : YAMLSubsectionBase(DebugSubsectionKind::CrossScopeImports) {} void map(IO &IO) override; - std::unique_ptr<DebugSubsection> + std::shared_ptr<DebugSubsection> toCodeViewSubsection(BumpPtrAllocator &Allocator, - DebugStringTableSubsection *Strings, - DebugChecksumsSubsection *Checksums) const override; + const codeview::StringsAndChecksums &SC) const override; static Expected<std::shared_ptr<YAMLCrossModuleImportsSubsection>> fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, const DebugCrossModuleImportsSubsectionRef &Imports); @@ -169,10 +164,9 @@ struct YAMLSymbolsSubsection : public YAMLSubsectionBase { YAMLSymbolsSubsection() : YAMLSubsectionBase(DebugSubsectionKind::Symbols) {} void map(IO &IO) override; - std::unique_ptr<DebugSubsection> + std::shared_ptr<DebugSubsection> toCodeViewSubsection(BumpPtrAllocator &Allocator, - DebugStringTableSubsection *Strings, - DebugChecksumsSubsection *Checksums) const override; + const codeview::StringsAndChecksums &SC) const override; static Expected<std::shared_ptr<YAMLSymbolsSubsection>> fromCodeViewSubsection(const DebugSymbolsSubsectionRef &Symbols); @@ -184,10 +178,9 @@ struct YAMLStringTableSubsection : public YAMLSubsectionBase { : YAMLSubsectionBase(DebugSubsectionKind::StringTable) {} void map(IO &IO) override; - std::unique_ptr<DebugSubsection> + std::shared_ptr<DebugSubsection> toCodeViewSubsection(BumpPtrAllocator &Allocator, - DebugStringTableSubsection *Strings, - DebugChecksumsSubsection *Checksums) const override; + const codeview::StringsAndChecksums &SC) const override; static Expected<std::shared_ptr<YAMLStringTableSubsection>> fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings); @@ -199,10 +192,9 @@ struct YAMLFrameDataSubsection : public YAMLSubsectionBase { : YAMLSubsectionBase(DebugSubsectionKind::FrameData) {} void map(IO &IO) override; - std::unique_ptr<DebugSubsection> + std::shared_ptr<DebugSubsection> toCodeViewSubsection(BumpPtrAllocator &Allocator, - DebugStringTableSubsection *Strings, - DebugChecksumsSubsection *Checksums) const override; + const codeview::StringsAndChecksums &SC) const override; static Expected<std::shared_ptr<YAMLFrameDataSubsection>> fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, const DebugFrameDataSubsectionRef &Frames); @@ -215,10 +207,9 @@ struct YAMLCoffSymbolRVASubsection : public YAMLSubsectionBase { : YAMLSubsectionBase(DebugSubsectionKind::CoffSymbolRVA) {} void map(IO &IO) override; - std::unique_ptr<DebugSubsection> + std::shared_ptr<DebugSubsection> toCodeViewSubsection(BumpPtrAllocator &Allocator, - DebugStringTableSubsection *Strings, - DebugChecksumsSubsection *Checksums) const override; + const codeview::StringsAndChecksums &SC) const override; static Expected<std::shared_ptr<YAMLCoffSymbolRVASubsection>> fromCodeViewSubsection(const DebugSymbolRVASubsectionRef &RVAs); @@ -389,34 +380,23 @@ void MappingTraits<YAMLDebugSubsection>::mapping( Subsection.Subsection->map(IO); } -static std::shared_ptr<YAMLChecksumsSubsection> -findChecksums(ArrayRef<YAMLDebugSubsection> Subsections) { - for (const auto &SS : Subsections) { - if (SS.Subsection->Kind == DebugSubsectionKind::FileChecksums) { - return 
std::static_pointer_cast<YAMLChecksumsSubsection>(SS.Subsection); - } - } - - return nullptr; -} - -std::unique_ptr<DebugSubsection> YAMLChecksumsSubsection::toCodeViewSubsection( - BumpPtrAllocator &Allocator, DebugStringTableSubsection *UseStrings, - DebugChecksumsSubsection *UseChecksums) const { - assert(UseStrings && !UseChecksums); - auto Result = llvm::make_unique<DebugChecksumsSubsection>(*UseStrings); +std::shared_ptr<DebugSubsection> YAMLChecksumsSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + assert(SC.hasStrings()); + auto Result = std::make_shared<DebugChecksumsSubsection>(*SC.strings()); for (const auto &CS : Checksums) { Result->addChecksum(CS.FileName, CS.Kind, CS.ChecksumBytes.Bytes); } - return std::move(Result); + return Result; } -std::unique_ptr<DebugSubsection> YAMLLinesSubsection::toCodeViewSubsection( - BumpPtrAllocator &Allocator, DebugStringTableSubsection *UseStrings, - DebugChecksumsSubsection *UseChecksums) const { - assert(UseStrings && UseChecksums); +std::shared_ptr<DebugSubsection> YAMLLinesSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + assert(SC.hasStrings() && SC.hasChecksums()); auto Result = - llvm::make_unique<DebugLinesSubsection>(*UseChecksums, *UseStrings); + std::make_shared<DebugLinesSubsection>(*SC.checksums(), *SC.strings()); Result->setCodeSize(Lines.CodeSize); Result->setRelocationAddress(Lines.RelocSegment, Lines.RelocOffset); Result->setFlags(Lines.Flags); @@ -438,16 +418,16 @@ std::unique_ptr<DebugSubsection> YAMLLinesSubsection::toCodeViewSubsection( } } } - return llvm::cast<DebugSubsection>(std::move(Result)); + return Result; } -std::unique_ptr<DebugSubsection> +std::shared_ptr<DebugSubsection> YAMLInlineeLinesSubsection::toCodeViewSubsection( - BumpPtrAllocator &Allocator, DebugStringTableSubsection *UseStrings, - DebugChecksumsSubsection *UseChecksums) const { - assert(UseChecksums); - auto Result = llvm::make_unique<DebugInlineeLinesSubsection>( - *UseChecksums, InlineeLines.HasExtraFiles); + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + assert(SC.hasChecksums()); + auto Result = std::make_shared<DebugInlineeLinesSubsection>( + *SC.checksums(), InlineeLines.HasExtraFiles); for (const auto &Site : InlineeLines.Sites) { Result->addInlineSite(TypeIndex(Site.Inlinee), Site.FileName, @@ -459,56 +439,60 @@ YAMLInlineeLinesSubsection::toCodeViewSubsection( Result->addExtraFile(EF); } } - return llvm::cast<DebugSubsection>(std::move(Result)); + return Result; } -std::unique_ptr<DebugSubsection> +std::shared_ptr<DebugSubsection> YAMLCrossModuleExportsSubsection::toCodeViewSubsection( - BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings, - DebugChecksumsSubsection *Checksums) const { - auto Result = llvm::make_unique<DebugCrossModuleExportsSubsection>(); + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + auto Result = std::make_shared<DebugCrossModuleExportsSubsection>(); for (const auto &M : Exports) Result->addMapping(M.Local, M.Global); - return llvm::cast<DebugSubsection>(std::move(Result)); + return Result; } -std::unique_ptr<DebugSubsection> +std::shared_ptr<DebugSubsection> YAMLCrossModuleImportsSubsection::toCodeViewSubsection( - BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings, - DebugChecksumsSubsection *Checksums) const { - auto Result = 
llvm::make_unique<DebugCrossModuleImportsSubsection>(*Strings); + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + assert(SC.hasStrings()); + + auto Result = + std::make_shared<DebugCrossModuleImportsSubsection>(*SC.strings()); for (const auto &M : Imports) { for (const auto Id : M.ImportIds) Result->addImport(M.ModuleName, Id); } - return llvm::cast<DebugSubsection>(std::move(Result)); + return Result; } -std::unique_ptr<DebugSubsection> YAMLSymbolsSubsection::toCodeViewSubsection( - BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings, - DebugChecksumsSubsection *Checksums) const { - auto Result = llvm::make_unique<DebugSymbolsSubsection>(); +std::shared_ptr<DebugSubsection> YAMLSymbolsSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + auto Result = std::make_shared<DebugSymbolsSubsection>(); for (const auto &Sym : Symbols) Result->addSymbol( Sym.toCodeViewSymbol(Allocator, CodeViewContainer::ObjectFile)); - return std::move(Result); + return Result; } -std::unique_ptr<DebugSubsection> +std::shared_ptr<DebugSubsection> YAMLStringTableSubsection::toCodeViewSubsection( - BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings, - DebugChecksumsSubsection *Checksums) const { - auto Result = llvm::make_unique<DebugStringTableSubsection>(); + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + auto Result = std::make_shared<DebugStringTableSubsection>(); for (const auto &Str : this->Strings) Result->insert(Str); - return std::move(Result); + return Result; } -std::unique_ptr<DebugSubsection> YAMLFrameDataSubsection::toCodeViewSubsection( - BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings, - DebugChecksumsSubsection *Checksums) const { - assert(Strings); - auto Result = llvm::make_unique<DebugFrameDataSubsection>(); +std::shared_ptr<DebugSubsection> YAMLFrameDataSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + assert(SC.hasStrings()); + + auto Result = std::make_shared<DebugFrameDataSubsection>(); for (const auto &YF : Frames) { codeview::FrameData F; F.CodeSize = YF.CodeSize; @@ -519,20 +503,20 @@ std::unique_ptr<DebugSubsection> YAMLFrameDataSubsection::toCodeViewSubsection( F.PrologSize = YF.PrologSize; F.RvaStart = YF.RvaStart; F.SavedRegsSize = YF.SavedRegsSize; - F.FrameFunc = Strings->insert(YF.FrameFunc); + F.FrameFunc = SC.strings()->insert(YF.FrameFunc); Result->addFrameData(F); } - return std::move(Result); + return Result; } -std::unique_ptr<DebugSubsection> +std::shared_ptr<DebugSubsection> YAMLCoffSymbolRVASubsection::toCodeViewSubsection( - BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings, - DebugChecksumsSubsection *Checksums) const { - auto Result = llvm::make_unique<DebugSymbolRVASubsection>(); + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + auto Result = std::make_shared<DebugSymbolRVASubsection>(); for (const auto &RVA : RVAs) Result->addRVA(RVA); - return std::move(Result); + return Result; } static Expected<SourceFileChecksumEntry> @@ -741,63 +725,17 @@ YAMLCoffSymbolRVASubsection::fromCodeViewSubsection( return Result; } -Expected<std::vector<std::unique_ptr<DebugSubsection>>> +Expected<std::vector<std::shared_ptr<DebugSubsection>>> llvm::CodeViewYAML::toCodeViewSubsectionList( BumpPtrAllocator &Allocator, ArrayRef<YAMLDebugSubsection> Subsections, - DebugStringTableSubsection &Strings) { 
- std::vector<std::unique_ptr<DebugSubsection>> Result; + const codeview::StringsAndChecksums &SC) { + std::vector<std::shared_ptr<DebugSubsection>> Result; if (Subsections.empty()) return std::move(Result); - auto Checksums = findChecksums(Subsections); - std::unique_ptr<DebugSubsection> ChecksumsBase; - if (Checksums) - ChecksumsBase = - Checksums->toCodeViewSubsection(Allocator, &Strings, nullptr); - DebugChecksumsSubsection *CS = - static_cast<DebugChecksumsSubsection *>(ChecksumsBase.get()); for (const auto &SS : Subsections) { - // We've already converted the checksums subsection, don't do it - // twice. - std::unique_ptr<DebugSubsection> CVS; - if (SS.Subsection->Kind == DebugSubsectionKind::FileChecksums) - CVS = std::move(ChecksumsBase); - else - CVS = SS.Subsection->toCodeViewSubsection(Allocator, &Strings, CS); - assert(CVS != nullptr); - Result.push_back(std::move(CVS)); - } - return std::move(Result); -} - -Expected<std::vector<std::unique_ptr<codeview::DebugSubsection>>> -llvm::CodeViewYAML::toCodeViewSubsectionList( - BumpPtrAllocator &Allocator, ArrayRef<YAMLDebugSubsection> Subsections, - std::unique_ptr<DebugStringTableSubsection> &TakeStrings, - DebugStringTableSubsection *StringsRef) { - std::vector<std::unique_ptr<DebugSubsection>> Result; - if (Subsections.empty()) - return std::move(Result); - - auto Checksums = findChecksums(Subsections); - - std::unique_ptr<DebugSubsection> ChecksumsBase; - if (Checksums) - ChecksumsBase = - Checksums->toCodeViewSubsection(Allocator, StringsRef, nullptr); - DebugChecksumsSubsection *CS = - static_cast<DebugChecksumsSubsection *>(ChecksumsBase.get()); - for (const auto &SS : Subsections) { - // We've already converted the checksums and string table subsection, don't - // do it twice. - std::unique_ptr<DebugSubsection> CVS; - if (SS.Subsection->Kind == DebugSubsectionKind::FileChecksums) - CVS = std::move(ChecksumsBase); - else if (SS.Subsection->Kind == DebugSubsectionKind::StringTable) { - assert(TakeStrings && "No string table!"); - CVS = std::move(TakeStrings); - } else - CVS = SS.Subsection->toCodeViewSubsection(Allocator, StringsRef, CS); + std::shared_ptr<DebugSubsection> CVS; + CVS = SS.Subsection->toCodeViewSubsection(Allocator, SC); assert(CVS != nullptr); Result.push_back(std::move(CVS)); } @@ -810,23 +748,23 @@ struct SubsectionConversionVisitor : public DebugSubsectionVisitor { Error visitUnknown(DebugUnknownSubsectionRef &Unknown) override; Error visitLines(DebugLinesSubsectionRef &Lines, - const DebugSubsectionState &State) override; + const StringsAndChecksumsRef &State) override; Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums, - const DebugSubsectionState &State) override; + const StringsAndChecksumsRef &State) override; Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees, - const DebugSubsectionState &State) override; + const StringsAndChecksumsRef &State) override; Error visitCrossModuleExports(DebugCrossModuleExportsSubsectionRef &Checksums, - const DebugSubsectionState &State) override; + const StringsAndChecksumsRef &State) override; Error visitCrossModuleImports(DebugCrossModuleImportsSubsectionRef &Inlinees, - const DebugSubsectionState &State) override; + const StringsAndChecksumsRef &State) override; Error visitStringTable(DebugStringTableSubsectionRef &ST, - const DebugSubsectionState &State) override; + const StringsAndChecksumsRef &State) override; Error visitSymbols(DebugSymbolsSubsectionRef &Symbols, - const DebugSubsectionState &State) override; + const 
StringsAndChecksumsRef &State) override; Error visitFrameData(DebugFrameDataSubsectionRef &Symbols, - const DebugSubsectionState &State) override; + const StringsAndChecksumsRef &State) override; Error visitCOFFSymbolRVAs(DebugSymbolRVASubsectionRef &Symbols, - const DebugSubsectionState &State) override; + const StringsAndChecksumsRef &State) override; YAMLDebugSubsection Subsection; }; @@ -837,7 +775,7 @@ Error SubsectionConversionVisitor::visitUnknown( } Error SubsectionConversionVisitor::visitLines( - DebugLinesSubsectionRef &Lines, const DebugSubsectionState &State) { + DebugLinesSubsectionRef &Lines, const StringsAndChecksumsRef &State) { auto Result = YAMLLinesSubsection::fromCodeViewSubsection( State.strings(), State.checksums(), Lines); if (!Result) @@ -847,7 +785,8 @@ Error SubsectionConversionVisitor::visitLines( } Error SubsectionConversionVisitor::visitFileChecksums( - DebugChecksumsSubsectionRef &Checksums, const DebugSubsectionState &State) { + DebugChecksumsSubsectionRef &Checksums, + const StringsAndChecksumsRef &State) { auto Result = YAMLChecksumsSubsection::fromCodeViewSubsection(State.strings(), Checksums); if (!Result) @@ -858,7 +797,7 @@ Error SubsectionConversionVisitor::visitFileChecksums( Error SubsectionConversionVisitor::visitInlineeLines( DebugInlineeLinesSubsectionRef &Inlinees, - const DebugSubsectionState &State) { + const StringsAndChecksumsRef &State) { auto Result = YAMLInlineeLinesSubsection::fromCodeViewSubsection( State.strings(), State.checksums(), Inlinees); if (!Result) @@ -869,7 +808,7 @@ Error SubsectionConversionVisitor::visitInlineeLines( Error SubsectionConversionVisitor::visitCrossModuleExports( DebugCrossModuleExportsSubsectionRef &Exports, - const DebugSubsectionState &State) { + const StringsAndChecksumsRef &State) { auto Result = YAMLCrossModuleExportsSubsection::fromCodeViewSubsection(Exports); if (!Result) @@ -880,7 +819,7 @@ Error SubsectionConversionVisitor::visitCrossModuleExports( Error SubsectionConversionVisitor::visitCrossModuleImports( DebugCrossModuleImportsSubsectionRef &Imports, - const DebugSubsectionState &State) { + const StringsAndChecksumsRef &State) { auto Result = YAMLCrossModuleImportsSubsection::fromCodeViewSubsection( State.strings(), Imports); if (!Result) @@ -890,7 +829,8 @@ Error SubsectionConversionVisitor::visitCrossModuleImports( } Error SubsectionConversionVisitor::visitStringTable( - DebugStringTableSubsectionRef &Strings, const DebugSubsectionState &State) { + DebugStringTableSubsectionRef &Strings, + const StringsAndChecksumsRef &State) { auto Result = YAMLStringTableSubsection::fromCodeViewSubsection(Strings); if (!Result) return Result.takeError(); @@ -899,7 +839,7 @@ Error SubsectionConversionVisitor::visitStringTable( } Error SubsectionConversionVisitor::visitSymbols( - DebugSymbolsSubsectionRef &Symbols, const DebugSubsectionState &State) { + DebugSymbolsSubsectionRef &Symbols, const StringsAndChecksumsRef &State) { auto Result = YAMLSymbolsSubsection::fromCodeViewSubsection(Symbols); if (!Result) return Result.takeError(); @@ -908,7 +848,7 @@ Error SubsectionConversionVisitor::visitSymbols( } Error SubsectionConversionVisitor::visitFrameData( - DebugFrameDataSubsectionRef &Frames, const DebugSubsectionState &State) { + DebugFrameDataSubsectionRef &Frames, const StringsAndChecksumsRef &State) { auto Result = YAMLFrameDataSubsection::fromCodeViewSubsection(State.strings(), Frames); if (!Result) @@ -918,7 +858,7 @@ Error SubsectionConversionVisitor::visitFrameData( } Error 
SubsectionConversionVisitor::visitCOFFSymbolRVAs(
-    DebugSymbolRVASubsectionRef &RVAs, const DebugSubsectionState &State) {
+    DebugSymbolRVASubsectionRef &RVAs, const StringsAndChecksumsRef &State) {
   auto Result = YAMLCoffSymbolRVASubsection::fromCodeViewSubsection(RVAs);
   if (!Result)
     return Result.takeError();
@@ -927,29 +867,71 @@ Error SubsectionConversionVisitor::visitCOFFSymbolRVAs(
 }
 }
-Expected<YAMLDebugSubsection> YAMLDebugSubsection::fromCodeViewSubection(
-    const DebugStringTableSubsectionRef &Strings,
-    const DebugChecksumsSubsectionRef &Checksums,
-    const DebugSubsectionRecord &SS) {
-  DebugSubsectionState State(Strings, Checksums);
+Expected<YAMLDebugSubsection>
+YAMLDebugSubsection::fromCodeViewSubection(const StringsAndChecksumsRef &SC,
+                                           const DebugSubsectionRecord &SS) {
   SubsectionConversionVisitor V;
-  if (auto EC = visitDebugSubsection(SS, V, State))
+  if (auto EC = visitDebugSubsection(SS, V, SC))
     return std::move(EC);
   return V.Subsection;
 }
-std::unique_ptr<DebugStringTableSubsection>
-llvm::CodeViewYAML::findStringTable(ArrayRef<YAMLDebugSubsection> Sections) {
-  for (const auto &SS : Sections) {
-    if (SS.Subsection->Kind != DebugSubsectionKind::StringTable)
-      continue;
+std::vector<YAMLDebugSubsection>
+llvm::CodeViewYAML::fromDebugS(ArrayRef<uint8_t> Data,
+                               const StringsAndChecksumsRef &SC) {
+  BinaryStreamReader Reader(Data, support::little);
+  uint32_t Magic;
+
+  ExitOnError Err("Invalid .debug$S section!");
+  Err(Reader.readInteger(Magic));
+  assert(Magic == COFF::DEBUG_SECTION_MAGIC && "Invalid .debug$S section!");
+
+  DebugSubsectionArray Subsections;
+  Err(Reader.readArray(Subsections, Reader.bytesRemaining()));
+
+  std::vector<YAMLDebugSubsection> Result;
-    // String Table doesn't use the allocator.
-    BumpPtrAllocator Allocator;
-    auto Result =
-        SS.Subsection->toCodeViewSubsection(Allocator, nullptr, nullptr);
-    return llvm::cast<DebugStringTableSubsection>(std::move(Result));
+  for (const auto &SS : Subsections) {
+    auto YamlSS = Err(YAMLDebugSubsection::fromCodeViewSubection(SC, SS));
+    Result.push_back(YamlSS);
+  }
+  return Result;
+}
+
+void llvm::CodeViewYAML::initializeStringsAndChecksums(
+    ArrayRef<YAMLDebugSubsection> Sections, codeview::StringsAndChecksums &SC) {
+  // String Table and Checksums subsections don't use the allocator.
+  BumpPtrAllocator Allocator;
+
+  // It's possible for checksums and strings to appear in different .debug$S
+  // sections, so we have to make this a stateful function that can build up
+  // the strings and checksums fields over multiple iterations.
+
+  // File Checksums require the string table, but may come before it, so we
+  // have to scan for strings first, then scan for checksums again from the
+  // beginning.
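// A hedged usage sketch for the stateful scan implemented below (the caller
// and its container are hypothetical; the function and its parameters are
// from this patch): one shared accumulator is simply re-fed every .debug$S
// section, so tables found in a later section still become visible.
//
//   codeview::StringsAndChecksums SC;
//   for (const auto &DebugS : AllDebugSSections)  // hypothetical collection
//     CodeViewYAML::initializeStringsAndChecksums(DebugS.Subsections, SC);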
+ if (!SC.hasStrings()) { + for (const auto &SS : Sections) { + if (SS.Subsection->Kind != DebugSubsectionKind::StringTable) + continue; + + auto Result = SS.Subsection->toCodeViewSubsection(Allocator, SC); + SC.setStrings( + std::static_pointer_cast<DebugStringTableSubsection>(Result)); + break; + } + } + + if (SC.hasStrings() && !SC.hasChecksums()) { + for (const auto &SS : Sections) { + if (SS.Subsection->Kind != DebugSubsectionKind::FileChecksums) + continue; + + auto Result = SS.Subsection->toCodeViewSubsection(Allocator, SC); + SC.setChecksums( + std::static_pointer_cast<DebugChecksumsSubsection>(Result)); + break; + } } - return nullptr; } diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp index fa3f1e0b60aa..ba3a2abe2097 100644 --- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp +++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp @@ -35,6 +35,7 @@ LLVM_YAML_DECLARE_SCALAR_TRAITS(APSInt, false) LLVM_YAML_DECLARE_SCALAR_TRAITS(TypeIndex, false) LLVM_YAML_DECLARE_ENUM_TRAITS(SymbolKind) +LLVM_YAML_DECLARE_ENUM_TRAITS(FrameCookieKind) LLVM_YAML_DECLARE_BITSET_TRAITS(CompileSym2Flags) LLVM_YAML_DECLARE_BITSET_TRAITS(CompileSym3Flags) @@ -149,6 +150,15 @@ void ScalarEnumerationTraits<ThunkOrdinal>::enumeration(IO &io, } } +void ScalarEnumerationTraits<FrameCookieKind>::enumeration( + IO &io, FrameCookieKind &FC) { + auto ThunkNames = getFrameCookieKindNames(); + for (const auto &E : ThunkNames) { + io.enumCase(FC, E.Name.str().c_str(), + static_cast<FrameCookieKind>(E.Value)); + } +} + namespace llvm { namespace CodeViewYAML { namespace detail { @@ -183,8 +193,47 @@ template <typename T> struct SymbolRecordImpl : public SymbolRecordBase { mutable T Symbol; }; +struct UnknownSymbolRecord : public SymbolRecordBase { + explicit UnknownSymbolRecord(codeview::SymbolKind K) : SymbolRecordBase(K) {} + + void map(yaml::IO &io) override; + + CVSymbol toCodeViewSymbol(BumpPtrAllocator &Allocator, + CodeViewContainer Container) const override { + RecordPrefix Prefix; + uint32_t TotalLen = sizeof(RecordPrefix) + Data.size(); + Prefix.RecordKind = Kind; + Prefix.RecordLen = TotalLen - 2; + uint8_t *Buffer = Allocator.Allocate<uint8_t>(TotalLen); + ::memcpy(Buffer, &Prefix, sizeof(RecordPrefix)); + ::memcpy(Buffer + sizeof(RecordPrefix), Data.data(), Data.size()); + return CVSymbol(Kind, ArrayRef<uint8_t>(Buffer, TotalLen)); + } + Error fromCodeViewSymbol(CVSymbol CVS) override { + this->Kind = CVS.kind(); + Data = CVS.RecordData.drop_front(sizeof(RecordPrefix)); + return Error::success(); + } + + std::vector<uint8_t> Data; +}; + template <> void SymbolRecordImpl<ScopeEndSym>::map(IO &IO) {} +void UnknownSymbolRecord::map(yaml::IO &io) { + yaml::BinaryRef Binary; + if (io.outputting()) + Binary = yaml::BinaryRef(Data); + io.mapRequired("Data", Binary); + if (!io.outputting()) { + std::string Str; + raw_string_ostream OS(Str); + Binary.writeAsBinary(OS); + OS.flush(); + Data.assign(Str.begin(), Str.end()); + } +} + template <> void SymbolRecordImpl<Thunk32Sym>::map(IO &IO) { IO.mapRequired("Parent", Symbol.Parent); IO.mapRequired("End", Symbol.End); @@ -461,7 +510,7 @@ static inline Expected<CodeViewYAML::SymbolRecord> fromCodeViewSymbolImpl(CVSymbol Symbol) { CodeViewYAML::SymbolRecord Result; - auto Impl = std::make_shared<SymbolRecordImpl<SymbolType>>(Symbol.kind()); + auto Impl = std::make_shared<SymbolType>(Symbol.kind()); if (auto EC = Impl->fromCodeViewSymbol(Symbol)) return std::move(EC); Result.Symbol = Impl; @@ -472,12 +521,13 @@ 
Expected<CodeViewYAML::SymbolRecord> CodeViewYAML::SymbolRecord::fromCodeViewSymbol(CVSymbol Symbol) { #define SYMBOL_RECORD(EnumName, EnumVal, ClassName) \ case EnumName: \ - return fromCodeViewSymbolImpl<ClassName>(Symbol); + return fromCodeViewSymbolImpl<SymbolRecordImpl<ClassName>>(Symbol); #define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) \ SYMBOL_RECORD(EnumName, EnumVal, ClassName) switch (Symbol.kind()) { #include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" - default: { llvm_unreachable("Unknown symbol kind!"); } + default: + return fromCodeViewSymbolImpl<UnknownSymbolRecord>(Symbol); } return make_error<CodeViewError>(cv_error_code::corrupt_record); } @@ -486,7 +536,7 @@ template <typename ConcreteType> static void mapSymbolRecordImpl(IO &IO, const char *Class, SymbolKind Kind, CodeViewYAML::SymbolRecord &Obj) { if (!IO.outputting()) - Obj.Symbol = std::make_shared<SymbolRecordImpl<ConcreteType>>(Kind); + Obj.Symbol = std::make_shared<ConcreteType>(Kind); IO.mapRequired(Class, *Obj.Symbol); } @@ -500,12 +550,14 @@ void MappingTraits<CodeViewYAML::SymbolRecord>::mapping( #define SYMBOL_RECORD(EnumName, EnumVal, ClassName) \ case EnumName: \ - mapSymbolRecordImpl<ClassName>(IO, #ClassName, Kind, Obj); \ + mapSymbolRecordImpl<SymbolRecordImpl<ClassName>>(IO, #ClassName, Kind, \ + Obj); \ break; #define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) \ SYMBOL_RECORD(EnumName, EnumVal, ClassName) switch (Kind) { #include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" - default: { llvm_unreachable("Unknown symbol kind!"); } + default: + mapSymbolRecordImpl<UnknownSymbolRecord>(IO, "UnknownSym", Kind, Obj); } } diff --git a/lib/ObjectYAML/CodeViewYAMLTypes.cpp b/lib/ObjectYAML/CodeViewYAMLTypes.cpp index 1302b0713d0e..a03b9cd50faa 100644 --- a/lib/ObjectYAML/CodeViewYAMLTypes.cpp +++ b/lib/ObjectYAML/CodeViewYAMLTypes.cpp @@ -714,3 +714,43 @@ void MappingTraits<MemberRecord>::mapping(IO &IO, MemberRecord &Obj) { default: { llvm_unreachable("Unknown member kind!"); } } } + +std::vector<LeafRecord> +llvm::CodeViewYAML::fromDebugT(ArrayRef<uint8_t> DebugT) { + ExitOnError Err("Invalid .debug$T section!"); + BinaryStreamReader Reader(DebugT, support::little); + CVTypeArray Types; + uint32_t Magic; + + Err(Reader.readInteger(Magic)); + assert(Magic == COFF::DEBUG_SECTION_MAGIC && "Invalid .debug$T section!"); + + std::vector<LeafRecord> Result; + Err(Reader.readArray(Types, Reader.bytesRemaining())); + for (const auto &T : Types) { + auto CVT = Err(LeafRecord::fromCodeViewRecord(T)); + Result.push_back(CVT); + } + return Result; +} + +ArrayRef<uint8_t> llvm::CodeViewYAML::toDebugT(ArrayRef<LeafRecord> Leafs, + BumpPtrAllocator &Alloc) { + TypeTableBuilder TTB(Alloc, false); + uint32_t Size = sizeof(uint32_t); + for (const auto &Leaf : Leafs) { + CVType T = Leaf.toCodeViewRecord(TTB); + Size += T.length(); + assert(T.length() % 4 == 0 && "Improper type record alignment!"); + } + uint8_t *ResultBuffer = Alloc.Allocate<uint8_t>(Size); + MutableArrayRef<uint8_t> Output(ResultBuffer, Size); + BinaryStreamWriter Writer(Output, support::little); + ExitOnError Err("Error writing type record to .debug$T section"); + Err(Writer.writeInteger<uint32_t>(COFF::DEBUG_SECTION_MAGIC)); + for (const auto &R : TTB.records()) { + Err(Writer.writeBytes(R)); + } + assert(Writer.bytesRemaining() == 0 && "Didn't write all type record bytes!"); + return Output; +} diff --git a/lib/ObjectYAML/ELFYAML.cpp b/lib/ObjectYAML/ELFYAML.cpp index 70e25ea504a0..dbd5498e003d 100644 --- 
a/lib/ObjectYAML/ELFYAML.cpp +++ b/lib/ObjectYAML/ELFYAML.cpp @@ -372,6 +372,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration( ECase(SHT_GROUP); ECase(SHT_SYMTAB_SHNDX); ECase(SHT_LOOS); + ECase(SHT_LLVM_ODRTAB); ECase(SHT_GNU_ATTRIBUTES); ECase(SHT_GNU_HASH); ECase(SHT_GNU_verdef); diff --git a/lib/Option/Arg.cpp b/lib/Option/Arg.cpp index e416df6a38dc..e581fee8bf38 100644 --- a/lib/Option/Arg.cpp +++ b/lib/Option/Arg.cpp @@ -1,4 +1,4 @@ -//===--- Arg.cpp - Argument Implementations -------------------------------===// +//===- Arg.cpp - Argument Implementations ---------------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Option/Arg.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Twine.h" +#include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -67,7 +67,7 @@ LLVM_DUMP_METHOD void Arg::dump() const { print(dbgs()); } std::string Arg::getAsString(const ArgList &Args) const { SmallString<256> Res; - llvm::raw_svector_ostream OS(Res); + raw_svector_ostream OS(Res); ArgStringList ASL; render(Args, ASL); @@ -98,7 +98,7 @@ void Arg::render(const ArgList &Args, ArgStringList &Output) const { case Option::RenderCommaJoinedStyle: { SmallString<256> Res; - llvm::raw_svector_ostream OS(Res); + raw_svector_ostream OS(Res); OS << getSpelling(); for (unsigned i = 0, e = getNumValues(); i != e; ++i) { if (i) OS << ','; diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp index 39dbce87f9ae..cbccc1935d3c 100644 --- a/lib/Option/ArgList.cpp +++ b/lib/Option/ArgList.cpp @@ -1,4 +1,4 @@ -//===--- ArgList.cpp - Argument List Management ---------------------------===// +//===- ArgList.cpp - Argument List Management -----------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,14 +7,25 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Option/ArgList.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" +#include "llvm/Option/OptSpecifier.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <memory> +#include <string> +#include <utility> +#include <vector> using namespace llvm; using namespace llvm::opt; @@ -197,8 +208,6 @@ void ArgList::print(raw_ostream &O) const { LLVM_DUMP_METHOD void ArgList::dump() const { print(dbgs()); } #endif -// - void InputArgList::releaseMemory() { // An InputArgList always owns its arguments. 
for (Arg *A : *this) @@ -234,8 +243,6 @@ const char *InputArgList::MakeArgStringRef(StringRef Str) const { return getArgString(MakeIndex(Str)); } -// - DerivedArgList::DerivedArgList(const InputArgList &BaseArgs) : BaseArgs(BaseArgs) {} diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp index b00d21ec8f67..52a81ff0e159 100644 --- a/lib/Option/OptTable.cpp +++ b/lib/Option/OptTable.cpp @@ -1,4 +1,4 @@ -//===--- OptTable.cpp - Option Table Implementation -----------------------===// +//===- OptTable.cpp - Option Table Implementation -------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,16 +7,25 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Option/OptTable.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" +#include "llvm/Option/OptSpecifier.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> +#include <cassert> #include <cctype> +#include <cstring> #include <map> +#include <string> +#include <utility> +#include <vector> using namespace llvm; using namespace llvm::opt; @@ -80,14 +89,14 @@ static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) { static inline bool operator<(const OptTable::Info &I, const char *Name) { return StrCmpOptionNameIgnoreCase(I.Name, Name) < 0; } -} -} + +} // end namespace opt +} // end namespace llvm OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {} OptTable::OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase) - : OptionInfos(OptionInfos), IgnoreCase(IgnoreCase), TheInputOptionID(0), - TheUnknownOptionID(0), FirstSearchableIndex(0) { + : OptionInfos(OptionInfos), IgnoreCase(IgnoreCase) { // Explicitly zero initialize the error to work around a bug in array // value-initialization on MinGW with gcc 4.3.5. @@ -138,8 +147,8 @@ OptTable::OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase) } // Build prefix chars. 
- for (llvm::StringSet<>::const_iterator I = PrefixesUnion.begin(), - E = PrefixesUnion.end(); I != E; ++I) { + for (StringSet<>::const_iterator I = PrefixesUnion.begin(), + E = PrefixesUnion.end(); I != E; ++I) { StringRef Prefix = I->getKey(); for (StringRef::const_iterator C = Prefix.begin(), CE = Prefix.end(); C != CE; ++C) @@ -148,8 +157,7 @@ OptTable::OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase) } } -OptTable::~OptTable() { -} +OptTable::~OptTable() = default; const Option OptTable::getOption(OptSpecifier Opt) const { unsigned id = Opt.getID(); @@ -159,11 +167,11 @@ const Option OptTable::getOption(OptSpecifier Opt) const { return Option(&getInfo(id), this); } -static bool isInput(const llvm::StringSet<> &Prefixes, StringRef Arg) { +static bool isInput(const StringSet<> &Prefixes, StringRef Arg) { if (Arg == "-") return true; - for (llvm::StringSet<>::const_iterator I = Prefixes.begin(), - E = Prefixes.end(); I != E; ++I) + for (StringSet<>::const_iterator I = Prefixes.begin(), + E = Prefixes.end(); I != E; ++I) if (Arg.startswith(I->getKey())) return false; return true; @@ -346,7 +354,7 @@ static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) { static void PrintHelpOptionList(raw_ostream &OS, StringRef Title, std::vector<std::pair<std::string, - const char*> > &OptionHelp) { + const char*>> &OptionHelp) { OS << Title << ":\n"; // Find the maximum option length. @@ -412,8 +420,8 @@ void OptTable::PrintHelp(raw_ostream &OS, const char *Name, const char *Title, // Render help text into a map of group-name to a list of (option, help) // pairs. - typedef std::map<std::string, - std::vector<std::pair<std::string, const char*> > > helpmap_ty; + using helpmap_ty = + std::map<std::string, std::vector<std::pair<std::string, const char*>>>; helpmap_ty GroupedOptionHelp; for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp index 736b939fe80b..4832e659f026 100644 --- a/lib/Option/Option.cpp +++ b/lib/Option/Option.cpp @@ -1,4 +1,4 @@ -//===--- Option.cpp - Abstract Driver Options -----------------------------===// +//===- Option.cpp - Abstract Driver Options -------------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,22 +7,24 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Option/Option.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include <algorithm> #include <cassert> +#include <cstring> using namespace llvm; using namespace llvm::opt; Option::Option(const OptTable::Info *info, const OptTable *owner) : Info(info), Owner(owner) { - // Multi-level aliases are not supported. This just simplifies option // tracking, it is not an inherent limitation. 
assert((!Info || !getAlias().isValid() || !getAlias().getAlias().isValid()) && diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index 1f638e768307..afd66f55720a 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -160,6 +160,10 @@ static cl::opt<bool> cl::Hidden, cl::ZeroOrMore, cl::desc("Run NewGVN instead of GVN")); +static cl::opt<bool> EnableEarlyCSEMemSSA( + "enable-npm-earlycse-memssa", cl::init(false), cl::Hidden, + cl::desc("Enable the EarlyCSE w/ MemorySSA pass for the new PM (default = off)")); + static cl::opt<bool> EnableGVNHoist( "enable-npm-gvn-hoist", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); @@ -312,7 +316,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(SROA()); // Catch trivial redundancies - FPM.addPass(EarlyCSEPass()); + FPM.addPass(EarlyCSEPass(EnableEarlyCSEMemSSA)); // Hoisting of scalars and load expressions. if (EnableGVNHoist) diff --git a/lib/Support/BinaryStreamWriter.cpp b/lib/Support/BinaryStreamWriter.cpp index b22eb1ed12d0..c4276518b191 100644 --- a/lib/Support/BinaryStreamWriter.cpp +++ b/lib/Support/BinaryStreamWriter.cpp @@ -83,6 +83,8 @@ Error BinaryStreamWriter::padToAlignment(uint32_t Align) { uint32_t NewOffset = alignTo(Offset, Align); if (NewOffset > getLength()) return make_error<BinaryStreamError>(stream_error_code::stream_too_short); - Offset = NewOffset; + while (Offset < NewOffset) + if (auto EC = writeInteger('\0')) + return EC; return Error::success(); } diff --git a/lib/Support/DebugCounter.cpp b/lib/Support/DebugCounter.cpp index a10ac8e85396..1d46de04ee6a 100644 --- a/lib/Support/DebugCounter.cpp +++ b/lib/Support/DebugCounter.cpp @@ -102,9 +102,13 @@ void DebugCounter::push_back(const std::string &Val) { } } -void DebugCounter::print(raw_ostream &OS) { +void DebugCounter::print(raw_ostream &OS) const { OS << "Counters and values:\n"; for (const auto &KV : Counters) OS << left_justify(RegisteredCounters[KV.first], 32) << ": {" << KV.second.first << "," << KV.second.second << "}\n"; } + +LLVM_DUMP_METHOD void DebugCounter::dump() const { + print(dbgs()); +} diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index c9bca7f4c1ab..4496d06a15f3 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -26,7 +26,7 @@ using namespace llvm; // FoldingSetNodeIDRef Implementation /// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef, -/// used to lookup the node in the FoldingSetImpl. +/// used to lookup the node in the FoldingSetBase. unsigned FoldingSetNodeIDRef::ComputeHash() const { return static_cast<unsigned>(hash_combine_range(Data, Data+Size)); } @@ -142,7 +142,7 @@ void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) { } /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to -/// lookup the node in the FoldingSetImpl. +/// lookup the node in the FoldingSetBase. unsigned FoldingSetNodeID::ComputeHash() const { return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash(); } @@ -180,7 +180,7 @@ FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { } //===----------------------------------------------------------------------===// -/// Helper functions for FoldingSetImpl. +/// Helper functions for FoldingSetBase. /// GetNextPtr - In order to save space, each bucket is a /// singly-linked-list. 
In order to make deletion more efficient, we make @@ -188,12 +188,12 @@ FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { /// The problem with this is that the start of the hash buckets are not /// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null: /// use GetBucketPtr when this happens. -static FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) { +static FoldingSetBase::Node *GetNextPtr(void *NextInBucketPtr) { // The low bit is set if this is the pointer back to the bucket. if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1) return nullptr; - return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr); + return static_cast<FoldingSetBase::Node*>(NextInBucketPtr); } @@ -221,11 +221,11 @@ static void **AllocateBuckets(unsigned NumBuckets) { } //===----------------------------------------------------------------------===// -// FoldingSetImpl Implementation +// FoldingSetBase Implementation -void FoldingSetImpl::anchor() {} +void FoldingSetBase::anchor() {} -FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { +FoldingSetBase::FoldingSetBase(unsigned Log2InitSize) { assert(5 < Log2InitSize && Log2InitSize < 32 && "Initial hash table size out of range"); NumBuckets = 1 << Log2InitSize; @@ -233,14 +233,14 @@ FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { NumNodes = 0; } -FoldingSetImpl::FoldingSetImpl(FoldingSetImpl &&Arg) +FoldingSetBase::FoldingSetBase(FoldingSetBase &&Arg) : Buckets(Arg.Buckets), NumBuckets(Arg.NumBuckets), NumNodes(Arg.NumNodes) { Arg.Buckets = nullptr; Arg.NumBuckets = 0; Arg.NumNodes = 0; } -FoldingSetImpl &FoldingSetImpl::operator=(FoldingSetImpl &&RHS) { +FoldingSetBase &FoldingSetBase::operator=(FoldingSetBase &&RHS) { free(Buckets); // This may be null if the set is in a moved-from state. Buckets = RHS.Buckets; NumBuckets = RHS.NumBuckets; @@ -251,11 +251,11 @@ FoldingSetImpl &FoldingSetImpl::operator=(FoldingSetImpl &&RHS) { return *this; } -FoldingSetImpl::~FoldingSetImpl() { +FoldingSetBase::~FoldingSetBase() { free(Buckets); } -void FoldingSetImpl::clear() { +void FoldingSetBase::clear() { // Set all but the last bucket to null pointers. memset(Buckets, 0, NumBuckets*sizeof(void*)); @@ -266,7 +266,7 @@ void FoldingSetImpl::clear() { NumNodes = 0; } -void FoldingSetImpl::GrowBucketCount(unsigned NewBucketCount) { +void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount) { assert((NewBucketCount > NumBuckets) && "Can't shrink a folding set with GrowBucketCount"); assert(isPowerOf2_32(NewBucketCount) && "Bad bucket count!"); void **OldBuckets = Buckets; @@ -300,11 +300,11 @@ void FoldingSetImpl::GrowBucketCount(unsigned NewBucketCount) { /// GrowHashTable - Double the size of the hash table and rehash everything. /// -void FoldingSetImpl::GrowHashTable() { +void FoldingSetBase::GrowHashTable() { GrowBucketCount(NumBuckets * 2); } -void FoldingSetImpl::reserve(unsigned EltCount) { +void FoldingSetBase::reserve(unsigned EltCount) { // This will give us somewhere between EltCount / 2 and // EltCount buckets. This puts us in the load factor // range of 1.0 - 2.0. @@ -316,9 +316,9 @@ void FoldingSetImpl::reserve(unsigned EltCount) { /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, /// return it. If not, return the insertion token that will make insertion /// faster. 
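As an aside on the bucket representation described above: a rough, self-contained sketch of the low-bit tagging idea (simplified names, not the actual FoldingSetBase layout):

    #include <cstdint>

    struct Node { void *NextInBucket = nullptr; };

    // A bucket head is tagged by setting the low bit; nodes are at least
    // 2-byte aligned, so that bit is always free to carry the flag.
    void *tagBucketPtr(void **Bucket) {
      return reinterpret_cast<void *>(reinterpret_cast<std::intptr_t>(Bucket) | 1);
    }

    // Mirrors GetNextPtr above: a tagged link marks the end of the bucket's
    // circular list rather than a further node.
    Node *nextNodeOrNull(void *Link) {
      if (reinterpret_cast<std::intptr_t>(Link) & 1)
        return nullptr;
      return static_cast<Node *>(Link);
    }
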
-FoldingSetImpl::Node -*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID, - void *&InsertPos) { +FoldingSetBase::Node * +FoldingSetBase::FindNodeOrInsertPos(const FoldingSetNodeID &ID, + void *&InsertPos) { unsigned IDHash = ID.ComputeHash(); void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets); void *Probe = *Bucket; @@ -342,7 +342,7 @@ FoldingSetImpl::Node /// InsertNode - Insert the specified node into the folding set, knowing that it /// is not already in the map. InsertPos must be obtained from /// FindNodeOrInsertPos. -void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { +void FoldingSetBase::InsertNode(Node *N, void *InsertPos) { assert(!N->getNextInBucket()); // Do we need to grow the hashtable? if (NumNodes+1 > capacity()) { @@ -371,7 +371,7 @@ void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { /// RemoveNode - Remove a node from the folding set, returning true if one was /// removed or false if the node was not in the folding set. -bool FoldingSetImpl::RemoveNode(Node *N) { +bool FoldingSetBase::RemoveNode(Node *N) { // Because each bucket is a circular list, we don't need to compute N's hash // to remove it. void *Ptr = N->getNextInBucket(); @@ -412,7 +412,7 @@ bool FoldingSetImpl::RemoveNode(Node *N) { /// GetOrInsertNode - If there is an existing simple Node exactly /// equal to the specified node, return it. Otherwise, insert 'N' and it /// instead. -FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) { +FoldingSetBase::Node *FoldingSetBase::GetOrInsertNode(FoldingSetBase::Node *N) { FoldingSetNodeID ID; GetNodeProfile(N, ID); void *IP; diff --git a/lib/Support/ThreadPool.cpp b/lib/Support/ThreadPool.cpp index db03a4d6240d..22b7550d4971 100644 --- a/lib/Support/ThreadPool.cpp +++ b/lib/Support/ThreadPool.cpp @@ -53,11 +53,7 @@ ThreadPool::ThreadPool(unsigned ThreadCount) Tasks.pop(); } // Run the task we just grabbed -#ifndef _MSC_VER Task(); -#else - Task(/* unused */ false); -#endif { // Adjust `ActiveThreads`, in case someone waits on ThreadPool::wait() @@ -82,7 +78,7 @@ void ThreadPool::wait() { [&] { return !ActiveThreads && Tasks.empty(); }); } -std::shared_future<ThreadPool::VoidTy> ThreadPool::asyncImpl(TaskTy Task) { +std::shared_future<void> ThreadPool::asyncImpl(TaskTy Task) { /// Wrap the Task in a packaged_task to return a future object. PackagedTaskTy PackagedTask(std::move(Task)); auto Future = PackagedTask.get_future(); @@ -128,25 +124,16 @@ void ThreadPool::wait() { while (!Tasks.empty()) { auto Task = std::move(Tasks.front()); Tasks.pop(); -#ifndef _MSC_VER - Task(); -#else - Task(/* unused */ false); -#endif + Task(); } } -std::shared_future<ThreadPool::VoidTy> ThreadPool::asyncImpl(TaskTy Task) { -#ifndef _MSC_VER +std::shared_future<void> ThreadPool::asyncImpl(TaskTy Task) { // Get a Future with launch::deferred execution using std::async auto Future = std::async(std::launch::deferred, std::move(Task)).share(); // Wrap the future so that both ThreadPool::wait() can operate and the // returned future can be sync'ed on. 
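The wrapping described in that comment boils down to the following pattern (a sketch with simplified names, not the real ThreadPool API):

    #include <functional>
    #include <future>
    #include <queue>

    std::queue<std::packaged_task<void()>> Tasks;

    // A deferred future runs only when get() is called, so the queue stores
    // a trivial task that forces it; both wait() (which drains the queue)
    // and the caller's future then observe the same completion.
    std::shared_future<void> enqueueDeferred(std::function<void()> Work) {
      auto Future = std::async(std::launch::deferred, std::move(Work)).share();
      Tasks.push(std::packaged_task<void()>([Future] { Future.get(); }));
      return Future;
    }
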
PackagedTaskTy PackagedTask([Future]() { Future.get(); }); -#else - auto Future = std::async(std::launch::deferred, std::move(Task), false).share(); - PackagedTaskTy PackagedTask([Future](bool) -> bool { Future.get(); return false; }); -#endif Tasks.push(std::move(PackagedTask)); return Future; } diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index 7d3537e20727..2df0eaff47e5 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -163,16 +163,6 @@ static void SetMemoryLimits (unsigned size) r.rlim_cur = limit; setrlimit (RLIMIT_RSS, &r); #endif -#ifdef RLIMIT_AS // e.g. NetBSD doesn't have it. - // Don't set virtual memory limit if built with any Sanitizer. They need 80Tb - // of virtual memory for shadow memory mapping. -#if !LLVM_MEMORY_SANITIZER_BUILD && !LLVM_ADDRESS_SANITIZER_BUILD - // Virtual memory. - getrlimit (RLIMIT_AS, &r); - r.rlim_cur = limit; - setrlimit (RLIMIT_AS, &r); -#endif -#endif #endif } diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index 83f7147dc9f6..b2636e1e6cb4 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -11,20 +11,28 @@ // //===----------------------------------------------------------------------===// -#include "llvm/TableGen/Record.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" #include <cassert> #include <cstdint> -#include <new> +#include <memory> +#include <string> +#include <utility> +#include <vector> using namespace llvm; @@ -162,7 +170,8 @@ RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) { // Initializer implementations //===----------------------------------------------------------------------===// -void Init::anchor() { } +void Init::anchor() {} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void Init::dump() const { return print(errs()); } #endif @@ -301,7 +310,6 @@ static Init *fixBitInit(const RecordVal *RV, Init *Before, Init *After) { // resolveReferences - If there are any field references that refer to fields // that have been filled in, we can propagate the values now. -// Init *BitsInit::resolveReferences(Record &R, const RecordVal *RV) const { bool Changed = false; SmallVector<Init *, 16> NewBits(getNumBits()); @@ -615,7 +623,7 @@ void UnOpInit::Profile(FoldingSetNodeID &ID) const { Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { switch (getOpcode()) { - case CAST: { + case CAST: if (isa<StringRecTy>(getType())) { if (StringInit *LHSs = dyn_cast<StringInit>(LHS)) return LHSs; @@ -680,15 +688,15 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { } } break; - } - case HEAD: { + + case HEAD: if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) { assert(!LHSl->empty() && "Empty list in head"); return LHSl->getElement(0); } break; - } - case TAIL: { + + case TAIL: if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) { assert(!LHSl->empty() && "Empty list in tail"); // Note the +1. 
We can't just pass the result of getValues() @@ -696,16 +704,14 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { return ListInit::get(LHSl->getValues().slice(1), LHSl->getType()); } break; - } - case EMPTY: { + + case EMPTY: if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) return IntInit::get(LHSl->empty()); if (StringInit *LHSs = dyn_cast<StringInit>(LHS)) return IntInit::get(LHSs->getValue().empty()); - break; } - } return const_cast<UnOpInit *>(this); } @@ -948,7 +954,6 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg, static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type, Record *CurRec, MultiClass *CurMultiClass) { - OpInit *RHSo = dyn_cast<OpInit>(RHS); if (!RHSo) @@ -1245,7 +1250,7 @@ VarInit *VarInit::get(StringRef VN, RecTy *T) { } VarInit *VarInit::get(Init *VN, RecTy *T) { - typedef std::pair<RecTy *, Init *> Key; + using Key = std::pair<RecTy *, Init *>; static DenseMap<Key, VarInit*> ThePool; Key TheKey(std::make_pair(T, VN)); @@ -1320,7 +1325,7 @@ Init *VarInit::resolveReferences(Record &R, const RecordVal *RV) const { } VarBitInit *VarBitInit::get(TypedInit *T, unsigned B) { - typedef std::pair<TypedInit *, unsigned> Key; + using Key = std::pair<TypedInit *, unsigned>; static DenseMap<Key, VarBitInit*> ThePool; Key TheKey(std::make_pair(T, B)); @@ -1352,7 +1357,7 @@ Init *VarBitInit::resolveReferences(Record &R, const RecordVal *RV) const { VarListElementInit *VarListElementInit::get(TypedInit *T, unsigned E) { - typedef std::pair<TypedInit *, unsigned> Key; + using Key = std::pair<TypedInit *, unsigned>; static DenseMap<Key, VarListElementInit*> ThePool; Key TheKey(std::make_pair(T, E)); @@ -1422,7 +1427,7 @@ std::string DefInit::getAsString() const { } FieldInit *FieldInit::get(Init *R, StringInit *FN) { - typedef std::pair<Init *, StringInit *> Key; + using Key = std::pair<Init *, StringInit *>; static DenseMap<Key, FieldInit*> ThePool; Key TheKey(std::make_pair(R, FN)); diff --git a/lib/TableGen/SetTheory.cpp b/lib/TableGen/SetTheory.cpp index a4d33051b4f7..733e0aeef623 100644 --- a/lib/TableGen/SetTheory.cpp +++ b/lib/TableGen/SetTheory.cpp @@ -12,18 +12,29 @@ // //===----------------------------------------------------------------------===// -#include "llvm/TableGen/SetTheory.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Format.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/SetTheory.h" +#include <algorithm> +#include <cstdint> +#include <string> +#include <utility> using namespace llvm; // Define the standard operators. namespace { -typedef SetTheory::RecSet RecSet; -typedef SetTheory::RecVec RecVec; +using RecSet = SetTheory::RecSet; +using RecVec = SetTheory::RecVec; // (add a, b, ...) Evaluate and union all arguments. struct AddOp : public SetTheory::Operator { @@ -237,13 +248,13 @@ struct FieldExpander : public SetTheory::Expander { ST.evaluate(Def->getValueInit(FieldName), Elts, Def->getLoc()); } }; + } // end anonymous namespace // Pin the vtables to this file. 
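"Pin the vtables" refers to the usual LLVM anchor idiom: give each polymorphic class one deliberately out-of-line virtual function so its vtable is emitted in a single object file instead of as weak copies everywhere. A minimal sketch of the idiom (hypothetical class, not this file's code):

    // Header: every other virtual member is inline, so without an anchor
    // each translation unit using the class emits its own weak copy of
    // the vtable.
    struct Widget {
      virtual ~Widget() = default;
      virtual void run() {}
      virtual void anchor(); // declared here, defined in exactly one .cpp
    };

    // One .cpp file: this out-of-line definition pins the vtable (and
    // RTTI) to this translation unit.
    void Widget::anchor() {}
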
void SetTheory::Operator::anchor() {} void SetTheory::Expander::anchor() {} - SetTheory::SetTheory() { addOperator("add", llvm::make_unique<AddOp>()); addOperator("sub", llvm::make_unique<SubOp>()); @@ -321,4 +332,3 @@ const RecVec *SetTheory::expand(Record *Set) { // Set is not expandable. return nullptr; } - diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index abe28460c83a..53eef79c4df3 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -362,6 +362,7 @@ def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily", def : ProcessorModel<"generic", NoSchedModel, [ FeatureFPARMv8, + FeatureFuseAES, FeatureNEON, FeaturePerfMon, FeaturePostRAScheduler diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index e8fcf1a0e9b7..7bf2097c17ce 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -1282,6 +1282,10 @@ unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, bool WantResult) { assert(LHSReg && RHSReg && "Invalid register number."); + if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP || + RHSReg == AArch64::SP || RHSReg == AArch64::WSP) + return 0; + if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; @@ -1362,6 +1366,8 @@ unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, uint64_t ShiftImm, bool SetFlags, bool WantResult) { assert(LHSReg && RHSReg && "Invalid register number."); + assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP && + RHSReg != AArch64::SP && RHSReg != AArch64::WSP); if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; @@ -1403,6 +1409,8 @@ unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, uint64_t ShiftImm, bool SetFlags, bool WantResult) { assert(LHSReg && RHSReg && "Invalid register number."); + assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR && + RHSReg != AArch64::XZR && RHSReg != AArch64::WZR); if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 059556a560c0..083ca2156598 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9366,7 +9366,7 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, return SDValue(); StoreSDNode *S = cast<StoreSDNode>(N); - if (S->isVolatile()) + if (S->isVolatile() || S->isIndexed()) return SDValue(); SDValue StVal = S->getValue(); diff --git a/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/lib/Target/AArch64/AArch64SchedFalkorDetails.td index 7402bcf1346c..3d737402022d 100644 --- a/lib/Target/AArch64/AArch64SchedFalkorDetails.td +++ b/lib/Target/AArch64/AArch64SchedFalkorDetails.td @@ -160,6 +160,21 @@ def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { let NumMicroOps = 2; } +def FalkorWr_1VX_1VY_12cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 12; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 14; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 21; + let NumMicroOps = 2; +} + def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> { let Latency = 2; let NumMicroOps = 2; @@ -195,10 +210,10 @@ def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> { let ResourceCycles = [2, 
8]; } -def FalkorWr_1X_1Z_16cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> { - let Latency = 16; +def FalkorWr_1X_1Z_11cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> { + let Latency = 11; let NumMicroOps = 2; - let ResourceCycles = [2, 16]; + let ResourceCycles = [2, 11]; } def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> { @@ -289,9 +304,27 @@ def FalkorWr_1XYZ_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitVSD, Fa //===----------------------------------------------------------------------===// // Define 4 micro-op types -def FalkorWr_2VX_2VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, - FalkorUnitVX, FalkorUnitVY]> { - let Latency = 2; +def FalkorWr_2VX_2VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 14; + let NumMicroOps = 4; +} + +def FalkorWr_2VX_2VY_20cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 20; + let NumMicroOps = 4; +} + +def FalkorWr_2VX_2VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 21; + let NumMicroOps = 4; +} + +def FalkorWr_2VX_2VY_24cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 24; let NumMicroOps = 4; } @@ -575,7 +608,8 @@ def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs FCVTLv4i16, FCVTLv2i32)>; def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)$")>; -def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^(FDIV|FSQRT)v2f32$")>; +def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVv2f32)>; +def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTv2f32)>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32)$")>; @@ -592,7 +626,10 @@ def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], def : InstRW<[FalkorWr_3VXVY_4cyc], (instrs FCVTNv4i16, FCVTNv2i32, FCVTXNv2f32)>; def : InstRW<[FalkorWr_3VXVY_5cyc], (instrs FCVTNv8i16, FCVTNv4i32, FCVTXNv4f32)>; -def : InstRW<[FalkorWr_2VX_2VY_2cyc], (instregex "^(FDIV|FSQRT)(v2f64|v4f32)$")>; +def : InstRW<[FalkorWr_2VX_2VY_14cyc],(instrs FDIVv2f64)>; +def : InstRW<[FalkorWr_2VX_2VY_20cyc],(instrs FDIVv4f32)>; +def : InstRW<[FalkorWr_2VX_2VY_21cyc],(instrs FSQRTv2f64)>; +def : InstRW<[FalkorWr_2VX_2VY_24cyc],(instrs FSQRTv4f32)>; def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>; @@ -1039,8 +1076,10 @@ def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], (instregex "^F(N)?MULDrr$")>; -def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^FDIV(S|D)rr$")>; -def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(S|D)r$")>; +def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVSrr)>; +def : InstRW<[FalkorWr_1VX_1VY_14cyc],(instrs FDIVDrr)>; +def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTSr)>; +def : InstRW<[FalkorWr_1VX_1VY_21cyc],(instrs FSQRTDr)>; def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32], (instregex "^F(N)?M(ADD|SUB)Srrr$")>; @@ -1112,7 +1151,7 @@ def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64 (instregex "^M(ADD|SUB)Xrrr$")>; def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>; -def : InstRW<[FalkorWr_1X_1Z_16cyc], (instregex "^(S|U)DIVXr$")>; +def : InstRW<[FalkorWr_1X_1Z_11cyc], (instregex "^(S|U)DIVXr$")>; def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], (instregex "^(S|U)MULLv.*$")>; diff 
--git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index d3cab1ad3397..a9a9d5ce8429 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -92,6 +92,10 @@ void AArch64Subtarget::initializeProperties() { MaxInterleaveFactor = 4; // FIXME: remove this to enable 64-bit SLP if performance looks good. MinVectorRegisterBitWidth = 128; + CacheLineSize = 128; + PrefetchDistance = 820; + MinPrefetchStride = 2048; + MaxPrefetchIterationsAhead = 8; break; case Kryo: MaxInterleaveFactor = 4; diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h index d0299149c38c..290a1ca1f24b 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -78,7 +78,7 @@ public: return 31; } - unsigned getRegisterBitWidth(bool Vector) { + unsigned getRegisterBitWidth(bool Vector) const { if (Vector) { if (ST->hasNEON()) return 128; diff --git a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp index 8084d368c80f..6f3742ed039b 100644 --- a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp +++ b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp @@ -9,7 +9,7 @@ // /// \file /// This pass marks all internal functions as always_inline and creates -/// duplicates of all other functions a marks the duplicates as always_inline. +/// duplicates of all other functions and marks the duplicates as always_inline. // //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp index 3c788fa1dcea..6f002860044c 100644 --- a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -107,7 +107,7 @@ bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) { DFS(Start, Checklist); for (auto &BB : Checklist) { - BasicBlock::iterator StartIt = (BB == Load->getParent()) ? + BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ? 
BasicBlock::iterator(Load) : BB->end(); if (MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true, StartIt, BB, Load).isClobber()) diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 251c2f9bb25a..f235313e4853 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -138,7 +138,10 @@ private: bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset, SDValue &ImmOffset, SDValue &VOffset) const; - bool SelectFlat(SDValue Addr, SDValue &VAddr, SDValue &SLC) const; + bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr, + SDValue &Offset, SDValue &SLC) const; + bool SelectFlatOffset(SDValue Addr, SDValue &VAddr, + SDValue &Offset, SDValue &SLC) const; bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool &Imm) const; @@ -1313,14 +1316,37 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset, return true; } -bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr, - SDValue &VAddr, - SDValue &SLC) const { +bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr, + SDValue &VAddr, + SDValue &Offset, + SDValue &SLC) const { + int64_t OffsetVal = 0; + + if (Subtarget->hasFlatInstOffsets() && + CurDAG->isBaseWithConstantOffset(Addr)) { + SDValue N0 = Addr.getOperand(0); + SDValue N1 = Addr.getOperand(1); + uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getZExtValue(); + if (isUInt<12>(COffsetVal)) { + Addr = N0; + OffsetVal = COffsetVal; + } + } + VAddr = Addr; + Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16); SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1); + return true; } +bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr, + SDValue &VAddr, + SDValue &Offset, + SDValue &SLC) const { + return SelectFlatOffset(Addr, VAddr, Offset, SLC); +} + bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool &Imm) const { diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index a7eac080f885..e54c887d6090 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -126,8 +126,9 @@ bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const { MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD)) .add(I.getOperand(1)) .add(I.getOperand(0)) - .addImm(0) - .addImm(0); + .addImm(0) // offset + .addImm(0) // glc + .addImm(0); // slc // Now that we selected an opcode, we need to constrain the register @@ -392,8 +393,9 @@ bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const { MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode)) .add(I.getOperand(0)) .addReg(PtrReg) - .addImm(0) - .addImm(0); + .addImm(0) // offset + .addImm(0) // glc + .addImm(0); // slc bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI); I.eraseFromParent(); diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index b889788c3426..790a69b84397 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -34,6 +34,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo() { const LLT P1 = LLT::pointer(1, 64); const LLT P2 = LLT::pointer(2, 64); + setAction({G_ADD, S32}, Legal); + // FIXME: i1 operands to intrinsics should always be legal, but other i1 // values may not be legal. We need to figure out how to distinguish // between these two scenarios. 
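The offset folding that SelectFlatOffset performs above amounts to splitting a (base + constant) address only when the constant fits the unsigned 12-bit field. A standalone sketch of that legality test (hypothetical helper name, mirroring the isUInt<12> check in the patch):

    #include <cstdint>

    // GFX9 flat instructions carry a 13-bit signed offset field, but plain
    // flat accesses ignore the sign bit, leaving 12 usable unsigned bits;
    // older subtargets have no offset field and fold nothing.
    bool canFoldFlatOffset(std::int64_t Offset, bool HasFlatInstOffsets) {
      if (!HasFlatInstOffsets)
        return Offset == 0; // pre-GFX9: register-only flat addressing
      return Offset >= 0 && Offset < (1 << 12);
    }
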
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index dee3d2856701..0d6689bd04c4 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -195,7 +195,7 @@ unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) { return 4 * 128; // XXX - 4 channels. Should these count as vector instead? } -unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) { +unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) const { return Vector ? 0 : 32; } @@ -489,6 +489,19 @@ bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const { return false; } +bool AMDGPUTTIImpl::isAlwaysUniform(const Value *V) const { + if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) { + switch (Intrinsic->getIntrinsicID()) { + default: + return false; + case Intrinsic::amdgcn_readfirstlane: + case Intrinsic::amdgcn_readlane: + return true; + } + } + return false; +} + unsigned AMDGPUTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { if (ST->hasVOP3PInsts()) { diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index e0024e21e82b..a60b1bb1b59c 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -76,7 +76,7 @@ public: } unsigned getNumberOfRegisters(bool Vector); - unsigned getRegisterBitWidth(bool Vector); + unsigned getRegisterBitWidth(bool Vector) const; unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, @@ -103,6 +103,7 @@ public: int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index); bool isSourceOfDivergence(const Value *V) const; + bool isAlwaysUniform(const Value *V) const; unsigned getFlatAddressSpace() const { // Don't bother running InferAddressSpaces pass on graphics shaders which diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 16e3b7b4ebee..392e9d89bd9b 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -285,6 +285,9 @@ public: bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); } bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } + + bool isOffsetU12() const { return isImmTy(ImmTyOffset) && isUInt<12>(getImm()); } + bool isOffsetS13() const { return isImmTy(ImmTyOffset) && isInt<13>(getImm()); } bool isGDS() const { return isImmTy(ImmTyGDS); } bool isGLC() const { return isImmTy(ImmTyGLC); } bool isSLC() const { return isImmTy(ImmTySLC); } @@ -886,6 +889,10 @@ public: return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; } + bool hasFlatOffsets() const { + return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; + } + bool hasSGPR102_SGPR103() const { return !isVI(); } @@ -1034,6 +1041,7 @@ public: AMDGPUOperand::Ptr defaultSMRDOffset8() const; AMDGPUOperand::Ptr defaultSMRDOffset20() const; AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; + AMDGPUOperand::Ptr defaultOffsetU12() const; OperandMatchResultTy parseOModOperand(OperandVector &Operands); @@ -1970,6 +1978,15 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { } } + if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) { + // FIXME: Produces error without correct column reported. 
+ auto OpNum = + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset); + const auto &Op = Inst.getOperand(OpNum); + if (Op.getImm() != 0) + return Match_InvalidOperand; + } + return Match_Success; } @@ -3849,6 +3866,10 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); } +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); +} + //===----------------------------------------------------------------------===// // vop3 //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td index 8ba9efd42c70..98eda288bcac 100644 --- a/lib/Target/AMDGPU/FLATInstructions.td +++ b/lib/Target/AMDGPU/FLATInstructions.td @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// -def FLATAtomic : ComplexPattern<i64, 2, "SelectFlat">; +def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [], -10>; +def FLATOffset : ComplexPattern<i64, 3, "SelectFlat", [], [], -10>; //===----------------------------------------------------------------------===// // FLAT classes @@ -55,6 +56,8 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> : // copy relevant pseudo op flags let SubtargetPredicate = ps.SubtargetPredicate; let AsmMatchConverter = ps.AsmMatchConverter; + let TSFlags = ps.TSFlags; + let UseNamedOperandTable = ps.UseNamedOperandTable; // encoding fields bits<8> vaddr; @@ -63,10 +66,23 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> : bits<1> slc; bits<1> glc; + // Only valid on gfx9 + bits<1> lds = 0; // XXX - What does this actually do? + bits<2> seg; // Segment, 00=flat, 01=scratch, 10=global, 11=reserved + + // Signed offset. Highest bit ignored for flat and treated as 12-bit + // unsigned for flat acceses. + bits<13> offset; + bits<1> nv = 0; // XXX - What does this actually do? + // We don't use tfe right now, and it was removed in gfx9. bits<1> tfe = 0; - // 15-0 is reserved. + // Only valid on GFX9+ + let Inst{12-0} = offset; + let Inst{13} = lds; + let Inst{15-14} = 0; + let Inst{16} = !if(ps.has_glc, glc, ps.glcValue); let Inst{17} = slc; let Inst{24-18} = op; @@ -74,24 +90,30 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> : let Inst{39-32} = vaddr; let Inst{47-40} = !if(ps.has_data, vdata, ?); // 54-48 is reserved. - let Inst{55} = tfe; + let Inst{55} = nv; // nv on GFX9+, TFE before. 
let Inst{63-56} = !if(ps.has_vdst, vdst, ?); } -class FLAT_Load_Pseudo <string opName, RegisterClass regClass> : FLAT_Pseudo< +class FLAT_Load_Pseudo <string opName, RegisterClass regClass, + bit HasSignedOffset = 0> : FLAT_Pseudo< opName, (outs regClass:$vdst), - (ins VReg_64:$vaddr, GLC:$glc, slc:$slc), - " $vdst, $vaddr$glc$slc"> { + !if(HasSignedOffset, + (ins VReg_64:$vaddr, offset_s13:$offset, GLC:$glc, slc:$slc), + (ins VReg_64:$vaddr, offset_u12:$offset, GLC:$glc, slc:$slc)), + " $vdst, $vaddr$offset$glc$slc"> { let has_data = 0; let mayLoad = 1; } -class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass> : FLAT_Pseudo< +class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass, + bit HasSignedOffset = 0> : FLAT_Pseudo< opName, (outs), - (ins VReg_64:$vaddr, vdataClass:$vdata, GLC:$glc, slc:$slc), - " $vaddr, $vdata$glc$slc"> { + !if(HasSignedOffset, + (ins VReg_64:$vaddr, vdataClass:$vdata, offset_s13:$offset, GLC:$glc, slc:$slc), + (ins VReg_64:$vaddr, vdataClass:$vdata, offset_u12:$offset, GLC:$glc, slc:$slc)), + " $vaddr, $vdata$offset$glc$slc"> { let mayLoad = 0; let mayStore = 1; let has_vdst = 0; @@ -103,12 +125,15 @@ multiclass FLAT_Atomic_Pseudo< ValueType vt, SDPatternOperator atomic = null_frag, ValueType data_vt = vt, - RegisterClass data_rc = vdst_rc> { + RegisterClass data_rc = vdst_rc, + bit HasSignedOffset = 0> { def "" : FLAT_Pseudo <opName, (outs), - (ins VReg_64:$vaddr, data_rc:$vdata, slc:$slc), - " $vaddr, $vdata$slc", + !if(HasSignedOffset, + (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, slc:$slc), + (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, slc:$slc)), + " $vaddr, $vdata$offset$slc", []>, AtomicNoRet <NAME, 0> { let mayLoad = 1; @@ -121,10 +146,12 @@ multiclass FLAT_Atomic_Pseudo< def _RTN : FLAT_Pseudo <opName, (outs vdst_rc:$vdst), - (ins VReg_64:$vaddr, data_rc:$vdata, slc:$slc), - " $vdst, $vaddr, $vdata glc$slc", + !if(HasSignedOffset, + (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, slc:$slc), + (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, slc:$slc)), + " $vdst, $vaddr, $vdata$offset glc$slc", [(set vt:$vdst, - (atomic (FLATAtomic i64:$vaddr, i1:$slc), data_vt:$vdata))]>, + (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>, AtomicNoRet <NAME, 1> { let mayLoad = 1; let mayStore = 1; @@ -312,31 +339,31 @@ def flat_truncstorei16 : flat_st <truncstorei16>; // Patterns for global loads with no offset. class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat < - (vt (node i64:$addr)), - (inst $addr, 0, 0) + (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))), + (inst $vaddr, $offset, 0, $slc) >; class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat < - (vt (node i64:$addr)), - (inst $addr, 1, 0) + (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))), + (inst $vaddr, $offset, 1, $slc) >; class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat < - (node vt:$data, i64:$addr), - (inst $addr, $data, 0, 0) + (node vt:$data, (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc)), + (inst $vaddr, $data, $offset, 0, $slc) >; class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat < // atomic store follows atomic binop convention so the address comes // first. 
- (node i64:$addr, vt:$data), - (inst $addr, $data, 1, 0) + (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data), + (inst $vaddr, $data, $offset, 1, $slc) >; class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, ValueType data_vt = vt> : Pat < - (vt (node i64:$addr, data_vt:$data)), - (inst $addr, $data, 0) + (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)), + (inst $vaddr, $data, $offset, $slc) >; let Predicates = [isCIVI] in { diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 599ee942d738..441f1ef4bd04 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -567,9 +567,17 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II, } bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const { - // Flat instructions do not have offsets, and only have the register - // address. - return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1); + if (!Subtarget->hasFlatInstOffsets()) { + // Flat instructions do not have offsets, and only have the register + // address. + return AM.BaseOffs == 0 && AM.Scale == 0; + } + + // GFX9 added a 13-bit signed offset. When using regular flat instructions, + // the sign bit is ignored and is treated as a 12-bit unsigned offset. + + // Just r + i + return isUInt<12>(AM.BaseOffs) && AM.Scale == 0; } bool SITargetLowering::isLegalMUBUFAddressingMode(const AddrMode &AM) const { diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 58c05cf16f15..1097814e99ce 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -468,13 +468,11 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Builder.addReg(RI.getSubReg(SrcReg, SubIdx)); - if (Idx == SubIndices.size() - 1) - Builder.addReg(SrcReg, getKillRegState(KillSrc) | RegState::Implicit); - if (Idx == 0) Builder.addReg(DestReg, RegState::Define | RegState::Implicit); - Builder.addReg(SrcReg, RegState::Implicit); + bool UseKill = KillSrc && Idx == SubIndices.size() - 1; + Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); } } @@ -2331,11 +2329,12 @@ static bool isSubRegOf(const SIRegisterInfo &TRI, bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const { uint16_t Opcode = MI.getOpcode(); - if (SIInstrInfo::isGenericOpcode(MI.getOpcode())) return true; - const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + const MachineFunction *MF = MI.getParent()->getParent(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); + int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); @@ -2565,6 +2564,14 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } } + if (isFLAT(MI) && !MF->getSubtarget<SISubtarget>().hasFlatInstOffsets()) { + const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); + if (Offset->getImm() != 0) { + ErrInfo = "subtarget does not support offsets in flat instructions"; + return false; + } + } + return true; } diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index 445bf79a7814..470a47b02443 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -492,11 +492,21 @@ class NamedOperandU8<string Name, AsmOperandClass MatchClass> : Operand<i8> { let 
ParserMatchClass = MatchClass; } +class NamedOperandU12<string Name, AsmOperandClass MatchClass> : Operand<i16> { + let PrintMethod = "print"#Name; + let ParserMatchClass = MatchClass; +} + class NamedOperandU16<string Name, AsmOperandClass MatchClass> : Operand<i16> { let PrintMethod = "print"#Name; let ParserMatchClass = MatchClass; } +class NamedOperandS13<string Name, AsmOperandClass MatchClass> : Operand<i16> { + let PrintMethod = "print"#Name; + let ParserMatchClass = MatchClass; +} + class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> { let PrintMethod = "print"#Name; let ParserMatchClass = MatchClass; @@ -514,6 +524,8 @@ def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>; def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>; def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>; +def offset_u12 : NamedOperandU12<"Offset", NamedMatchClass<"OffsetU12">>; +def offset_s13 : NamedOperandS13<"Offset", NamedMatchClass<"OffsetS13">>; def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>; def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>; def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>; diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp index a7ac9a1dca6e..e498f70b820d 100644 --- a/lib/Target/ARM/ARMCallLowering.cpp +++ b/lib/Target/ARM/ARMCallLowering.cpp @@ -35,9 +35,19 @@ ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI) static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI, Type *T) { - if (T->isArrayTy() || T->isStructTy()) + if (T->isArrayTy()) return true; + if (T->isStructTy()) { + // For now we only allow homogeneous structs that we can manipulate with + // G_MERGE_VALUES and G_UNMERGE_VALUES + auto StructT = cast<StructType>(T); + for (unsigned i = 1, e = StructT->getNumElements(); i != e; ++i) + if (StructT->getElementType(i) != StructT->getElementType(0)) + return false; + return true; + } + EVT VT = TLI.getValueType(DL, T, true); if (!VT.isSimple() || VT.isVector() || !(VT.isInteger() || VT.isFloatingPoint())) @@ -220,12 +230,16 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder, return false; SmallVector<ArgInfo, 4> SplitVTs; + SmallVector<unsigned, 4> Regs; ArgInfo RetInfo(VReg, Val->getType()); setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F); splitToValueTypes(RetInfo, SplitVTs, MF, [&](unsigned Reg, uint64_t Offset) { - MIRBuilder.buildExtract(Reg, VReg, Offset); + Regs.push_back(Reg); }); + if (Regs.size() > 1) + MIRBuilder.buildUnmerge(Regs, VReg); + CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg()); @@ -344,26 +358,6 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { return 1; } - /// Merge the values in \p SrcRegs into \p DstReg at offsets \p SrcOffsets. - /// Note that the source registers are not required to have homogeneous types, - /// so we use G_INSERT rather than G_MERGE_VALUES. - // FIXME: Use G_MERGE_VALUES if the types are homogeneous. 
- void mergeRegisters(unsigned DstReg, ArrayRef<unsigned> SrcRegs, - ArrayRef<uint64_t> SrcOffsets) { - LLT Ty = MRI.getType(DstReg); - - unsigned Dst = MRI.createGenericVirtualRegister(Ty); - MIRBuilder.buildUndef(Dst); - - for (unsigned i = 0; i < SrcRegs.size(); ++i) { - unsigned Tmp = MRI.createGenericVirtualRegister(Ty); - MIRBuilder.buildInsert(Tmp, Dst, SrcRegs[i], SrcOffsets[i]); - Dst = Tmp; - } - - MIRBuilder.buildCopy(DstReg, Dst); - } - /// Marking a physical register as used is different between formal /// parameters, where it's a basic block live-in, and call returns, where it's /// an implicit-def of the call instruction. @@ -413,22 +407,19 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, SmallVector<ArgInfo, 8> ArgInfos; SmallVector<unsigned, 4> SplitRegs; - SmallVector<uint64_t, 4> RegOffsets; unsigned Idx = 0; for (auto &Arg : F.args()) { ArgInfo AInfo(VRegs[Idx], Arg.getType()); setArgFlags(AInfo, Idx + AttributeList::FirstArgIndex, DL, F); SplitRegs.clear(); - RegOffsets.clear(); splitToValueTypes(AInfo, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) { SplitRegs.push_back(Reg); - RegOffsets.push_back(Offset); }); if (!SplitRegs.empty()) - ArgHandler.mergeRegisters(VRegs[Idx], SplitRegs, RegOffsets); + MIRBuilder.buildMerge(VRegs[Idx], SplitRegs); Idx++; } @@ -490,9 +481,13 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, if (!Arg.IsFixed) return false; + SmallVector<unsigned, 8> Regs; splitToValueTypes(Arg, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) { - MIRBuilder.buildExtract(Reg, Arg.Reg, Offset); + Regs.push_back(Reg); }); + + if (Regs.size() > 1) + MIRBuilder.buildUnmerge(Regs, Arg.Reg); } auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false); @@ -508,11 +503,9 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, return false; ArgInfos.clear(); - SmallVector<uint64_t, 8> RegOffsets; SmallVector<unsigned, 8> SplitRegs; splitToValueTypes(OrigRet, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) { - RegOffsets.push_back(Offset); SplitRegs.push_back(Reg); }); @@ -521,10 +514,10 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler)) return false; - if (!RegOffsets.empty()) { + if (!SplitRegs.empty()) { // We have split the value and allocated each individual piece, now build // it up again. - RetHandler.mergeRegisters(OrigRet.Reg, SplitRegs, RegOffsets); + MIRBuilder.buildMerge(OrigRet.Reg, SplitRegs); } } diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 817b567db767..5d887c4fcbf2 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -2010,7 +2010,8 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VFNMAS : ASbI<0b11101, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -2018,7 +2019,8 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0, [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. 
} @@ -2028,7 +2030,8 @@ def VFNMAH : AHbI<0b11101, 0b01, 1, 0, IIC_fpFMAC16, "vfnma", ".f16\t$Sd, $Sn, $Sm", []>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFusedMAC]>; + Requires<[HasFullFP16,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, @@ -2059,14 +2062,16 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VFNMSS : ASbI<0b11101, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } @@ -2076,7 +2081,8 @@ def VFNMSH : AHbI<0b11101, 0b01, 0, 0, IIC_fpFMAC16, "vfnms", ".f16\t$Sd, $Sn, $Sm", []>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFusedMAC]>; + Requires<[HasFullFP16,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index 2d490b7c303e..a706079d9866 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -12,8 +12,10 @@ //===----------------------------------------------------------------------===// #include "ARMLegalizerInfo.h" +#include "ARMCallLowering.h" #include "ARMSubtarget.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" @@ -63,6 +65,16 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({Op, s32}, Libcall); } + // FIXME: Support s8 and s16 as well + for (unsigned Op : {G_SREM, G_UREM}) + if (ST.hasDivideInARMMode()) + setAction({Op, s32}, Lower); + else if (ST.isTargetAEABI() || ST.isTargetGNUAEABI() || + ST.isTargetMuslAEABI()) + setAction({Op, s32}, Custom); + else + setAction({Op, s32}, Libcall); + for (unsigned Op : {G_SEXT, G_ZEXT}) { setAction({Op, s32}, Legal); for (auto Ty : {s1, s8, s16}) @@ -134,5 +146,38 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, } return true; } + case G_SREM: + case G_UREM: { + unsigned OriginalResult = MI.getOperand(0).getReg(); + auto Size = MRI.getType(OriginalResult).getSizeInBits(); + if (Size != 32) + return false; + + auto Libcall = + MI.getOpcode() == G_SREM ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; + + // Our divmod libcalls return a struct containing the quotient and the + // remainder. We need to create a virtual register for it. 
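The comment above leans on the AEABI divmod contract; at the source level the helper behaves as if it returned both results at once (struct shown for illustration — on real targets __aeabi_idivmod returns the quotient in r0 and the remainder in r1):

    #include <cstdint>

    // Illustrative model of the runtime helper: quotient and remainder come
    // back together, which the legalizer represents as a packed two-field
    // struct and then unmerges, keeping only the remainder for G_SREM/G_UREM.
    struct DivModResult { std::int32_t Quot; std::int32_t Rem; };

    DivModResult divmod(std::int32_t N, std::int32_t D) {
      return {N / D, N % D};
    }
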
+ auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + Type *ArgTy = Type::getInt32Ty(Ctx); + StructType *RetTy = StructType::get(Ctx, {ArgTy, ArgTy}, /* Packed */ true); + auto RetVal = MRI.createGenericVirtualRegister( + getLLTForType(*RetTy, MIRBuilder.getMF().getDataLayout())); + + auto Status = replaceWithLibcall(MI, MIRBuilder, Libcall, {RetVal, RetTy}, + {{MI.getOperand(1).getReg(), ArgTy}, + {MI.getOperand(2).getReg(), ArgTy}}); + if (Status != LegalizerHelper::Legalized) + return false; + + // The remainder is the second result of divmod. Split the return value into + // a new, unused register for the quotient and the destination of the + // original instruction for the remainder. + MIRBuilder.buildUnmerge( + {MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult}, + RetVal); + + return LegalizerHelper::Legalized; + } } } diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index 7de0543dfa5e..8a1a37863877 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -78,7 +78,7 @@ public: return 13; } - unsigned getRegisterBitWidth(bool Vector) { + unsigned getRegisterBitWidth(bool Vector) const { if (Vector) { if (ST->hasNEON()) return 128; diff --git a/lib/Target/BPF/BPFAsmPrinter.cpp b/lib/Target/BPF/BPFAsmPrinter.cpp index fcd903b7a4a8..9397c78f3dff 100644 --- a/lib/Target/BPF/BPFAsmPrinter.cpp +++ b/lib/Target/BPF/BPFAsmPrinter.cpp @@ -35,14 +35,15 @@ using namespace llvm; namespace { class BPFAsmPrinter : public AsmPrinter { public: - explicit BPFAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) + explicit BPFAsmPrinter(TargetMachine &TM, + std::unique_ptr<MCStreamer> Streamer) : AsmPrinter(TM, std::move(Streamer)) {} StringRef getPassName() const override { return "BPF Assembly Printer"; } void EmitInstruction(const MachineInstr *MI) override; }; -} +} // namespace void BPFAsmPrinter::EmitInstruction(const MachineInstr *MI) { diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp index 279cdb1a89b4..7d5fb6ca17b9 100644 --- a/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -22,11 +22,14 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" + using namespace llvm; #define DEBUG_TYPE "bpf-isel" @@ -42,6 +45,8 @@ public: return "BPF DAG->DAG Pattern Instruction Selection"; } + void PreprocessISelDAG() override; + private: // Include the pieces autogenerated from the target description. #include "BPFGenDAGISel.inc" @@ -51,15 +56,31 @@ private: // Complex Pattern for address selection. 
bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset);
+
+  // Find constants from a constant structure
+  typedef std::vector<unsigned char> val_vec_type;
+  bool fillGenericConstant(const DataLayout &DL, const Constant *CV,
+                           val_vec_type &Vals, uint64_t Offset);
+  bool fillConstantDataArray(const DataLayout &DL, const ConstantDataArray *CDA,
+                             val_vec_type &Vals, int Offset);
+  bool fillConstantArray(const DataLayout &DL, const ConstantArray *CA,
+                         val_vec_type &Vals, int Offset);
+  bool fillConstantStruct(const DataLayout &DL, const ConstantStruct *CS,
+                          val_vec_type &Vals, int Offset);
+  bool getConstantFieldValue(const GlobalAddressSDNode *Node, uint64_t Offset,
+                             uint64_t Size, unsigned char *ByteSeq);
+
+  // Mapping from ConstantStruct global value to corresponding byte-list values
+  std::map<const void *, val_vec_type> cs_vals_;
};
-}
+} // namespace

// ComplexPattern used on BPF Load/Store instructions
bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
  // if Address is FI, get the TargetFrameIndex.
  SDLoc DL(Addr);
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
-    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+    Base   = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
    return true;
  }
@@ -85,13 +106,14 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
    }
  }

-  Base = Addr;
+  Base   = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
  return true;
}

// ComplexPattern used on BPF FI instruction
-bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
+bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base,
+                                   SDValue &Offset) {
  SDLoc DL(Addr);

  if (!CurDAG->isBaseWithConstantOffset(Addr))
@@ -102,8 +124,7 @@ bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset)
  if (isInt<16>(CN->getSExtValue())) {

    // If the first operand is a FI, get the TargetFI Node
-    if (FrameIndexSDNode *FIN =
-        dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
      Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
    else
      return false;
@@ -129,7 +150,8 @@ void BPFDAGToDAGISel::Select(SDNode *Node) {

  // tablegen selection should be handled here.
  switch (Opcode) {
-  default: break;
+  default:
+    break;
  case ISD::SDIV: {
    DebugLoc Empty;
    const DebugLoc &DL = Node->getDebugLoc();
@@ -181,6 +203,210 @@ void BPFDAGToDAGISel::Select(SDNode *Node) {
  SelectCode(Node);
}

+void BPFDAGToDAGISel::PreprocessISelDAG() {
+  // Iterate through all nodes, only interested in loads from ConstantStruct
+  // ConstantArray should have been converted by IR->DAG processing
+  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+                                       E = CurDAG->allnodes_end();
+       I != E;) {
+    SDNode *Node = &*I++;
+    unsigned Opcode = Node->getOpcode();
+    if (Opcode != ISD::LOAD)
+      continue;
+
+    unsigned char new_val[8]; // holds the constant value replacing a load.
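+    // For example (an illustrative case, not part of the patch): given
+    //   struct S { int a; int b; } g = { 1, 2 };
+    // a 4-byte load of g.b is matched below as (global_addr g) + 4 and can
+    // be rewritten into the constant 2 at compile time.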
+    bool to_replace = false;
+    SDLoc DL(Node);
+    const LoadSDNode *LD = cast<LoadSDNode>(Node);
+    uint64_t size = LD->getMemOperand()->getSize();
+    if (!size || size > 8 || (size & (size - 1)))
+      continue;
+
+    SDNode *LDAddrNode = LD->getOperand(1).getNode();
+    // Match LDAddr against either global_addr or (global_addr + offset)
+    unsigned opcode = LDAddrNode->getOpcode();
+    if (opcode == ISD::ADD) {
+      SDValue OP1 = LDAddrNode->getOperand(0);
+      SDValue OP2 = LDAddrNode->getOperand(1);
+
+      // We want to find the pattern global_addr + offset
+      SDNode *OP1N = OP1.getNode();
+      if (OP1N->getOpcode() <= ISD::BUILTIN_OP_END ||
+          OP1N->getNumOperands() == 0)
+        continue;
+
+      DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n');
+
+      const GlobalAddressSDNode *GADN =
+          dyn_cast<GlobalAddressSDNode>(OP1N->getOperand(0).getNode());
+      const ConstantSDNode *CDN = dyn_cast<ConstantSDNode>(OP2.getNode());
+      if (GADN && CDN)
+        to_replace =
+            getConstantFieldValue(GADN, CDN->getZExtValue(), size, new_val);
+    } else if (LDAddrNode->getOpcode() > ISD::BUILTIN_OP_END &&
+               LDAddrNode->getNumOperands() > 0) {
+      DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n');
+
+      SDValue OP1 = LDAddrNode->getOperand(0);
+      if (const GlobalAddressSDNode *GADN =
+              dyn_cast<GlobalAddressSDNode>(OP1.getNode()))
+        to_replace = getConstantFieldValue(GADN, 0, size, new_val);
+    }
+
+    if (!to_replace)
+      continue;
+
+    // Replace the old load with the new constant value.
+    uint64_t val;
+    if (size == 1)
+      val = *(uint8_t *)new_val;
+    else if (size == 2)
+      val = *(uint16_t *)new_val;
+    else if (size == 4)
+      val = *(uint32_t *)new_val;
+    else {
+      val = *(uint64_t *)new_val;
+    }
+
+    DEBUG(dbgs() << "Replacing load of size " << size << " with constant "
+                 << val << '\n');
+    SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64);
+
+    // After replacement, the current node is dead; step the iterator back
+    // one node so it remains valid
+    I--;
+    SDValue From[] = {SDValue(Node, 0), SDValue(Node, 1)};
+    SDValue To[] = {NVal, NVal};
+    CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2);
+    I++;
+    // It is now safe to delete the node
+    CurDAG->DeleteNode(Node);
+  }
+}
+
+bool BPFDAGToDAGISel::getConstantFieldValue(const GlobalAddressSDNode *Node,
+                                            uint64_t Offset, uint64_t Size,
+                                            unsigned char *ByteSeq) {
+  const GlobalVariable *V = dyn_cast<GlobalVariable>(Node->getGlobal());
+
+  if (!V || !V->hasInitializer())
+    return false;
+
+  const Constant *Init = V->getInitializer();
+  const DataLayout &DL = CurDAG->getDataLayout();
+  val_vec_type TmpVal;
+
+  auto it = cs_vals_.find(static_cast<const void *>(Init));
+  if (it != cs_vals_.end()) {
+    TmpVal = it->second;
+  } else {
+    uint64_t total_size = 0;
+    if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(Init))
+      total_size =
+          DL.getStructLayout(cast<StructType>(CS->getType()))->getSizeInBytes();
+    else if (const ConstantArray *CA = dyn_cast<ConstantArray>(Init))
+      total_size = DL.getTypeAllocSize(CA->getType()->getElementType()) *
+                   CA->getNumOperands();
+    else
+      return false;
+
+    val_vec_type Vals(total_size, 0);
+    if (fillGenericConstant(DL, Init, Vals, 0) == false)
+      return false;
+    cs_vals_[static_cast<const void *>(Init)] = Vals;
+    TmpVal = std::move(Vals);
+  }
+
+  // test whether host endianness matches target
+  uint8_t test_buf[2];
+  uint16_t test_val = 0x2345;
+  if (DL.isLittleEndian())
+    support::endian::write16le(test_buf, test_val);
+  else
+    support::endian::write16be(test_buf, test_val);
+
+  bool endian_match = *(uint16_t *)test_buf == test_val;
+  for (uint64_t i = 
Offset, j = 0; i < Offset + Size; i++, j++) + ByteSeq[j] = endian_match ? TmpVal[i] : TmpVal[Offset + Size - 1 - j]; + + return true; +} + +bool BPFDAGToDAGISel::fillGenericConstant(const DataLayout &DL, + const Constant *CV, + val_vec_type &Vals, uint64_t Offset) { + uint64_t Size = DL.getTypeAllocSize(CV->getType()); + + if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) + return true; // already done + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + uint64_t val = CI->getZExtValue(); + DEBUG(dbgs() << "Byte array at offset " << Offset << " with value " << val + << '\n'); + + if (Size > 8 || (Size & (Size - 1))) + return false; + + // Store based on target endian + for (uint64_t i = 0; i < Size; ++i) { + Vals[Offset + i] = DL.isLittleEndian() + ? ((val >> (i * 8)) & 0xFF) + : ((val >> ((Size - i - 1) * 8)) & 0xFF); + } + return true; + } + + if (const ConstantDataArray *CDA = dyn_cast<ConstantDataArray>(CV)) + return fillConstantDataArray(DL, CDA, Vals, Offset); + + if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) + return fillConstantArray(DL, CA, Vals, Offset); + + if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) + return fillConstantStruct(DL, CVS, Vals, Offset); + + return false; +} + +bool BPFDAGToDAGISel::fillConstantDataArray(const DataLayout &DL, + const ConstantDataArray *CDA, + val_vec_type &Vals, int Offset) { + for (unsigned i = 0, e = CDA->getNumElements(); i != e; ++i) { + if (fillGenericConstant(DL, CDA->getElementAsConstant(i), Vals, Offset) == + false) + return false; + Offset += DL.getTypeAllocSize(CDA->getElementAsConstant(i)->getType()); + } + + return true; +} + +bool BPFDAGToDAGISel::fillConstantArray(const DataLayout &DL, + const ConstantArray *CA, + val_vec_type &Vals, int Offset) { + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { + if (fillGenericConstant(DL, CA->getOperand(i), Vals, Offset) == false) + return false; + Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType()); + } + + return true; +} + +bool BPFDAGToDAGISel::fillConstantStruct(const DataLayout &DL, + const ConstantStruct *CS, + val_vec_type &Vals, int Offset) { + const StructLayout *Layout = DL.getStructLayout(CS->getType()); + for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { + const Constant *Field = CS->getOperand(i); + uint64_t SizeSoFar = Layout->getElementOffset(i); + if (fillGenericConstant(DL, Field, Vals, Offset + SizeSoFar) == false) + return false; + } + return true; +} + FunctionPass *llvm::createBPFISelDag(BPFTargetMachine &TM) { return new BPFDAGToDAGISel(TM); } diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td index c6c0ff587c6b..5ad777268208 100644 --- a/lib/Target/BPF/BPFInstrInfo.td +++ b/lib/Target/BPF/BPFInstrInfo.td @@ -51,7 +51,7 @@ def u64imm : Operand<i64> { let PrintMethod = "printImm64Operand"; } -def i64immSExt32 : PatLeaf<(imm), +def i64immSExt32 : PatLeaf<(i64 imm), [{return isInt<32>(N->getSExtValue()); }]>; // Addressing modes. 
@@ -67,17 +67,17 @@ def MEMri : Operand<i64> {
}

// Conditional code predicates - used for pattern matching for jump instructions
-def BPF_CC_EQ : PatLeaf<(imm),
+def BPF_CC_EQ : PatLeaf<(i64 imm),
                        [{return (N->getZExtValue() == ISD::SETEQ);}]>;
-def BPF_CC_NE : PatLeaf<(imm),
+def BPF_CC_NE : PatLeaf<(i64 imm),
                        [{return (N->getZExtValue() == ISD::SETNE);}]>;
-def BPF_CC_GE : PatLeaf<(imm),
+def BPF_CC_GE : PatLeaf<(i64 imm),
                        [{return (N->getZExtValue() == ISD::SETGE);}]>;
-def BPF_CC_GT : PatLeaf<(imm),
+def BPF_CC_GT : PatLeaf<(i64 imm),
                        [{return (N->getZExtValue() == ISD::SETGT);}]>;
-def BPF_CC_GTU : PatLeaf<(imm),
+def BPF_CC_GTU : PatLeaf<(i64 imm),
                         [{return (N->getZExtValue() == ISD::SETUGT);}]>;
-def BPF_CC_GEU : PatLeaf<(imm),
+def BPF_CC_GEU : PatLeaf<(i64 imm),
                         [{return (N->getZExtValue() == ISD::SETUGE);}]>;

// jump instructions
diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp
index 3c37d9ebb0eb..11ac5454f604 100644
--- a/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -59,9 +59,7 @@ namespace {
  public:
    static char ID;
-    HexagonGenMux() : MachineFunctionPass(ID), HII(nullptr), HRI(nullptr) {
-      initializeHexagonGenMuxPass(*PassRegistry::getPassRegistry());
-    }
+    HexagonGenMux() : MachineFunctionPass(ID) {}
    StringRef getPassName() const override {
      return "Hexagon generate mux instructions";
@@ -79,8 +77,8 @@ namespace {
    }

  private:
-    const HexagonInstrInfo *HII;
-    const HexagonRegisterInfo *HRI;
+    const HexagonInstrInfo *HII = nullptr;
+    const HexagonRegisterInfo *HRI = nullptr;

    struct CondsetInfo {
      unsigned PredR = 0;
@@ -134,7 +132,7 @@ namespace {

} // end anonymous namespace

-INITIALIZE_PASS(HexagonGenMux, "hexagon-mux",
+INITIALIZE_PASS(HexagonGenMux, "hexagon-gen-mux",
  "Hexagon generate mux instructions", false, false)

void HexagonGenMux::getSubRegs(unsigned Reg, BitVector &SRs) const {
@@ -297,12 +295,15 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
    unsigned SR1 = Src1->isReg() ? Src1->getReg() : 0;
    unsigned SR2 = Src2->isReg() ? Src2->getReg() : 0;
    bool Failure = false, CanUp = true, CanDown = true;
+    bool Used1 = false, Used2 = false;
    for (unsigned X = MinX+1; X < MaxX; X++) {
      const DefUseInfo &DU = DUM.lookup(X);
      if (DU.Defs[PR] || DU.Defs[DR] || DU.Uses[DR]) {
        Failure = true;
        break;
      }
+      Used1 |= DU.Uses[SR1];
+      Used2 |= DU.Uses[SR2];
      if (CanDown && DU.Defs[SR1])
        CanDown = false;
      if (CanUp && DU.Defs[SR2])
@@ -316,6 +317,45 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
    // Prefer "down", since this will move the MUX farther away from the
    // predicate definition.
    MachineBasicBlock::iterator At = CanDown ? Def2 : Def1;
+    if (CanDown) {
+      // If the MUX is placed "down", we need to make sure that there aren't
+      // any kills of the source registers between the two defs.
+      if (Used1 || Used2) {
+        auto ResetKill = [this] (unsigned Reg, MachineInstr &MI) -> bool {
+          if (MachineOperand *Op = MI.findRegisterUseOperand(Reg, true, HRI)) {
+            Op->setIsKill(false);
+            return true;
+          }
+          return false;
+        };
+        bool KilledSR1 = false, KilledSR2 = false;
+        for (MachineInstr &MJ : make_range(std::next(It1), It2)) {
+          if (SR1)
+            KilledSR1 |= ResetKill(SR1, MJ);
+          if (SR2)
+            KilledSR2 |= ResetKill(SR2, MJ);
+        }
+        // If any of the source registers were killed in this range, transfer
+        // the kills to the source operands: they will be "moved" to the
+        // resulting MUX and their parent instructions will be deleted.
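+        // For instance (an illustrative scenario): if an instruction between
+        // the two defs killed SR1, that kill flag was cleared in the loop
+        // above; marking Src1 as a kill then makes the MUX's own use of SR1
+        // the last one, keeping the liveness information consistent once the
+        // original defs are erased.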
+ if (KilledSR1) { + assert(Src1->isReg()); + Src1->setIsKill(true); + } + if (KilledSR2) { + assert(Src2->isReg()); + Src2->setIsKill(true); + } + } + } else { + // If the MUX is placed "up", it shouldn't kill any source registers + // that are still used afterwards. We can reset the kill flags directly + // on the operands, because the source instructions will be erased. + if (Used1 && Src1->isReg()) + Src1->setIsKill(false); + if (Used2 && Src2->isReg()) + Src2->setIsKill(false); + } ML.push_back(MuxInfo(At, DR, PR, SrcT, SrcF, Def1, Def2)); } diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index e4434136bf86..e5f49ca77a91 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -124,6 +124,7 @@ private: bool keepsLowBits(const SDValue &Val, unsigned NumBits, SDValue &Src); bool isOrEquivalentToAdd(const SDNode *N) const; bool isAlignedMemNode(const MemSDNode *N) const; + bool isSmallStackStore(const StoreSDNode *N) const; bool isPositiveHalfWord(const SDNode *N) const; // DAG preprocessing functions. @@ -1462,6 +1463,20 @@ bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const { return N->getAlignment() >= N->getMemoryVT().getStoreSize(); } +bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const { + unsigned StackSize = MF->getFrameInfo().estimateStackSize(*MF); + switch (N->getMemoryVT().getStoreSize()) { + case 1: + return StackSize <= 56; // 1*2^6 - 8 + case 2: + return StackSize <= 120; // 2*2^6 - 8 + case 4: + return StackSize <= 248; // 4*2^6 - 8 + default: + return false; + } +} + // Return true when the given node fits in a positive half word. bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const { if (const ConstantSDNode *CN = dyn_cast<const ConstantSDNode>(N)) { diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index b748b58bc0ae..f82ad6cb3da6 100644 --- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -1744,7 +1744,8 @@ bool PolynomialMultiplyRecognize::recognize() { // wide as the target's pmpy instruction. 
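  // (Each of the rewrites below can fail; a failure at any step means the
  // idiom is not recognized and the transformation is abandoned.)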
if (!promoteTypes(LoopB, ExitB)) return false; - convertShiftsToLeft(LoopB, ExitB, IterCount); + if (!convertShiftsToLeft(LoopB, ExitB, IterCount)) + return false; cleanupLoopBody(LoopB); } diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td index f269b74fc447..689419638f54 100644 --- a/lib/Target/Hexagon/HexagonPatterns.td +++ b/lib/Target/Hexagon/HexagonPatterns.td @@ -401,6 +401,11 @@ def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>; def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>; def Sext64: PatLeaf<(i64 Usxtw:$Rs)>; +def: Pat<(i32 (trunc (sra (mul Sext64:$Rs, Sext64:$Rt), (i32 32)))), + (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; +def: Pat<(i32 (trunc (srl (mul Sext64:$Rs, Sext64:$Rt), (i32 32)))), + (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; + def: Pat<(mul (Aext64 I32:$Rs), (Aext64 I32:$Rt)), (M2_dpmpyuu_s0 I32:$Rs, I32:$Rt)>; @@ -1470,16 +1475,22 @@ def i32in8ImmPred: PatLeaf<(i32 imm), [{ return v == (int64_t)(int8_t)v; }]>; +class SmallStackStore<PatFrag Store> + : PatFrag<(ops node:$Val, node:$Addr), (Store node:$Val, node:$Addr), [{ + return isSmallStackStore(cast<StoreSDNode>(N)); +}]>; let AddedComplexity = 40 in { // Even though the offset is not extendable in the store-immediate, we // can still generate the fi# in the base address. If the final offset // is not valid for the instruction, we will replace it with a scratch // register. -// def: Storexm_fi_pat <truncstorei8, s32_0ImmPred, ToImmByte, S4_storeirb_io>; -// def: Storexm_fi_pat <truncstorei16, i16in8ImmPred, ToImmHalf, -// S4_storeirh_io>; -// def: Storexm_fi_pat <store, i32in8ImmPred, ToImmWord, S4_storeiri_io>; + def: Storexm_fi_pat <SmallStackStore<truncstorei8>, s32_0ImmPred, + ToImmByte, S4_storeirb_io>; + def: Storexm_fi_pat <SmallStackStore<truncstorei16>, i16in8ImmPred, + ToImmHalf, S4_storeirh_io>; + def: Storexm_fi_pat <SmallStackStore<store>, i32in8ImmPred, + ToImmWord, S4_storeiri_io>; // defm: Storexm_fi_add_pat <truncstorei8, s32_0ImmPred, u6_0ImmPred, ToImmByte, // S4_storeirb_io>; diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index c757b6ecdd00..e507a797871f 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -111,6 +111,7 @@ namespace llvm { extern char &HexagonExpandCondsetsID; void initializeHexagonExpandCondsetsPass(PassRegistry&); void initializeHexagonLoopIdiomRecognizePass(PassRegistry&); + void initializeHexagonGenMuxPass(PassRegistry&); void initializeHexagonOptAddrModePass(PassRegistry&); Pass *createHexagonLoopIdiomPass(); @@ -152,8 +153,11 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) { extern "C" void LLVMInitializeHexagonTarget() { // Register the target. 
RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
-  initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
-  initializeHexagonOptAddrModePass(*PassRegistry::getPassRegistry());
+
+  PassRegistry &PR = *PassRegistry::getPassRegistry();
+  initializeHexagonLoopIdiomRecognizePass(PR);
+  initializeHexagonGenMuxPass(PR);
+  initializeHexagonOptAddrModePass(PR);
}

HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index f2193013b7aa..68708dc4f50f 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -364,6 +364,18 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
  setOperationAction(ISD::UDIV, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

+  if (!(Subtarget.hasDSP() && Subtarget.hasMips32r2())) {
+    setOperationAction(ISD::ADDC, MVT::i32, Expand);
+    setOperationAction(ISD::ADDE, MVT::i32, Expand);
+  }
+
+  setOperationAction(ISD::ADDC, MVT::i64, Expand);
+  setOperationAction(ISD::ADDE, MVT::i64, Expand);
+  setOperationAction(ISD::SUBC, MVT::i32, Expand);
+  setOperationAction(ISD::SUBE, MVT::i32, Expand);
+  setOperationAction(ISD::SUBC, MVT::i64, Expand);
+  setOperationAction(ISD::SUBE, MVT::i64, Expand);
+
  // Operations not directly supported by Mips.
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f64, Expand);
@@ -469,6 +481,7 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::ADD);
+  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::AssertZext);
  setTargetDAGCombine(ISD::SHL);

@@ -918,14 +931,130 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
  }
}

+static SDValue performMADD_MSUBCombine(SDNode *ROOTNode, SelectionDAG &CurDAG,
+                                       const MipsSubtarget &Subtarget) {
+  // ROOTNode must have a multiplication as an operand for the match to be
+  // successful.
+  if (ROOTNode->getOperand(0).getOpcode() != ISD::MUL &&
+      ROOTNode->getOperand(1).getOpcode() != ISD::MUL)
+    return SDValue();
+
+  // We don't handle vector types here.
+  if (ROOTNode->getValueType(0).isVector())
+    return SDValue();
+
+  // For MIPS64, madd / msub instructions are inefficient to use with 64 bit
+  // arithmetic. E.g.
+  // (add (mul a b) c) =>
+  //   let res = (madd (mthi (drotr c 32)) (mtlo c) a b) in
+  //   MIPS64:   (or (dsll (mfhi res) 32) (dsrl (dsll (mflo res) 32) 32))
+  //   or
+  //   MIPS64R2: (dins (mflo res) (mfhi res) 32 32)
+  //
+  // The overhead of setting up the Hi/Lo registers and reassembling the
+  // result makes this a dubious optimization for MIPS64. The core of the
+  // problem is that Hi/Lo contain the upper and lower 32 bits of the
+  // operand and result.
+  //
+  // It requires a chain of 4 add/mul for MIPS64R2 to get better code
+  // density than doing it naively, 5 for MIPS64. Additionally, using
+  // madd/msub on MIPS64 requires the operands actually be 32 bit sign
+  // extended operands, not true 64 bit values.
+  //
+  // FIXME: For the moment, disable this completely for MIPS64.
+  if (Subtarget.hasMips64())
+    return SDValue();
+
+  SDValue Mult = ROOTNode->getOperand(0).getOpcode() == ISD::MUL
+                     ? ROOTNode->getOperand(0)
+                     : ROOTNode->getOperand(1);
+
+  SDValue AddOperand = ROOTNode->getOperand(0).getOpcode() == ISD::MUL
+                     ? 
ROOTNode->getOperand(1) + : ROOTNode->getOperand(0); + + // Transform this to a MADD only if the user of this node is the add. + // If there are other users of the mul, this function returns here. + if (!Mult.hasOneUse()) + return SDValue(); + + // maddu and madd are unusual instructions in that on MIPS64 bits 63..31 + // must be in canonical form, i.e. sign extended. For MIPS32, the operands + // of the multiply must have 32 or more sign bits, otherwise we cannot + // perform this optimization. We have to check this here as we're performing + // this optimization pre-legalization. + SDValue MultLHS = Mult->getOperand(0); + SDValue MultRHS = Mult->getOperand(1); + unsigned LHSSB = CurDAG.ComputeNumSignBits(MultLHS); + unsigned RHSSB = CurDAG.ComputeNumSignBits(MultRHS); + + if (LHSSB < 32 || RHSSB < 32) + return SDValue(); + + APInt HighMask = + APInt::getHighBitsSet(Mult->getValueType(0).getScalarSizeInBits(), 32); + bool IsUnsigned = CurDAG.MaskedValueIsZero(Mult->getOperand(0), HighMask) && + CurDAG.MaskedValueIsZero(Mult->getOperand(1), HighMask) && + CurDAG.MaskedValueIsZero(AddOperand, HighMask); + + // Initialize accumulator. + SDLoc DL(ROOTNode); + SDValue TopHalf; + SDValue BottomHalf; + BottomHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand, + CurDAG.getIntPtrConstant(0, DL)); + + TopHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand, + CurDAG.getIntPtrConstant(1, DL)); + SDValue ACCIn = CurDAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, + BottomHalf, + TopHalf); + + // Create MipsMAdd(u) / MipsMSub(u) node. + bool IsAdd = ROOTNode->getOpcode() == ISD::ADD; + unsigned Opcode = IsAdd ? (IsUnsigned ? MipsISD::MAddu : MipsISD::MAdd) + : (IsUnsigned ? MipsISD::MSubu : MipsISD::MSub); + SDValue MAddOps[3] = { + CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(0)), + CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(1)), ACCIn}; + EVT VTs[2] = {MVT::i32, MVT::i32}; + SDValue MAdd = CurDAG.getNode(Opcode, DL, VTs, MAddOps); + + SDValue ResLo = CurDAG.getNode(MipsISD::MFLO, DL, MVT::i32, MAdd); + SDValue ResHi = CurDAG.getNode(MipsISD::MFHI, DL, MVT::i32, MAdd); + SDValue Combined = + CurDAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResLo, ResHi); + return Combined; +} + +static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget &Subtarget) { + // (sub v0 (mul v1, v2)) => (msub v1, v2, v0) + if (DCI.isBeforeLegalizeOps()) { + if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && + !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64) + return performMADD_MSUBCombine(N, DAG, Subtarget); + + return SDValue(); + } + + return SDValue(); +} + static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { - // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt)) + // (add v0 (mul v1, v2)) => (madd v1, v2, v0) + if (DCI.isBeforeLegalizeOps()) { + if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && + !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64) + return performMADD_MSUBCombine(N, DAG, Subtarget); - if (DCI.isBeforeLegalizeOps()) return SDValue(); + } + // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt)) SDValue Add = N->getOperand(1); if (Add.getOpcode() != ISD::ADD) @@ -1053,6 +1182,8 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return performAssertZextCombine(N, DAG, DCI, Subtarget); case ISD::SHL: 
return performSHLCombine(N, DAG, DCI, Subtarget); + case ISD::SUB: + return performSUBCombine(N, DAG, DCI, Subtarget); } return SDValue(); diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index df62c66b75a3..4adf77f8d9a9 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -103,12 +103,9 @@ void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID); for (unsigned i = 1; i < Cond.size(); ++i) { - if (Cond[i].isReg()) - MIB.addReg(Cond[i].getReg()); - else if (Cond[i].isImm()) - MIB.addImm(Cond[i].getImm()); - else - assert(false && "Cannot copy operand"); + assert((Cond[i].isImm() || Cond[i].isReg()) && + "Cannot copy operand for conditional branch!"); + MIB.add(Cond[i]); } MIB.addMBB(TBB); } diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index b95f1158fa56..272595af5f6f 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -274,8 +274,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { if (IsPIC) { MachineBasicBlock *BalTgtMBB = MF->CreateMachineBasicBlock(BB); MF->insert(FallThroughMBB, BalTgtMBB); - LongBrMBB->addSuccessor(BalTgtMBB); - BalTgtMBB->addSuccessor(TgtMBB); + LongBrMBB->addSuccessor(BalTgtMBB, BranchProbability::getOne()); + BalTgtMBB->addSuccessor(&*FallThroughMBB, BranchProbability::getOne()); // We must select between the MIPS32r6/MIPS64r6 BAL (which is a normal // instruction) and the pre-MIPS32r6/MIPS64r6 definition (which is an @@ -342,8 +342,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { .addReg(Mips::SP).addImm(8); if (Subtarget.hasMips32r6()) - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR)) - .addReg(Mips::ZERO).addReg(Mips::AT); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR), Mips::ZERO) + .addReg(Mips::AT); else BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT); @@ -415,8 +415,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { .addReg(Mips::SP_64).addImm(0); if (Subtarget.hasMips64r6()) - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR64)) - .addReg(Mips::ZERO_64).addReg(Mips::AT_64); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR64), Mips::ZERO_64) + .addReg(Mips::AT_64); else BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64); diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 49ae6dd4cd39..4be26dd25dc0 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -245,46 +245,64 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { } } -void MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag, - SDValue CmpLHS, const SDLoc &DL, - SDNode *Node) const { - unsigned Opc = InFlag.getOpcode(); (void)Opc; - - assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || - (Opc == ISD::SUBC || Opc == ISD::SUBE)) && - "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); - - unsigned SLTuOp = Mips::SLTu, ADDuOp = Mips::ADDu; - if (Subtarget->isGP64bit()) { - SLTuOp = Mips::SLTu64; - ADDuOp = Mips::DADDu; - } - - SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; +void MipsSEDAGToDAGISel::selectAddE(SDNode *Node, const SDLoc &DL) const { + SDValue InFlag = Node->getOperand(2); + unsigned Opc = InFlag.getOpcode(); SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1); EVT VT = LHS.getValueType(); - SDNode *Carry = CurDAG->getMachineNode(SLTuOp, DL, VT, Ops); - - if 
(Subtarget->isGP64bit()) {
-    // On 64-bit targets, sltu produces an i64 but our backend currently says
-    // that SLTu64 produces an i32. We need to fix this in the long run but for
-    // now, just make the DAG type-correct by asserting the upper bits are zero.
-    Carry = CurDAG->getMachineNode(Mips::SUBREG_TO_REG, DL, VT,
-                                   CurDAG->getTargetConstant(0, DL, VT),
-                                   SDValue(Carry, 0),
-                                   CurDAG->getTargetConstant(Mips::sub_32, DL,
-                                                             VT));
+  // In the base case, we can rely on the carry bit from the addsc
+  // instruction.
+  if (Opc == ISD::ADDC) {
+    SDValue Ops[3] = {LHS, RHS, InFlag};
+    CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Ops);
+    return;
  }

-  // Generate a second addition only if we know that RHS is not a
-  // constant-zero node.
-  SDNode *AddCarry = Carry;
-  ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
-  if (!C || C->getZExtValue())
-    AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS);
+  assert(Opc == ISD::ADDE && "ISD::ADDE not in a chain of ADDE nodes!");
+
+  // The more complex case is when there is a chain of ISD::ADDE nodes like:
+  // (adde (adde (adde (addc a b) c) d) e).
+  //
+  // The addwc instruction does not write to the carry bit, instead it writes
+  // to bit 20 of the dsp control register. To match this series of nodes, each
+  // intermediate adde node must be expanded to write the carry bit before the
+  // addition.
+
+  // Start by reading the overflow field for addsc and moving the value to the
+  // carry field. The usage of 1 here with MipsISD::RDDSP / Mips::WRDSP
+  // corresponds to reading/writing the entire control register to/from a GPR.
+
+  SDValue CstOne = CurDAG->getTargetConstant(1, DL, MVT::i32);
+
+  SDValue OuFlag = CurDAG->getTargetConstant(20, DL, MVT::i32);
+
+  SDNode *DSPCtrlField =
+      CurDAG->getMachineNode(Mips::RDDSP, DL, MVT::i32, MVT::Glue, CstOne, InFlag);
+
+  SDNode *Carry = CurDAG->getMachineNode(
+      Mips::EXT, DL, MVT::i32, SDValue(DSPCtrlField, 0), OuFlag, CstOne);

-  CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, SDValue(AddCarry, 0));
+  SDValue Ops[4] = {SDValue(DSPCtrlField, 0),
+                    CurDAG->getTargetConstant(6, DL, MVT::i32), CstOne,
+                    SDValue(Carry, 0)};
+  SDNode *DSPCFWithCarry = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, Ops);
+
+  // My reading of the MIPS DSP 3.01 specification isn't as clear as I
+  // would like about whether bit 20 always gets overwritten by addwc.
+  // Hence take an extremely conservative view and presume it's sticky. We
+  // therefore need to clear it.
+
+  SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32);
+
+  SDValue InsOps[4] = {Zero, OuFlag, CstOne, SDValue(DSPCFWithCarry, 0)};
+  SDNode *DSPCtrlFinal = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, InsOps);
+
+  SDNode *WrDSP = CurDAG->getMachineNode(Mips::WRDSP, DL, MVT::Glue,
+                                         SDValue(DSPCtrlFinal, 0), CstOne);
+
+  SDValue Operands[3] = {LHS, RHS, SDValue(WrDSP, 0)};
+  CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Operands);
}

/// Match frameindex
@@ -765,19 +783,8 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
  switch(Opcode) {
  default: break;

-  case ISD::SUBE: {
-    SDValue InFlag = Node->getOperand(2);
-    unsigned Opc = Subtarget->isGP64bit() ? Mips::DSUBu : Mips::SUBu;
-    selectAddESubE(Opc, InFlag, InFlag.getOperand(0), DL, Node);
-    return true;
-  }
-
  case ISD::ADDE: {
-    if (Subtarget->hasDSP()) // Select DSP instructions, ADDSC and ADDWC.
-      break;
-    SDValue InFlag = Node->getOperand(2);
-    unsigned Opc = Subtarget->isGP64bit() ? 
Mips::DADDu : Mips::ADDu; - selectAddESubE(Opc, InFlag, InFlag.getValue(0), DL, Node); + selectAddE(Node, DL); return true; } diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h index f89a350cab04..6f38289c5a45 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -41,8 +41,7 @@ private: const SDLoc &dl, EVT Ty, bool HasLo, bool HasHi); - void selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS, - const SDLoc &DL, SDNode *Node) const; + void selectAddE(SDNode *Node, const SDLoc &DL) const; bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const; bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset, diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index bf7f079e3105..2382ea271661 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -179,8 +179,6 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::LOAD, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); - setTargetDAGCombine(ISD::ADDE); - setTargetDAGCombine(ISD::SUBE); setTargetDAGCombine(ISD::MUL); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -421,163 +419,6 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op, return MipsTargetLowering::LowerOperation(Op, DAG); } -// selectMADD - -// Transforms a subgraph in CurDAG if the following pattern is found: -// (addc multLo, Lo0), (adde multHi, Hi0), -// where, -// multHi/Lo: product of multiplication -// Lo0: initial value of Lo register -// Hi0: initial value of Hi register -// Return true if pattern matching was successful. -static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) { - // ADDENode's second operand must be a flag output of an ADDC node in order - // for the matching to be successful. - SDNode *ADDCNode = ADDENode->getOperand(2).getNode(); - - if (ADDCNode->getOpcode() != ISD::ADDC) - return false; - - SDValue MultHi = ADDENode->getOperand(0); - SDValue MultLo = ADDCNode->getOperand(0); - SDNode *MultNode = MultHi.getNode(); - unsigned MultOpc = MultHi.getOpcode(); - - // MultHi and MultLo must be generated by the same node, - if (MultLo.getNode() != MultNode) - return false; - - // and it must be a multiplication. - if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) - return false; - - // MultLo amd MultHi must be the first and second output of MultNode - // respectively. - if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) - return false; - - // Transform this to a MADD only if ADDENode and ADDCNode are the only users - // of the values of MultNode, in which case MultNode will be removed in later - // phases. - // If there exist users other than ADDENode or ADDCNode, this function returns - // here, which will result in MultNode being mapped to a single MULT - // instruction node rather than a pair of MULT and MADD instructions being - // produced. - if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) - return false; - - SDLoc DL(ADDENode); - - // Initialize accumulator. - SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, - ADDCNode->getOperand(1), - ADDENode->getOperand(1)); - - // create MipsMAdd(u) node - MultOpc = MultOpc == ISD::UMUL_LOHI ? 
MipsISD::MAddu : MipsISD::MAdd; - - SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped, - MultNode->getOperand(0),// Factor 0 - MultNode->getOperand(1),// Factor 1 - ACCIn); - - // replace uses of adde and addc here - if (!SDValue(ADDCNode, 0).use_empty()) { - SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut); - } - if (!SDValue(ADDENode, 0).use_empty()) { - SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut); - } - - return true; -} - -// selectMSUB - -// Transforms a subgraph in CurDAG if the following pattern is found: -// (addc Lo0, multLo), (sube Hi0, multHi), -// where, -// multHi/Lo: product of multiplication -// Lo0: initial value of Lo register -// Hi0: initial value of Hi register -// Return true if pattern matching was successful. -static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { - // SUBENode's second operand must be a flag output of an SUBC node in order - // for the matching to be successful. - SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); - - if (SUBCNode->getOpcode() != ISD::SUBC) - return false; - - SDValue MultHi = SUBENode->getOperand(1); - SDValue MultLo = SUBCNode->getOperand(1); - SDNode *MultNode = MultHi.getNode(); - unsigned MultOpc = MultHi.getOpcode(); - - // MultHi and MultLo must be generated by the same node, - if (MultLo.getNode() != MultNode) - return false; - - // and it must be a multiplication. - if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) - return false; - - // MultLo amd MultHi must be the first and second output of MultNode - // respectively. - if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) - return false; - - // Transform this to a MSUB only if SUBENode and SUBCNode are the only users - // of the values of MultNode, in which case MultNode will be removed in later - // phases. - // If there exist users other than SUBENode or SUBCNode, this function returns - // here, which will result in MultNode being mapped to a single MULT - // instruction node rather than a pair of MULT and MSUB instructions being - // produced. - if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) - return false; - - SDLoc DL(SUBENode); - - // Initialize accumulator. - SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, - SUBCNode->getOperand(0), - SUBENode->getOperand(0)); - - // create MipsSub(u) node - MultOpc = MultOpc == ISD::UMUL_LOHI ? 
MipsISD::MSubu : MipsISD::MSub; - - SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue, - MultNode->getOperand(0),// Factor 0 - MultNode->getOperand(1),// Factor 1 - ACCIn); - - // replace uses of sube and subc here - if (!SDValue(SUBCNode, 0).use_empty()) { - SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut); - } - if (!SDValue(SUBENode, 0).use_empty()) { - SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut); - } - - return true; -} - -static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget &Subtarget) { - if (DCI.isBeforeLegalize()) - return SDValue(); - - if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && - N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG)) - return SDValue(N, 0); - - return SDValue(); -} - // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT // // Performs the following transformations: @@ -820,19 +661,6 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget &Subtarget) { - if (DCI.isBeforeLegalize()) - return SDValue(); - - if (Subtarget.hasMips32() && N->getValueType(0) == MVT::i32 && - selectMSUB(N, &DAG)) - return SDValue(N, 0); - - return SDValue(); -} - static SDValue genConstMult(SDValue X, uint64_t C, const SDLoc &DL, EVT VT, EVT ShiftTy, SelectionDAG &DAG) { // Clear the upper (64 - VT.sizeInBits) bits. @@ -1110,16 +938,12 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SDValue Val; switch (N->getOpcode()) { - case ISD::ADDE: - return performADDECombine(N, DAG, DCI, Subtarget); case ISD::AND: Val = performANDCombine(N, DAG, DCI, Subtarget); break; case ISD::OR: Val = performORCombine(N, DAG, DCI, Subtarget); break; - case ISD::SUBE: - return performSUBECombine(N, DAG, DCI, Subtarget); case ISD::MUL: return performMULCombine(N, DAG, DCI, this); case ISD::SHL: diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 625a652a0ca0..ccd47f00c0d3 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -78,7 +78,7 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // IsNan2008 - IEEE 754-2008 NaN encoding. bool IsNaN2008bit; - // IsFP64bit - General-purpose registers are 64 bits wide + // IsGP64bit - General-purpose registers are 64 bits wide bool IsGP64bit; // IsPTR64bit - Pointers are 64 bit wide diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 28d496ee9ca1..afd2e87078a9 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -2907,19 +2907,6 @@ SDValue PPCDAGToDAGISel::get64BitZExtCompare(SDValue LHS, SDValue RHS, getI64Imm(58, dl), getI64Imm(63, dl)), 0); } - case ISD::SETNE: { - // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) - // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA) - // {addcz.reg, addcz.CA} = (addcarry %a, -1) - // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA) - SDValue Xor = IsRHSZero ? 
LHS : - SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); - SDValue AC = - SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, - Xor, getI32Imm(~0U, dl)), 0); - return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC, - Xor, AC.getValue(1)), 0); - } } } @@ -2944,19 +2931,6 @@ SDValue PPCDAGToDAGISel::get64BitSExtCompare(SDValue LHS, SDValue RHS, return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic, Addic, Addic.getValue(1)), 0); } - case ISD::SETNE: { - // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b)) - // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA) - // {subfcz.reg, subfcz.CA} = (subcarry 0, %a) - // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA) - SDValue Xor = IsRHSZero ? LHS : - SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); - SDValue SC = - SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue, - Xor, getI32Imm(0, dl)), 0); - return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC, - SC, SC.getValue(1)), 0); - } } } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index bda4e5e81734..662550f7a396 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -136,7 +136,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, addRegisterClass(MVT::f64, &PPC::F8RCRegClass); } - // PowerPC has an i16 but no i8 (or i1) SEXTLOAD + // PowerPC has an i16 but no i8 (or i1) SEXTLOAD. for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); @@ -175,7 +175,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); } - // PowerPC does not support direct load / store of condition registers + // PowerPC does not support direct load/store of condition registers. setOperationAction(ISD::LOAD, MVT::i1, Custom); setOperationAction(ISD::STORE, MVT::i1, Custom); @@ -204,11 +204,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); setOperationAction(ISD::FREM, MVT::ppcf128, Expand); - // PowerPC has no SREM/UREM instructions - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); + // PowerPC has no SREM/UREM instructions unless we are on P9 + // On P9 we may use a hardware instruction to compute the remainder. + // The instructions are not legalized directly because in the cases where the + // result of both the remainder and the division is required it is more + // efficient to compute the remainder from the result of the division rather + // than use the remainder instruction. + if (Subtarget.isISA3_0()) { + setOperationAction(ISD::SREM, MVT::i32, Custom); + setOperationAction(ISD::UREM, MVT::i32, Custom); + setOperationAction(ISD::SREM, MVT::i64, Custom); + setOperationAction(ISD::UREM, MVT::i64, Custom); + } else { + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); + } // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. 
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
@@ -1116,6 +1128,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
  case PPCISD::XXINSERT:        return "PPCISD::XXINSERT";
+  case PPCISD::XXREVERSE:       return "PPCISD::XXREVERSE";
  case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
  case PPCISD::VECSHL:          return "PPCISD::VECSHL";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
@@ -1598,22 +1611,34 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  return true;
}

-// Check that the mask is shuffling N byte elements.
-static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width) {
+/// Check that the mask is shuffling N byte elements. Within each N byte
+/// element of the mask, the indices could be either in increasing or
+/// decreasing order as long as they are consecutive.
+/// \param[in] N the shuffle vector SD Node to analyze
+/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
+/// Word/DoubleWord/QuadWord).
+/// \param[in] StepLen the step between consecutive indices within an N byte
+/// element: 1 if the mask is in increasing order, -1 if it is decreasing.
+/// \return true iff the mask is shuffling N byte elements.
+static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
+                                   int StepLen) {
  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
         "Unexpected element width.");
+  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");

  unsigned NumOfElem = 16 / Width;
  unsigned MaskVal[16]; //  Width is never greater than 16
  for (unsigned i = 0; i < NumOfElem; ++i) {
    MaskVal[0] = N->getMaskElt(i * Width);
-    if (MaskVal[0] % Width) {
+    if ((StepLen == 1) && (MaskVal[0] % Width)) {
+      return false;
+    } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
      return false;
    }

    for (unsigned int j = 1; j < Width; ++j) {
      MaskVal[j] = N->getMaskElt(i * Width + j);
-      if (MaskVal[j] != MaskVal[j-1] + 1) {
+      if (MaskVal[j] != MaskVal[j-1] + StepLen) {
        return false;
      }
    }
@@ -1624,7 +1649,7 @@ static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width) {

bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
-  if (!isNByteElemShuffleMask(N, 4))
+  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12
@@ -1701,7 +1726,7 @@ bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
-  if (!isNByteElemShuffleMask(N, 4))
+  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
@@ -1759,6 +1784,35 @@ bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, } } +bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) { + assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); + + if (!isNByteElemShuffleMask(N, Width, -1)) + return false; + + for (int i = 0; i < 16; i += Width) + if (N->getMaskElt(i) != i + Width - 1) + return false; + + return true; +} + +bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 2); +} + +bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 4); +} + +bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 8); +} + +bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 16); +} + /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap /// if the inputs to the instruction should be swapped and set \p DM to the /// value for the immediate. @@ -1772,7 +1826,7 @@ bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); // Ensure each byte index of the double word is consecutive. - if (!isNByteElemShuffleMask(N, 8)) + if (!isNByteElemShuffleMask(N, 8, 1)) return false; unsigned M0 = N->getMaskElt(0) / 8; @@ -6819,6 +6873,7 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, // Given the head of the old chain, ResChain, insert a token factor containing // it and NewResChain, and make users of ResChain now be users of that token // factor. +// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead. void PPCTargetLowering::spliceIntoChain(SDValue ResChain, SDValue NewResChain, SelectionDAG &DAG) const { @@ -7846,6 +7901,26 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI); } + if (Subtarget.hasP9Vector()) { + if (PPC::isXXBRHShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); + SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord); + } else if (PPC::isXXBRWShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); + SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord); + } else if (PPC::isXXBRDShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1); + SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord); + } else if (PPC::isXXBRQShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1); + SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord); + } + } + if (Subtarget.hasVSX()) { if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); @@ -8393,6 +8468,18 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, return SDValue(); } +SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const { + // Check for a DIV with the same operands as this REM. 
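+  // (A sketch of the intent: when the program computes both a / b and
+  // a % b, expanding the REM as a - (a / b) * b reuses the division and is
+  // assumed cheaper than issuing a separate modsw/modsd/moduw/modud, so
+  // when such a DIV is found below we return SDValue() and let the
+  // legalizer expand the node instead.)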
+ for (auto UI : Op.getOperand(1)->uses()) { + if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) || + (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV)) + if (UI->getOperand(0) == Op.getOperand(0) && + UI->getOperand(1) == Op.getOperand(1)) + return SDValue(); + } + return Op; +} + SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -8861,6 +8948,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG); + case ISD::SREM: + case ISD::UREM: + return LowerREM(Op, DAG); } } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 7982a4a9e9fb..a5108727bb4b 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -86,6 +86,10 @@ namespace llvm { /// XXINSERT, + /// XXREVERSE - The PPC VSX reverse instruction + /// + XXREVERSE, + /// VECSHL - The PPC VSX shift left instruction /// VECSHL, @@ -458,6 +462,23 @@ namespace llvm { /// for a XXSLDWI instruction. bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE); + + /// isXXBRHShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRH instruction. + bool isXXBRHShuffleMask(ShuffleVectorSDNode *N); + + /// isXXBRWShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRW instruction. + bool isXXBRWShuffleMask(ShuffleVectorSDNode *N); + + /// isXXBRDShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRD instruction. + bool isXXBRDShuffleMask(ShuffleVectorSDNode *N); + + /// isXXBRQShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRQ instruction. + bool isXXBRQShuffleMask(ShuffleVectorSDNode *N); + /// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable /// for a XXPERMDI instruction. bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, @@ -918,6 +939,7 @@ namespace llvm { SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 295590b2acf6..70536a6039b8 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -683,6 +683,16 @@ def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), "divde $rT, $rA, $rB", IIC_IntDivD, [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>, isPPC64, Requires<[HasExtDiv]>; + +let Predicates = [IsISA3_0] in { +def MODSD : XForm_8<31, 777, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "modsd $rT, $rA, $rB", IIC_IntDivW, + [(set i64:$rT, (srem i64:$rA, i64:$rB))]>; +def MODUD : XForm_8<31, 265, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "modud $rT, $rA, $rB", IIC_IntDivW, + [(set i64:$rT, (urem i64:$rA, i64:$rB))]>; +} + let Defs = [CR0] in def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), "divde. 
$rT, $rA, $rB", IIC_IntDivD, diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index f3c68c443b1b..236e513bec23 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1964,7 +1964,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { } case PPC::CFENCE8: { auto Val = MI.getOperand(0).getReg(); - BuildMI(MBB, MI, DL, get(PPC::CMPW), PPC::CR7).addReg(Val).addReg(Val); + BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val); BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP)) .addImm(PPC::PRED_NE_MINUS) .addReg(PPC::CR7) diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 8223aa655e38..47d59c25392a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -53,6 +53,10 @@ def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> ]>; +def SDT_PPCVecReverse: SDTypeProfile<1, 1, [ SDTCisVec<0>, + SDTCisVec<1> +]>; + def SDT_PPCxxpermdi: SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> ]>; @@ -174,6 +178,7 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; def PPCxxinsert : SDNode<"PPCISD::XXINSERT", SDT_PPCVecInsert, []>; +def PPCxxreverse : SDNode<"PPCISD::XXREVERSE", SDT_PPCVecReverse, []>; def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>; def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>; @@ -2544,6 +2549,14 @@ let Uses = [RM] in { "mffs. $rT", IIC_IntMFFS, []>, isDOT; } +let Predicates = [IsISA3_0] in { +def MODSW : XForm_8<31, 779, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "modsw $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (srem i32:$rA, i32:$rB))]>; +def MODUW : XForm_8<31, 267, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "moduw $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (urem i32:$rA, i32:$rB))]>; +} let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations. // XO-Form instructions. 
Arithmetic instructions that can set overflow bit diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index e214d26c063b..9cfc897cdb3f 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -2340,6 +2340,16 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc, []>; def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>; + // Vector Reverse + def : Pat<(v8i16 (PPCxxreverse v8i16 :$A)), + (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; + def : Pat<(v4i32 (PPCxxreverse v4i32 :$A)), + (v4i32 (XXBRW $A))>; + def : Pat<(v2i64 (PPCxxreverse v2i64 :$A)), + (v2i64 (XXBRD $A))>; + def : Pat<(v1i128 (PPCxxreverse v1i128 :$A)), + (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; + // Vector Permute def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc, IIC_VecPerm, []>; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index aad913924692..637e52bbdbee 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -273,6 +273,20 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } +bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg, + const MachineFunction &MF) const { + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + if (TM.isELFv2ABI() && PhysReg == PPC::X2) { + // X2 is guaranteed to be preserved within a function if it is reserved. + // The reason it's reserved is that it's the TOC pointer (and the function + // uses the TOC). In functions where it isn't reserved (i.e. leaf functions + // with no TOC access), we can't claim that it is preserved. + return (getReservedRegs(MF).test(PPC::X2)); + } else { + return false; + } +} + unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { const PPCFrameLowering *TFI = getFrameLowering(MF); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 4a96327fe552..0bbb71fdf9fb 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -83,6 +83,7 @@ public: void adjustStackMapLiveOutMask(uint32_t *Mask) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; + bool isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const override; /// We require the register scavenger. bool requiresRegisterScavenging(const MachineFunction &MF) const override { diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 5559cdc5fe46..3dbd5f5b9a92 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -230,7 +230,7 @@ unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) { return ST->hasVSX() ? 
64 : 32; } -unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) { +unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const { if (Vector) { if (ST->hasQPX()) return 256; if (ST->hasAltivec()) return 128; diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h index 2e0116fee04c..758c335def08 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -63,7 +63,7 @@ public: bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize); bool enableInterleavedAccessVectorization(); unsigned getNumberOfRegisters(bool Vector); - unsigned getRegisterBitWidth(bool Vector); + unsigned getRegisterBitWidth(bool Vector) const; unsigned getCacheLineSize(); unsigned getPrefetchDistance(); unsigned getMaxInterleaveFactor(unsigned VF); diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 6a3dc6799c43..422c16b8eb62 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -302,7 +302,7 @@ unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) { return 0; } -unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) { +unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const { if (!Vector) return 64; if (ST->hasVector()) diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h index ad597f5c65f0..bdba7601eb78 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -53,7 +53,7 @@ public: /// @{ unsigned getNumberOfRegisters(bool Vector); - unsigned getRegisterBitWidth(bool Vector); + unsigned getRegisterBitWidth(bool Vector) const; bool prefersVectorizedAddressing() { return false; } bool supportsEfficientVectorElementLoadStore() { return true; } diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h index ddf964e7dbb7..5ad147e5e596 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h @@ -46,9 +46,7 @@ public: /// .functype virtual void emitIndirectFunctionType(StringRef name, SmallVectorImpl<MVT> &Params, - SmallVectorImpl<MVT> &Results) { - llvm_unreachable("emitIndirectFunctionType not implemented"); - } + SmallVectorImpl<MVT> &Results) = 0; /// .indidx virtual void emitIndIdx(const MCExpr *Value) = 0; /// .import_global diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp index 27c01cb8acf7..19e14f3261aa 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp @@ -16,11 +16,15 @@ #include "MCTargetDesc/WebAssemblyFixupKinds.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/BinaryFormat/Wasm.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCSymbolWasm.h" #include "llvm/MC/MCWasmObjectWriter.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" + using namespace llvm; namespace { @@ -29,8 +33,8 @@ public: explicit WebAssemblyWasmObjectWriter(bool Is64Bit); private: - unsigned getRelocType(MCContext &Ctx, const MCValue &Target, - const MCFixup 
&Fixup, bool IsPCRel) const override; + unsigned getRelocType(const MCValue &Target, + const MCFixup &Fixup) const override; }; } // end anonymous namespace @@ -39,16 +43,13 @@ WebAssemblyWasmObjectWriter::WebAssemblyWasmObjectWriter(bool Is64Bit) // Test whether the given expression computes a function address. static bool IsFunctionExpr(const MCExpr *Expr) { - if (const MCSymbolRefExpr *SyExp = - dyn_cast<MCSymbolRefExpr>(Expr)) + if (auto SyExp = dyn_cast<MCSymbolRefExpr>(Expr)) return cast<MCSymbolWasm>(SyExp->getSymbol()).isFunction(); - if (const MCBinaryExpr *BinOp = - dyn_cast<MCBinaryExpr>(Expr)) + if (auto BinOp = dyn_cast<MCBinaryExpr>(Expr)) return IsFunctionExpr(BinOp->getLHS()) != IsFunctionExpr(BinOp->getRHS()); - if (const MCUnaryExpr *UnOp = - dyn_cast<MCUnaryExpr>(Expr)) + if (auto UnOp = dyn_cast<MCUnaryExpr>(Expr)) return IsFunctionExpr(UnOp->getSubExpr()); return false; @@ -59,15 +60,13 @@ static bool IsFunctionType(const MCValue &Target) { return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX; } -unsigned WebAssemblyWasmObjectWriter::getRelocType(MCContext &Ctx, - const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel) const { +unsigned +WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, + const MCFixup &Fixup) const { // WebAssembly functions are not allocated in the data address space. To // resolve a pointer to a function, we must use a special relocation type. bool IsFunction = IsFunctionExpr(Fixup.getValue()); - assert(!IsPCRel); switch (unsigned(Fixup.getKind())) { case WebAssembly::fixup_code_sleb128_i32: if (IsFunction) diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 4178ec0b28f0..b999091e2d29 100644 --- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -33,6 +33,8 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolWasm.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -218,9 +220,13 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *WebAssemblyAsmPrinter::lowerConstant(const Constant *CV) { if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) - if (GV->getValueType()->isFunctionTy()) + if (GV->getValueType()->isFunctionTy()) { + MCSymbol* Sym = getSymbol(GV); + if (!isa<MCSymbolELF>(Sym)) + cast<MCSymbolWasm>(Sym)->setIsFunction(true); return MCSymbolRefExpr::create( - getSymbol(GV), MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext); + Sym, MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext); + } return AsmPrinter::lowerConstant(CV); } diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 47aadf99e860..b3ce4bd27460 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -36,7 +36,7 @@ unsigned WebAssemblyTTIImpl::getNumberOfRegisters(bool Vector) { return Result; } -unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) { +unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) const { if (Vector && getST()->hasSIMD128()) return 128; diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index f658609f8930..7b35fc916133 100644 --- 
a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -55,7 +55,7 @@ public: /// @{ unsigned getNumberOfRegisters(bool Vector); - unsigned getRegisterBitWidth(bool Vector); + unsigned getRegisterBitWidth(bool Vector) const; unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 831e9bdab0e1..172eba0002d4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1,4 +1,3 @@ - //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===// // // The LLVM Compiler Infrastructure @@ -5314,20 +5313,37 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!"); unsigned NumElts = SizeInBits / EltSizeInBits; - unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); - unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; + // Bitcast a source array of element bits to the target size. + auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) { + unsigned NumSrcElts = UndefSrcElts.getBitWidth(); + unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth(); + assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits && + "Constant bit sizes don't match"); - // Extract all the undef/constant element data and pack into single bitsets. - APInt UndefBits(SizeInBits, 0); - APInt MaskBits(SizeInBits, 0); - - // Split the undef/constant single bitset data into the target elements. - auto SplitBitData = [&]() { // Don't split if we don't allow undef bits. bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs; - if (UndefBits.getBoolValue() && !AllowUndefs) + if (UndefSrcElts.getBoolValue() && !AllowUndefs) return false; + // If we're already the right size, don't bother bitcasting. + if (NumSrcElts == NumElts) { + UndefElts = UndefSrcElts; + EltBits.assign(SrcEltBits.begin(), SrcEltBits.end()); + return true; + } + + // Extract all the undef/constant element data and pack into single bitsets. + APInt UndefBits(SizeInBits, 0); + APInt MaskBits(SizeInBits, 0); + + for (unsigned i = 0; i != NumSrcElts; ++i) { + unsigned BitOffset = i * SrcEltSizeInBits; + if (UndefSrcElts[i]) + UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits); + MaskBits.insertBits(SrcEltBits[i], BitOffset); + } + + // Split the undef/constant single bitset data into the target elements. UndefElts = APInt(NumElts, 0); EltBits.resize(NumElts, APInt(EltSizeInBits, 0)); @@ -5356,20 +5372,19 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, // Collect constant bits and insert into mask/undef bit masks. 
auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs, - unsigned BitOffset) { + unsigned UndefBitIndex) { if (!Cst) return false; if (isa<UndefValue>(Cst)) { - unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits(); - Undefs.setBits(BitOffset, BitOffset + CstSizeInBits); + Undefs.setBit(UndefBitIndex); return true; } if (auto *CInt = dyn_cast<ConstantInt>(Cst)) { - Mask.insertBits(CInt->getValue(), BitOffset); + Mask = CInt->getValue(); return true; } if (auto *CFP = dyn_cast<ConstantFP>(Cst)) { - Mask.insertBits(CFP->getValueAPF().bitcastToAPInt(), BitOffset); + Mask = CFP->getValueAPF().bitcastToAPInt(); return true; } return false; @@ -5377,18 +5392,21 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, // Extract constant bits from build vector. if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { + unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); + unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; + + APInt UndefSrcElts(NumSrcElts, 0); + SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0)); for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { const SDValue &Src = Op.getOperand(i); - unsigned BitOffset = i * SrcEltSizeInBits; if (Src.isUndef()) { - UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits); + UndefSrcElts.setBit(i); continue; } auto *Cst = cast<ConstantSDNode>(Src); - APInt Bits = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); - MaskBits.insertBits(Bits, BitOffset); + SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); } - return SplitBitData(); + return CastBitData(UndefSrcElts, SrcEltBits); } // Extract constant bits from constant pool vector. @@ -5397,27 +5415,33 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits())) return false; - unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits(); - for (unsigned i = 0, e = CstTy->getVectorNumElements(); i != e; ++i) - if (!CollectConstantBits(Cst->getAggregateElement(i), MaskBits, UndefBits, - i * CstEltSizeInBits)) + unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits(); + unsigned NumSrcElts = CstTy->getVectorNumElements(); + + APInt UndefSrcElts(NumSrcElts, 0); + SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0)); + for (unsigned i = 0; i != NumSrcElts; ++i) + if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i], + UndefSrcElts, i)) return false; - return SplitBitData(); + return CastBitData(UndefSrcElts, SrcEltBits); } // Extract constant bits from a broadcasted constant pool scalar. 
if (Op.getOpcode() == X86ISD::VBROADCAST && - EltSizeInBits <= SrcEltSizeInBits) { + EltSizeInBits <= VT.getScalarSizeInBits()) { if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) { - APInt Bits(SizeInBits, 0); - APInt Undefs(SizeInBits, 0); - if (CollectConstantBits(Broadcast, Bits, Undefs, 0)) { - for (unsigned i = 0; i != NumSrcElts; ++i) { - MaskBits |= Bits.shl(i * SrcEltSizeInBits); - UndefBits |= Undefs.shl(i * SrcEltSizeInBits); - } - return SplitBitData(); + unsigned SrcEltSizeInBits = Broadcast->getType()->getScalarSizeInBits(); + unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; + + APInt UndefSrcElts(NumSrcElts, 0); + SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0)); + if (CollectConstantBits(Broadcast, SrcEltBits[0], UndefSrcElts, 0)) { + if (UndefSrcElts[0]) + UndefSrcElts.setBits(0, NumSrcElts); + SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]); + return CastBitData(UndefSrcElts, SrcEltBits); } } } @@ -5426,10 +5450,15 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, if (Op.getOpcode() == X86ISD::VZEXT_MOVL && Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) { + unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); + unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; + + APInt UndefSrcElts(NumSrcElts, 0); + SmallVector<APInt, 64> SrcEltBits; auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0)); - MaskBits = CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); - MaskBits = MaskBits.zext(SizeInBits); - return SplitBitData(); + SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits)); + SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0)); + return CastBitData(UndefSrcElts, SrcEltBits); } return false; @@ -6491,16 +6520,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, SDValue NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags); - - if (LDBase->hasAnyUseOfValue(1)) { - SDValue NewChain = - DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(LDBase, 1), - SDValue(NewLd.getNode(), 1)); - DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain); - DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1), - SDValue(NewLd.getNode(), 1)); - } - + DAG.makeEquivalentMemoryOrdering(LDBase, NewLd); return NewLd; }; @@ -6565,19 +6585,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, LDBase->getAlignment(), false/*isVolatile*/, true/*ReadMem*/, false/*WriteMem*/); - - // Make sure the newly-created LOAD is in the same position as LDBase in - // terms of dependency. We create a TokenFactor for LDBase and ResNode, - // and update uses of LDBase's output chain to use the TokenFactor. 
- if (LDBase->hasAnyUseOfValue(1)) { - SDValue NewChain = - DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(LDBase, 1), - SDValue(ResNode.getNode(), 1)); - DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain); - DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1), - SDValue(ResNode.getNode(), 1)); - } - + DAG.makeEquivalentMemoryOrdering(LDBase, ResNode); return DAG.getBitcast(VT, ResNode); } } @@ -9930,17 +9938,7 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, DAG.getMachineFunction().getMachineMemOperand( Ld->getMemOperand(), Offset, SVT.getStoreSize())); - - // Make sure the newly-created LOAD is in the same position as Ld in - // terms of dependency. We create a TokenFactor for Ld and V, - // and update uses of Ld's output chain to use the TokenFactor. - if (Ld->hasAnyUseOfValue(1)) { - SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - SDValue(Ld, 1), SDValue(V.getNode(), 1)); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain); - DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1), - SDValue(V.getNode(), 1)); - } + DAG.makeEquivalentMemoryOrdering(Ld, V); } else if (!BroadcastFromReg) { // We can't broadcast from a vector register. return SDValue(); @@ -10891,9 +10889,10 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( "We need to be changing the number of flipped inputs!"); int PSHUFHalfMask[] = {0, 1, 2, 3}; std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]); - V = DAG.getNode(FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL, - MVT::v8i16, V, - getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG)); + V = DAG.getNode( + FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL, + MVT::getVectorVT(MVT::i16, V.getValueSizeInBits() / 16), V, + getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG)); for (int &M : Mask) if (M >= 0 && M == FixIdx) @@ -12007,18 +12006,22 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, // subvector. bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}); if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) { - // With AVX2 we should use VPERMQ/VPERMPD to allow memory folding. + // With AVX2, use VPERMQ/VPERMPD to allow memory folding. if (Subtarget.hasAVX2() && V2.isUndef()) return SDValue(); - MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), - VT.getVectorNumElements() / 2); - SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, - DAG.getIntPtrConstant(0, DL)); - SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, - OnlyUsesV1 ? V1 : V2, - DAG.getIntPtrConstant(0, DL)); - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV); + // With AVX1, use vperm2f128 (below) to allow load folding. Otherwise, + // this will likely become vinsertf128 which can't fold a 256-bit memop. + if (!isa<LoadSDNode>(peekThroughBitcasts(V1))) { + MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), + VT.getVectorNumElements() / 2); + SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, + DAG.getIntPtrConstant(0, DL)); + SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, + OnlyUsesV1 ? 
V1 : V2, + DAG.getIntPtrConstant(0, DL)); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV); + } } } @@ -19117,7 +19120,7 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, SDValue IMask = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Mask); if (Op.getOpcode() == X86ISD::FSETCCM || - Op.getOpcode() == X86ISD::FSETCCM_RND) + Op.getOpcode() == X86ISD::FSETCCM_RND) return DAG.getNode(ISD::AND, dl, VT, Op, IMask); if (Op.getOpcode() == X86ISD::VFPCLASSS) return DAG.getNode(ISD::OR, dl, VT, Op, IMask); @@ -27968,28 +27971,45 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps, OpMask.size() % RootMask.size() == 0) || OpMask.size() == RootMask.size()) && "The smaller number of elements must divide the larger."); - int MaskWidth = std::max<int>(OpMask.size(), RootMask.size()); - int RootRatio = std::max<int>(1, OpMask.size() / RootMask.size()); - int OpRatio = std::max<int>(1, RootMask.size() / OpMask.size()); - assert(((RootRatio == 1 && OpRatio == 1) || - (RootRatio == 1) != (OpRatio == 1)) && + + // This function can be performance-critical, so we rely on the power-of-2 + // knowledge that we have about the mask sizes to replace div/rem ops with + // bit-masks and shifts. + assert(isPowerOf2_32(RootMask.size()) && "Non-power-of-2 shuffle mask sizes"); + assert(isPowerOf2_32(OpMask.size()) && "Non-power-of-2 shuffle mask sizes"); + unsigned RootMaskSizeLog2 = countTrailingZeros(RootMask.size()); + unsigned OpMaskSizeLog2 = countTrailingZeros(OpMask.size()); + + unsigned MaskWidth = std::max<unsigned>(OpMask.size(), RootMask.size()); + unsigned RootRatio = std::max<unsigned>(1, OpMask.size() >> RootMaskSizeLog2); + unsigned OpRatio = std::max<unsigned>(1, RootMask.size() >> OpMaskSizeLog2); + assert((RootRatio == 1 || OpRatio == 1) && "Must not have a ratio for both incoming and op masks!"); - SmallVector<int, 64> Mask((unsigned)MaskWidth, SM_SentinelUndef); + assert(isPowerOf2_32(MaskWidth) && "Non-power-of-2 shuffle mask sizes"); + assert(isPowerOf2_32(RootRatio) && "Non-power-of-2 shuffle mask sizes"); + assert(isPowerOf2_32(OpRatio) && "Non-power-of-2 shuffle mask sizes"); + unsigned RootRatioLog2 = countTrailingZeros(RootRatio); + unsigned OpRatioLog2 = countTrailingZeros(OpRatio); + + SmallVector<int, 64> Mask(MaskWidth, SM_SentinelUndef); // Merge this shuffle operation's mask into our accumulated mask. Note that // this shuffle's mask will be the first applied to the input, followed by the // root mask to get us all the way to the root value arrangement. The reason // for this order is that we are recursing up the operation chain. - for (int i = 0; i < MaskWidth; ++i) { - int RootIdx = i / RootRatio; + for (unsigned i = 0; i < MaskWidth; ++i) { + unsigned RootIdx = i >> RootRatioLog2; if (RootMask[RootIdx] < 0) { // This is a zero or undef lane, we're done. Mask[i] = RootMask[RootIdx]; continue; } - int RootMaskedIdx = RootMask[RootIdx] * RootRatio + i % RootRatio; + unsigned RootMaskedIdx = + RootRatio == 1 + ? RootMask[RootIdx] + : (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1)); // Just insert the scaled root mask value if it references an input other // than the SrcOp we're currently inserting. 
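[Editor's note on the combineX86ShufflesRecursively hunks above and below: the rewrite replaces division and modulo on shuffle-mask indices with shifts and bit-masks, which is only valid because the mask sizes are powers of two — exactly what the new asserts enforce. A minimal standalone check of the identities being relied on; this sketch is illustrative only and not part of the patch:

    // For any power-of-two Ratio:
    //   i / Ratio == i >> log2(Ratio)
    //   i % Ratio == i & (Ratio - 1)
    #include <cassert>
    int main() {
      for (unsigned Ratio = 1; Ratio <= 16; Ratio <<= 1) {
        unsigned Log2 = 0;                 // plays the role of countTrailingZeros(Ratio)
        while ((1u << Log2) < Ratio) ++Log2;
        for (unsigned i = 0; i != 128; ++i) {
          assert(i / Ratio == i >> Log2);
          assert(i % Ratio == (i & (Ratio - 1)));
        }
      }
      return 0;
    }
]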
@@ -27999,9 +28019,8 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps, continue; } - RootMaskedIdx %= MaskWidth; - - int OpIdx = RootMaskedIdx / OpRatio; + RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1); + unsigned OpIdx = RootMaskedIdx >> OpRatioLog2; if (OpMask[OpIdx] < 0) { // The incoming lanes are zero or undef, it doesn't matter which ones we // are using. @@ -28010,9 +28029,12 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps, } // Ok, we have non-zero lanes, map them through to one of the Op's inputs. - int OpMaskedIdx = OpMask[OpIdx] * OpRatio + RootMaskedIdx % OpRatio; - OpMaskedIdx %= MaskWidth; + unsigned OpMaskedIdx = + OpRatio == 1 + ? OpMask[OpIdx] + : (OpMask[OpIdx] << OpRatioLog2) + (RootMaskedIdx & (OpRatio - 1)); + OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1); if (OpMask[OpIdx] < (int)OpMask.size()) { assert(0 <= InputIdx0 && "Unknown target shuffle input"); OpMaskedIdx += InputIdx0 * MaskWidth; diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index d8702693884d..2620679df251 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1631,6 +1631,7 @@ multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2)))))], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + let isCommutable = IsCommutable in def rrk : AVX512BI<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|", @@ -1764,6 +1765,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode, (_.VT (bitconvert (_.LdFrag addr:$src2))), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + let isCommutable = 1 in def rrik : AVX512AIi8<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, AVX512ICC:$cc), diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 5224a16613cb..c28b35b22977 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -737,19 +737,15 @@ def alignedloadv8f64 : PatFrag<(ops node:$ptr), def alignedloadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (alignedload512 node:$ptr))>; -// Like 'load', but uses special alignment checks suitable for use in +// Like 'vec128load', but uses special alignment checks suitable for use in // memory operands in most SSE instructions, which are required to // be naturally aligned on some targets but not on others. If the subtarget // allows unaligned accesses, match any load, though this may require // setting a feature bit in the processor (on startup, for example). // Opteron 10h and later implement such a feature. -// Avoid non-temporal aligned loads on supported targets. 
-def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return (Subtarget->hasSSEUnalignedMem() || - cast<LoadSDNode>(N)->getAlignment() >= 16) && - (!Subtarget->hasSSE41() || - !(cast<LoadSDNode>(N)->getAlignment() >= 16 && - cast<LoadSDNode>(N)->isNonTemporal())); +def memop : PatFrag<(ops node:$ptr), (vec128load node:$ptr), [{ + return Subtarget->hasSSEUnalignedMem() || + cast<LoadSDNode>(N)->getAlignment() >= 16; }]>; // 128-bit memop pattern fragments diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index ff5d90c4e78b..f3094b781c49 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -898,10 +898,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZrr, X86::VPABSDZrm, 0 }, { X86::VPABSQZrr, X86::VPABSQZrm, 0 }, { X86::VPABSWZrr, X86::VPABSWZrm, 0 }, + { X86::VPCONFLICTDZrr, X86::VPCONFLICTDZrm, 0 }, + { X86::VPCONFLICTQZrr, X86::VPCONFLICTQZrm, 0 }, { X86::VPERMILPDZri, X86::VPERMILPDZmi, 0 }, { X86::VPERMILPSZri, X86::VPERMILPSZmi, 0 }, { X86::VPERMPDZri, X86::VPERMPDZmi, 0 }, { X86::VPERMQZri, X86::VPERMQZmi, 0 }, + { X86::VPLZCNTDZrr, X86::VPLZCNTDZrm, 0 }, + { X86::VPLZCNTQZrr, X86::VPLZCNTQZrm, 0 }, { X86::VPMOVSXBDZrr, X86::VPMOVSXBDZrm, 0 }, { X86::VPMOVSXBQZrr, X86::VPMOVSXBQZrm, TB_NO_REVERSE }, { X86::VPMOVSXBWZrr, X86::VPMOVSXBWZrm, 0 }, @@ -948,10 +952,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZ256rr, X86::VPABSDZ256rm, 0 }, { X86::VPABSQZ256rr, X86::VPABSQZ256rm, 0 }, { X86::VPABSWZ256rr, X86::VPABSWZ256rm, 0 }, + { X86::VPCONFLICTDZ256rr, X86::VPCONFLICTDZ256rm, 0 }, + { X86::VPCONFLICTQZ256rr, X86::VPCONFLICTQZ256rm, 0 }, { X86::VPERMILPDZ256ri, X86::VPERMILPDZ256mi, 0 }, { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256mi, 0 }, { X86::VPERMPDZ256ri, X86::VPERMPDZ256mi, 0 }, { X86::VPERMQZ256ri, X86::VPERMQZ256mi, 0 }, + { X86::VPLZCNTDZ256rr, X86::VPLZCNTDZ256rm, 0 }, + { X86::VPLZCNTQZ256rr, X86::VPLZCNTQZ256rm, 0 }, { X86::VPMOVSXBDZ256rr, X86::VPMOVSXBDZ256rm, TB_NO_REVERSE }, { X86::VPMOVSXBQZ256rr, X86::VPMOVSXBQZ256rm, TB_NO_REVERSE }, { X86::VPMOVSXBWZ256rr, X86::VPMOVSXBWZ256rm, 0 }, @@ -995,8 +1003,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZ128rr, X86::VPABSDZ128rm, 0 }, { X86::VPABSQZ128rr, X86::VPABSQZ128rm, 0 }, { X86::VPABSWZ128rr, X86::VPABSWZ128rm, 0 }, + { X86::VPCONFLICTDZ128rr, X86::VPCONFLICTDZ128rm, 0 }, + { X86::VPCONFLICTQZ128rr, X86::VPCONFLICTQZ128rm, 0 }, { X86::VPERMILPDZ128ri, X86::VPERMILPDZ128mi, 0 }, { X86::VPERMILPSZ128ri, X86::VPERMILPSZ128mi, 0 }, + { X86::VPLZCNTDZ128rr, X86::VPLZCNTDZ128rm, 0 }, + { X86::VPLZCNTQZ128rr, X86::VPLZCNTQZ128rm, 0 }, { X86::VPMOVSXBDZ128rr, X86::VPMOVSXBDZ128rm, TB_NO_REVERSE }, { X86::VPMOVSXBQZ128rr, X86::VPMOVSXBQZ128rm, TB_NO_REVERSE }, { X86::VPMOVSXBWZ128rr, X86::VPMOVSXBWZ128rm, TB_NO_REVERSE }, @@ -2312,10 +2324,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZrrkz, X86::VPABSDZrmkz, 0 }, { X86::VPABSQZrrkz, X86::VPABSQZrmkz, 0 }, { X86::VPABSWZrrkz, X86::VPABSWZrmkz, 0 }, + { X86::VPCONFLICTDZrrkz, X86::VPCONFLICTDZrmkz, 0 }, + { X86::VPCONFLICTQZrrkz, X86::VPCONFLICTQZrmkz, 0 }, { X86::VPERMILPDZrikz, X86::VPERMILPDZmikz, 0 }, { X86::VPERMILPSZrikz, X86::VPERMILPSZmikz, 0 }, { X86::VPERMPDZrikz, X86::VPERMPDZmikz, 0 }, { X86::VPERMQZrikz, X86::VPERMQZmikz, 0 }, + { X86::VPLZCNTDZrrkz, X86::VPLZCNTDZrmkz, 0 }, + { X86::VPLZCNTQZrrkz, X86::VPLZCNTQZrmkz, 0 }, { X86::VPMOVSXBDZrrkz, X86::VPMOVSXBDZrmkz, 0 }, { X86::VPMOVSXBQZrrkz, X86::VPMOVSXBQZrmkz, TB_NO_REVERSE 
}, { X86::VPMOVSXBWZrrkz, X86::VPMOVSXBWZrmkz, 0 }, @@ -2350,10 +2366,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZ256rrkz, X86::VPABSDZ256rmkz, 0 }, { X86::VPABSQZ256rrkz, X86::VPABSQZ256rmkz, 0 }, { X86::VPABSWZ256rrkz, X86::VPABSWZ256rmkz, 0 }, + { X86::VPCONFLICTDZ256rrkz, X86::VPCONFLICTDZ256rmkz, 0 }, + { X86::VPCONFLICTQZ256rrkz, X86::VPCONFLICTQZ256rmkz, 0 }, { X86::VPERMILPDZ256rikz, X86::VPERMILPDZ256mikz, 0 }, { X86::VPERMILPSZ256rikz, X86::VPERMILPSZ256mikz, 0 }, { X86::VPERMPDZ256rikz, X86::VPERMPDZ256mikz, 0 }, { X86::VPERMQZ256rikz, X86::VPERMQZ256mikz, 0 }, + { X86::VPLZCNTDZ256rrkz, X86::VPLZCNTDZ256rmkz, 0 }, + { X86::VPLZCNTQZ256rrkz, X86::VPLZCNTQZ256rmkz, 0 }, { X86::VPMOVSXBDZ256rrkz, X86::VPMOVSXBDZ256rmkz, TB_NO_REVERSE }, { X86::VPMOVSXBQZ256rrkz, X86::VPMOVSXBQZ256rmkz, TB_NO_REVERSE }, { X86::VPMOVSXBWZ256rrkz, X86::VPMOVSXBWZ256rmkz, 0 }, @@ -2385,8 +2405,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZ128rrkz, X86::VPABSDZ128rmkz, 0 }, { X86::VPABSQZ128rrkz, X86::VPABSQZ128rmkz, 0 }, { X86::VPABSWZ128rrkz, X86::VPABSWZ128rmkz, 0 }, + { X86::VPCONFLICTDZ128rrkz, X86::VPCONFLICTDZ128rmkz, 0 }, + { X86::VPCONFLICTQZ128rrkz, X86::VPCONFLICTQZ128rmkz, 0 }, { X86::VPERMILPDZ128rikz, X86::VPERMILPDZ128mikz, 0 }, { X86::VPERMILPSZ128rikz, X86::VPERMILPSZ128mikz, 0 }, + { X86::VPLZCNTDZ128rrkz, X86::VPLZCNTDZ128rmkz, 0 }, + { X86::VPLZCNTQZ128rrkz, X86::VPLZCNTQZ128rmkz, 0 }, { X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, TB_NO_REVERSE }, { X86::VPMOVSXBQZ128rrkz, X86::VPMOVSXBQZ128rmkz, TB_NO_REVERSE }, { X86::VPMOVSXBWZ128rrkz, X86::VPMOVSXBWZ128rmkz, TB_NO_REVERSE }, @@ -2935,10 +2959,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZrrk, X86::VPABSDZrmk, 0 }, { X86::VPABSQZrrk, X86::VPABSQZrmk, 0 }, { X86::VPABSWZrrk, X86::VPABSWZrmk, 0 }, + { X86::VPCONFLICTDZrrk, X86::VPCONFLICTDZrmk, 0 }, + { X86::VPCONFLICTQZrrk, X86::VPCONFLICTQZrmk, 0 }, { X86::VPERMILPDZrik, X86::VPERMILPDZmik, 0 }, { X86::VPERMILPSZrik, X86::VPERMILPSZmik, 0 }, { X86::VPERMPDZrik, X86::VPERMPDZmik, 0 }, { X86::VPERMQZrik, X86::VPERMQZmik, 0 }, + { X86::VPLZCNTDZrrk, X86::VPLZCNTDZrmk, 0 }, + { X86::VPLZCNTQZrrk, X86::VPLZCNTQZrmk, 0 }, { X86::VPMOVSXBDZrrk, X86::VPMOVSXBDZrmk, 0 }, { X86::VPMOVSXBQZrrk, X86::VPMOVSXBQZrmk, TB_NO_REVERSE }, { X86::VPMOVSXBWZrrk, X86::VPMOVSXBWZrmk, 0 }, @@ -2973,10 +3001,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZ256rrk, X86::VPABSDZ256rmk, 0 }, { X86::VPABSQZ256rrk, X86::VPABSQZ256rmk, 0 }, { X86::VPABSWZ256rrk, X86::VPABSWZ256rmk, 0 }, + { X86::VPCONFLICTDZ256rrk, X86::VPCONFLICTDZ256rmk, 0 }, + { X86::VPCONFLICTQZ256rrk, X86::VPCONFLICTQZ256rmk, 0 }, { X86::VPERMILPDZ256rik, X86::VPERMILPDZ256mik, 0 }, { X86::VPERMILPSZ256rik, X86::VPERMILPSZ256mik, 0 }, { X86::VPERMPDZ256rik, X86::VPERMPDZ256mik, 0 }, { X86::VPERMQZ256rik, X86::VPERMQZ256mik, 0 }, + { X86::VPLZCNTDZ256rrk, X86::VPLZCNTDZ256rmk, 0 }, + { X86::VPLZCNTQZ256rrk, X86::VPLZCNTQZ256rmk, 0 }, { X86::VPMOVSXBDZ256rrk, X86::VPMOVSXBDZ256rmk, TB_NO_REVERSE }, { X86::VPMOVSXBQZ256rrk, X86::VPMOVSXBQZ256rmk, TB_NO_REVERSE }, { X86::VPMOVSXBWZ256rrk, X86::VPMOVSXBWZ256rmk, 0 }, @@ -3008,8 +3040,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZ128rrk, X86::VPABSDZ128rmk, 0 }, { X86::VPABSQZ128rrk, X86::VPABSQZ128rmk, 0 }, { X86::VPABSWZ128rrk, X86::VPABSWZ128rmk, 0 }, + { X86::VPCONFLICTDZ128rrk, X86::VPCONFLICTDZ128rmk, 0 }, + { X86::VPCONFLICTQZ128rrk, X86::VPCONFLICTQZ128rmk, 0 }, { 
X86::VPERMILPDZ128rik, X86::VPERMILPDZ128mik, 0 }, { X86::VPERMILPSZ128rik, X86::VPERMILPSZ128mik, 0 }, + { X86::VPLZCNTDZ128rrk, X86::VPLZCNTDZ128rmk, 0 }, + { X86::VPLZCNTQZ128rrk, X86::VPLZCNTQZ128rmk, 0 }, { X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, TB_NO_REVERSE }, { X86::VPMOVSXBQZ128rrk, X86::VPMOVSXBQZ128rmk, TB_NO_REVERSE }, { X86::VPMOVSXBWZ128rrk, X86::VPMOVSXBWZ128rmk, TB_NO_REVERSE }, @@ -3034,6 +3070,64 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSRLDZ128rik, X86::VPSRLDZ128mik, 0 }, { X86::VPSRLQZ128rik, X86::VPSRLQZ128mik, 0 }, { X86::VPSRLWZ128rik, X86::VPSRLWZ128mik, 0 }, + + // AVX-512 masked compare instructions + { X86::VCMPPDZ128rrik, X86::VCMPPDZ128rmik, 0 }, + { X86::VCMPPSZ128rrik, X86::VCMPPSZ128rmik, 0 }, + { X86::VCMPPDZ256rrik, X86::VCMPPDZ256rmik, 0 }, + { X86::VCMPPSZ256rrik, X86::VCMPPSZ256rmik, 0 }, + { X86::VCMPPDZrrik, X86::VCMPPDZrmik, 0 }, + { X86::VCMPPSZrrik, X86::VCMPPSZrmik, 0 }, + { X86::VCMPSDZrr_Intk, X86::VCMPSDZrm_Intk, TB_NO_REVERSE }, + { X86::VCMPSSZrr_Intk, X86::VCMPSSZrm_Intk, TB_NO_REVERSE }, + { X86::VPCMPBZ128rrik, X86::VPCMPBZ128rmik, 0 }, + { X86::VPCMPBZ256rrik, X86::VPCMPBZ256rmik, 0 }, + { X86::VPCMPBZrrik, X86::VPCMPBZrmik, 0 }, + { X86::VPCMPDZ128rrik, X86::VPCMPDZ128rmik, 0 }, + { X86::VPCMPDZ256rrik, X86::VPCMPDZ256rmik, 0 }, + { X86::VPCMPDZrrik, X86::VPCMPDZrmik, 0 }, + { X86::VPCMPEQBZ128rrk, X86::VPCMPEQBZ128rmk, 0 }, + { X86::VPCMPEQBZ256rrk, X86::VPCMPEQBZ256rmk, 0 }, + { X86::VPCMPEQBZrrk, X86::VPCMPEQBZrmk, 0 }, + { X86::VPCMPEQDZ128rrk, X86::VPCMPEQDZ128rmk, 0 }, + { X86::VPCMPEQDZ256rrk, X86::VPCMPEQDZ256rmk, 0 }, + { X86::VPCMPEQDZrrk, X86::VPCMPEQDZrmk, 0 }, + { X86::VPCMPEQQZ128rrk, X86::VPCMPEQQZ128rmk, 0 }, + { X86::VPCMPEQQZ256rrk, X86::VPCMPEQQZ256rmk, 0 }, + { X86::VPCMPEQQZrrk, X86::VPCMPEQQZrmk, 0 }, + { X86::VPCMPEQWZ128rrk, X86::VPCMPEQWZ128rmk, 0 }, + { X86::VPCMPEQWZ256rrk, X86::VPCMPEQWZ256rmk, 0 }, + { X86::VPCMPEQWZrrk, X86::VPCMPEQWZrmk, 0 }, + { X86::VPCMPGTBZ128rrk, X86::VPCMPGTBZ128rmk, 0 }, + { X86::VPCMPGTBZ256rrk, X86::VPCMPGTBZ256rmk, 0 }, + { X86::VPCMPGTBZrrk, X86::VPCMPGTBZrmk, 0 }, + { X86::VPCMPGTDZ128rrk, X86::VPCMPGTDZ128rmk, 0 }, + { X86::VPCMPGTDZ256rrk, X86::VPCMPGTDZ256rmk, 0 }, + { X86::VPCMPGTDZrrk, X86::VPCMPGTDZrmk, 0 }, + { X86::VPCMPGTQZ128rrk, X86::VPCMPGTQZ128rmk, 0 }, + { X86::VPCMPGTQZ256rrk, X86::VPCMPGTQZ256rmk, 0 }, + { X86::VPCMPGTQZrrk, X86::VPCMPGTQZrmk, 0 }, + { X86::VPCMPGTWZ128rrk, X86::VPCMPGTWZ128rmk, 0 }, + { X86::VPCMPGTWZ256rrk, X86::VPCMPGTWZ256rmk, 0 }, + { X86::VPCMPGTWZrrk, X86::VPCMPGTWZrmk, 0 }, + { X86::VPCMPQZ128rrik, X86::VPCMPQZ128rmik, 0 }, + { X86::VPCMPQZ256rrik, X86::VPCMPQZ256rmik, 0 }, + { X86::VPCMPQZrrik, X86::VPCMPQZrmik, 0 }, + { X86::VPCMPUBZ128rrik, X86::VPCMPUBZ128rmik, 0 }, + { X86::VPCMPUBZ256rrik, X86::VPCMPUBZ256rmik, 0 }, + { X86::VPCMPUBZrrik, X86::VPCMPUBZrmik, 0 }, + { X86::VPCMPUDZ128rrik, X86::VPCMPUDZ128rmik, 0 }, + { X86::VPCMPUDZ256rrik, X86::VPCMPUDZ256rmik, 0 }, + { X86::VPCMPUDZrrik, X86::VPCMPUDZrmik, 0 }, + { X86::VPCMPUQZ128rrik, X86::VPCMPUQZ128rmik, 0 }, + { X86::VPCMPUQZ256rrik, X86::VPCMPUQZ256rmik, 0 }, + { X86::VPCMPUQZrrik, X86::VPCMPUQZrmik, 0 }, + { X86::VPCMPUWZ128rrik, X86::VPCMPUWZ128rmik, 0 }, + { X86::VPCMPUWZ256rrik, X86::VPCMPUWZ256rmik, 0 }, + { X86::VPCMPUWZrrik, X86::VPCMPUWZrmik, 0 }, + { X86::VPCMPWZ128rrik, X86::VPCMPWZ128rmik, 0 }, + { X86::VPCMPWZ256rrik, X86::VPCMPWZ256rmik, 0 }, + { X86::VPCMPWZrrik, X86::VPCMPWZrmik, 0 }, }; for (X86MemoryFoldTableEntry Entry : 
MemoryFoldTable3) { @@ -5136,20 +5230,32 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, return nullptr; } } - case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri: - case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri: - case X86::VPCMPBZrri: case X86::VPCMPUBZrri: - case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri: - case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri: - case X86::VPCMPDZrri: case X86::VPCMPUDZrri: - case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri: - case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri: - case X86::VPCMPQZrri: case X86::VPCMPUQZrri: - case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri: - case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri: - case X86::VPCMPWZrri: case X86::VPCMPUWZrri: { + case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri: + case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri: + case X86::VPCMPBZrri: case X86::VPCMPUBZrri: + case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri: + case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri: + case X86::VPCMPDZrri: case X86::VPCMPUDZrri: + case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri: + case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri: + case X86::VPCMPQZrri: case X86::VPCMPUQZrri: + case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri: + case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri: + case X86::VPCMPWZrri: case X86::VPCMPUWZrri: + case X86::VPCMPBZ128rrik: case X86::VPCMPUBZ128rrik: + case X86::VPCMPBZ256rrik: case X86::VPCMPUBZ256rrik: + case X86::VPCMPBZrrik: case X86::VPCMPUBZrrik: + case X86::VPCMPDZ128rrik: case X86::VPCMPUDZ128rrik: + case X86::VPCMPDZ256rrik: case X86::VPCMPUDZ256rrik: + case X86::VPCMPDZrrik: case X86::VPCMPUDZrrik: + case X86::VPCMPQZ128rrik: case X86::VPCMPUQZ128rrik: + case X86::VPCMPQZ256rrik: case X86::VPCMPUQZ256rrik: + case X86::VPCMPQZrrik: case X86::VPCMPUQZrrik: + case X86::VPCMPWZ128rrik: case X86::VPCMPUWZ128rrik: + case X86::VPCMPWZ256rrik: case X86::VPCMPUWZ256rrik: + case X86::VPCMPWZrrik: case X86::VPCMPUWZrrik: { // Flip comparison mode immediate (if necessary). - unsigned Imm = MI.getOperand(3).getImm() & 0x7; + unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x7; switch (Imm) { default: llvm_unreachable("Unreachable!"); case 0x01: Imm = 0x06; break; // LT -> NLE @@ -5163,7 +5269,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, break; } auto &WorkingMI = cloneIfNew(MI); - WorkingMI.getOperand(3).setImm(Imm); + WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm); return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); } diff --git a/lib/Testing/CMakeLists.txt b/lib/Testing/CMakeLists.txt new file mode 100644 index 000000000000..fc23e64eeb7a --- /dev/null +++ b/lib/Testing/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(Support) diff --git a/lib/Testing/LLVMBuild.txt b/lib/Testing/LLVMBuild.txt new file mode 100644 index 000000000000..cdf83736298e --- /dev/null +++ b/lib/Testing/LLVMBuild.txt @@ -0,0 +1,19 @@ +;===- ./lib/Testing/LLVMBuild.txt ------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = Support diff --git a/lib/Testing/Support/CMakeLists.txt b/lib/Testing/Support/CMakeLists.txt new file mode 100644 index 000000000000..fa8dfe59c8bd --- /dev/null +++ b/lib/Testing/Support/CMakeLists.txt @@ -0,0 +1,12 @@ +add_llvm_library(LLVMTestingSupport + Error.cpp + + BUILDTREE_ONLY + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Testing/Support + ) + +include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include) +include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googlemock/include) +target_link_libraries(LLVMTestingSupport PRIVATE gtest)
\ No newline at end of file diff --git a/lib/Testing/Support/Error.cpp b/lib/Testing/Support/Error.cpp new file mode 100644 index 000000000000..ce0da44da408 --- /dev/null +++ b/lib/Testing/Support/Error.cpp @@ -0,0 +1,22 @@ +//===- llvm/Testing/Support/Error.cpp -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Testing/Support/Error.h" + +#include "llvm/ADT/StringRef.h" + +using namespace llvm; + +llvm::detail::ErrorHolder llvm::detail::TakeError(llvm::Error Err) { + bool Succeeded = !static_cast<bool>(Err); + std::string Message; + if (!Succeeded) + Message = toString(std::move(Err)); + return {Succeeded, Message}; +} diff --git a/lib/Testing/Support/LLVMBuild.txt b/lib/Testing/Support/LLVMBuild.txt new file mode 100644 index 000000000000..40853e8172d5 --- /dev/null +++ b/lib/Testing/Support/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./Testing/Support/LLVMBuild.txt --------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = TestingSupport +parent = Libraries +required_libraries = Support diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp index 1b111de06157..d94aa5da8560 100644 --- a/lib/Transforms/IPO/CrossDSOCFI.cpp +++ b/lib/Transforms/IPO/CrossDSOCFI.cpp @@ -95,6 +95,17 @@ void CrossDSOCFI::buildCFICheck(Module &M) { } } + NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"); + if (CfiFunctionsMD) { + for (auto Func : CfiFunctionsMD->operands()) { + assert(Func->getNumOperands() >= 2); + for (unsigned I = 2; I < Func->getNumOperands(); ++I) + if (ConstantInt *TypeId = + extractNumericTypeId(cast<MDNode>(Func->getOperand(I).get()))) + TypeIds.insert(TypeId->getZExtValue()); + } + } + LLVMContext &Ctx = M.getContext(); Constant *C = M.getOrInsertFunction( "__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx), diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index c0dfeede05c5..ad89e40661c6 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -523,40 +523,47 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, if (!Callee || Callee->isDeclaration()) continue; - // If this call site is dead and it is to a readonly function, we should - // just delete the call instead of trying to inline it, regardless of - // size. This happens because IPSCCP propagates the result out of the - // call and then we're left with the dead call. - if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) { - DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction() - << "\n"); - // Update the call graph by deleting the edge from Callee to Caller. 
- CG[Caller]->removeCallEdgeFor(CS); - CS.getInstruction()->eraseFromParent(); - ++NumCallsDeleted; - } else { + Instruction *Instr = CS.getInstruction(); + + bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI); + + int InlineHistoryID; + if (!IsTriviallyDead) { // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee // itself. If so, we'd be recursively inlining the same function, // which would provide the same callsites, which would cause us to // infinitely inline. - int InlineHistoryID = CallSites[CSi].second; + InlineHistoryID = CallSites[CSi].second; if (InlineHistoryID != -1 && InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) continue; + } + // FIXME for new PM: because of the old PM we currently generate ORE and + // in turn BFI on demand. With the new PM, the ORE dependency should + // just become a regular analysis dependency. + OptimizationRemarkEmitter ORE(Caller); + + // If the policy determines that we should inline this function, + // delete the call instead. + if (!shouldInline(CS, GetInlineCost, ORE)) + continue; + + // If this call site is dead and it is to a readonly function, we should + // just delete the call instead of trying to inline it, regardless of + // size. This happens because IPSCCP propagates the result out of the + // call and then we're left with the dead call. + if (IsTriviallyDead) { + DEBUG(dbgs() << " -> Deleting dead call: " << *Instr << "\n"); + // Update the call graph by deleting the edge from Callee to Caller. + CG[Caller]->removeCallEdgeFor(CS); + Instr->eraseFromParent(); + ++NumCallsDeleted; + } else { // Get DebugLoc to report. CS will be invalid after Inliner. - DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); + DebugLoc DLoc = Instr->getDebugLoc(); BasicBlock *Block = CS.getParent(); - // FIXME for new PM: because of the old PM we currently generate ORE and - // in turn BFI on demand. With the new PM, the ORE dependency should - // just become a regular analysis dependency. - OptimizationRemarkEmitter ORE(Caller); - - // If the policy determines that we should inline this function, - // try to do so. - if (!shouldInline(CS, GetInlineCost, ORE)) - continue; // Attempt to inline the function. using namespace ore; diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp index 90896d285f5a..b406c22c69d7 100644 --- a/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/lib/Transforms/IPO/LowerTypeTests.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -206,17 +207,26 @@ struct ByteArrayInfo { class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> { GlobalObject *GO; size_t NTypes; + // For functions: true if this is a definition (either in the merged module or + // in one of the thinlto modules). + bool IsDefinition; + // For functions: true if this function is either defined or used in a thinlto + // module and its jumptable entry needs to be exported to thinlto backends. 
+ bool IsExported; friend TrailingObjects; size_t numTrailingObjects(OverloadToken<MDNode *>) const { return NTypes; } public: static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO, + bool IsDefinition, bool IsExported, ArrayRef<MDNode *> Types) { auto *GTM = static_cast<GlobalTypeMember *>(Alloc.Allocate( totalSizeToAlloc<MDNode *>(Types.size()), alignof(GlobalTypeMember))); GTM->GO = GO; GTM->NTypes = Types.size(); + GTM->IsDefinition = IsDefinition; + GTM->IsExported = IsExported; std::uninitialized_copy(Types.begin(), Types.end(), GTM->getTrailingObjects<MDNode *>()); return GTM; @@ -224,6 +234,12 @@ public: GlobalObject *getGlobal() const { return GO; } + bool isDefinition() const { + return IsDefinition; + } + bool isExported() const { + return IsExported; + } ArrayRef<MDNode *> types() const { return makeArrayRef(getTrailingObjects<MDNode *>(), NTypes); } @@ -294,6 +310,7 @@ class LowerTypeTestsModule { void exportTypeId(StringRef TypeId, const TypeIdLowering &TIL); TypeIdLowering importTypeId(StringRef TypeId); void importTypeTest(CallInst *CI); + void importFunction(Function *F, bool isDefinition); BitSetInfo buildBitSet(Metadata *TypeId, @@ -820,6 +837,41 @@ void LowerTypeTestsModule::importTypeTest(CallInst *CI) { CI->eraseFromParent(); } +// ThinLTO backend: the function F has a jump table entry; update this module +// accordingly. isDefinition describes the type of the jump table entry. +void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) { + assert(F->getType()->getAddressSpace() == 0); + + // Declaration of a local function - nothing to do. + if (F->isDeclarationForLinker() && isDefinition) + return; + + GlobalValue::VisibilityTypes Visibility = F->getVisibility(); + std::string Name = F->getName(); + Function *FDecl; + + if (F->isDeclarationForLinker() && !isDefinition) { + // Declaration of an external function. + FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, + Name + ".cfi_jt", &M); + FDecl->setVisibility(GlobalValue::HiddenVisibility); + } else { + // Definition. + assert(isDefinition); + F->setName(Name + ".cfi"); + F->setLinkage(GlobalValue::ExternalLinkage); + F->setVisibility(GlobalValue::HiddenVisibility); + FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, + Name, &M); + FDecl->setVisibility(Visibility); + } + + if (F->isWeakForLinker()) + replaceWeakDeclarationWithJumpTablePtr(F, FDecl); + else + F->replaceAllUsesWith(FDecl); +} + void LowerTypeTestsModule::lowerTypeTestCalls( ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr, const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) { @@ -1143,7 +1195,6 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( // arithmetic that we normally use for globals. // FIXME: find a better way to represent the jumptable in the IR. - assert(!Functions.empty()); // Build a simple layout based on the regular layout of jump tables. @@ -1167,6 +1218,7 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( // references to the original functions with references to the aliases. 
for (unsigned I = 0; I != Functions.size(); ++I) { Function *F = cast<Function>(Functions[I]->getGlobal()); + bool IsDefinition = Functions[I]->isDefinition(); Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast( ConstantExpr::getInBoundsGetElementPtr( @@ -1174,7 +1226,18 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0), ConstantInt::get(IntPtrTy, I)}), F->getType()); - if (F->isDeclarationForLinker()) { + if (Functions[I]->isExported()) { + if (IsDefinition) { + ExportSummary->cfiFunctionDefs().insert(F->getName()); + } else { + GlobalAlias *JtAlias = GlobalAlias::create( + F->getValueType(), 0, GlobalValue::ExternalLinkage, + F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M); + JtAlias->setVisibility(GlobalValue::HiddenVisibility); + ExportSummary->cfiFunctionDecls().insert(F->getName()); + } + } + if (!IsDefinition) { if (F->isWeakForLinker()) replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr); else @@ -1182,9 +1245,8 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( } else { assert(F->getType()->getAddressSpace() == 0); - GlobalAlias *FAlias = GlobalAlias::create(F->getValueType(), 0, - F->getLinkage(), "", - CombinedGlobalElemPtr, &M); + GlobalAlias *FAlias = GlobalAlias::create( + F->getValueType(), 0, F->getLinkage(), "", CombinedGlobalElemPtr, &M); FAlias->setVisibility(F->getVisibility()); FAlias->takeName(F); if (FAlias->hasName()) @@ -1353,15 +1415,37 @@ bool LowerTypeTestsModule::runForTesting(Module &M) { bool LowerTypeTestsModule::lower() { Function *TypeTestFunc = M.getFunction(Intrinsic::getName(Intrinsic::type_test)); - if ((!TypeTestFunc || TypeTestFunc->use_empty()) && !ExportSummary) + if ((!TypeTestFunc || TypeTestFunc->use_empty()) && !ExportSummary && + !ImportSummary) return false; if (ImportSummary) { - for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end(); - UI != UE;) { - auto *CI = cast<CallInst>((*UI++).getUser()); - importTypeTest(CI); + if (TypeTestFunc) { + for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end(); + UI != UE;) { + auto *CI = cast<CallInst>((*UI++).getUser()); + importTypeTest(CI); + } + } + + SmallVector<Function *, 8> Defs; + SmallVector<Function *, 8> Decls; + for (auto &F : M) { + // CFI functions are either external, or promoted. A local function may + // have the same name, but it's not the one we are looking for. 
+ if (F.hasLocalLinkage()) + continue; + if (ImportSummary->cfiFunctionDefs().count(F.getName())) + Defs.push_back(&F); + else if (ImportSummary->cfiFunctionDecls().count(F.getName())) + Decls.push_back(&F); } + + for (auto F : Defs) + importFunction(F, /*isDefinition*/ true); + for (auto F : Decls) + importFunction(F, /*isDefinition*/ false); + return true; } @@ -1387,6 +1471,58 @@ bool LowerTypeTestsModule::lower() { llvm::DenseMap<Metadata *, TIInfo> TypeIdInfo; unsigned I = 0; SmallVector<MDNode *, 2> Types; + + struct ExportedFunctionInfo { + CfiFunctionLinkage Linkage; + MDNode *FuncMD; // {name, linkage, type[, type...]} + }; + DenseMap<StringRef, ExportedFunctionInfo> ExportedFunctions; + if (ExportSummary) { + NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"); + if (CfiFunctionsMD) { + for (auto FuncMD : CfiFunctionsMD->operands()) { + assert(FuncMD->getNumOperands() >= 2); + StringRef FunctionName = + cast<MDString>(FuncMD->getOperand(0))->getString(); + if (!ExportSummary->isGUIDLive(GlobalValue::getGUID( + GlobalValue::dropLLVMManglingEscape(FunctionName)))) + continue; + CfiFunctionLinkage Linkage = static_cast<CfiFunctionLinkage>( + cast<ConstantAsMetadata>(FuncMD->getOperand(1)) + ->getValue() + ->getUniqueInteger() + .getZExtValue()); + auto P = ExportedFunctions.insert({FunctionName, {Linkage, FuncMD}}); + if (!P.second && P.first->second.Linkage != CFL_Definition) + P.first->second = {Linkage, FuncMD}; + } + + for (const auto &P : ExportedFunctions) { + StringRef FunctionName = P.first; + CfiFunctionLinkage Linkage = P.second.Linkage; + MDNode *FuncMD = P.second.FuncMD; + Function *F = M.getFunction(FunctionName); + if (!F) + F = Function::Create( + FunctionType::get(Type::getVoidTy(M.getContext()), false), + GlobalVariable::ExternalLinkage, FunctionName, &M); + + if (Linkage == CFL_Definition) + F->eraseMetadata(LLVMContext::MD_type); + + if (F->isDeclaration()) { + if (Linkage == CFL_WeakDeclaration) + F->setLinkage(GlobalValue::ExternalWeakLinkage); + + SmallVector<MDNode *, 2> Types; + for (unsigned I = 2; I < FuncMD->getNumOperands(); ++I) + F->addMetadata(LLVMContext::MD_type, + *cast<MDNode>(FuncMD->getOperand(I).get())); + } + } + } + } + for (GlobalObject &GO : M.global_objects()) { if (isa<GlobalVariable>(GO) && GO.isDeclarationForLinker()) continue; @@ -1396,7 +1532,15 @@ bool LowerTypeTestsModule::lower() { if (Types.empty()) continue; - auto *GTM = GlobalTypeMember::create(Alloc, &GO, Types); + bool IsDefinition = !GO.isDeclarationForLinker(); + bool IsExported = false; + if (isa<Function>(GO) && ExportedFunctions.count(GO.getName())) { + IsDefinition |= ExportedFunctions[GO.getName()].Linkage == CFL_Definition; + IsExported = true; + } + + auto *GTM = + GlobalTypeMember::create(Alloc, &GO, IsDefinition, IsExported, Types); for (MDNode *Type : Types) { verifyTypeMDNode(&GO, Type); auto &Info = TypeIdInfo[cast<MDNode>(Type)->getOperand(1)]; diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index ea805efc66b7..8840435af642 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -103,6 +103,35 @@ struct PartialInlinerImpl { bool run(Module &M); Function *unswitchFunction(Function *F); + // This class speculatively clones the function to be partially inlined.
+ // At the end of partial inlining, the remaining callsites to the cloned + // function that are not partially inlined will be fixed up to reference + // the original function, and the cloned function will be erased. + struct FunctionCloner { + FunctionCloner(Function *F, FunctionOutliningInfo *OI); + ~FunctionCloner(); + + // Prepare for function outlining: making sure there is only + // one incoming edge from the extracted/outlined region to + // the return block. + void NormalizeReturnBlock(); + + // Do function outlining: + Function *doFunctionOutlining(); + + Function *OrigFunc = nullptr; + Function *ClonedFunc = nullptr; + Function *OutlinedFunc = nullptr; + BasicBlock *OutliningCallBB = nullptr; + // ClonedFunc is inlined in one of its callers after function + // outlining. + bool IsFunctionInlined = false; + // The cost of the region to be outlined. + int OutlinedRegionCost = 0; + std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr; + std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr; + }; + private: int NumPartialInlining = 0; std::function<AssumptionCache &(Function &)> *GetAssumptionCache; @@ -114,27 +143,18 @@ private: // The result is no larger than 1 and is represented using BP. // (Note that the outlined region's 'head' block can only have incoming // edges from the guarding entry blocks). - BranchProbability getOutliningCallBBRelativeFreq(Function *F, - FunctionOutliningInfo *OI, - Function *DuplicateFunction, - BlockFrequencyInfo *BFI, - BasicBlock *OutliningCallBB); + BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner); // Return true if the callee of CS should be partially inlined with // profit. - bool shouldPartialInline(CallSite CS, Function *F, FunctionOutliningInfo *OI, - BlockFrequencyInfo *CalleeBFI, - BasicBlock *OutliningCallBB, - int OutliningCallOverhead, + bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner, + BlockFrequency WeightedOutliningRcost, OptimizationRemarkEmitter &ORE); // Try to inline DuplicateFunction (cloned from F with call to // the OutlinedFunction into its callers. Return true // if there is any successful inlining. - bool tryPartialInline(Function *DuplicateFunction, - Function *F, /*orignal function */ - FunctionOutliningInfo *OI, Function *OutlinedFunction, - BlockFrequencyInfo *CalleeBFI); + bool tryPartialInline(FunctionCloner &Cloner); // Compute the mapping from use site of DuplicationFunction to the enclosing // BB's profile count. @@ -146,7 +166,7 @@ private: NumPartialInlining >= MaxNumPartialInlining); } - CallSite getCallSite(User *U) { + static CallSite getCallSite(User *U) { CallSite CS; if (CallInst *CI = dyn_cast<CallInst>(U)) CS = CallSite(CI); @@ -157,7 +177,7 @@ private: return CS; } - CallSite getOneCallSiteTo(Function *F) { + static CallSite getOneCallSiteTo(Function *F) { User *User = *F->user_begin(); return getCallSite(User); } @@ -171,20 +191,15 @@ private: // Returns the costs associated with function outlining: // - The first value is the non-weighted runtime cost for making the call - // to the outlined function 'OutlinedFunction', including the addtional - // setup cost in the outlined function itself; + // to the outlined function, including the additional setup cost in the + // outlined function itself; // - The second value is the estimated size of the new call sequence in - // basic block 'OutliningCallBB'; - // - The third value is the estimated size of the original code from - // function 'F' that is extracted into the outlined function.
- std::tuple<int, int, int> - computeOutliningCosts(Function *F, const FunctionOutliningInfo *OutliningInfo, - Function *OutlinedFunction, - BasicBlock *OutliningCallBB); + // basic block Cloner.OutliningCallBB; + std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner); // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to // approximate both the size and runtime cost (Note that in the current // inline cost analysis, there is no clear distinction there either). - int computeBBInlineCost(BasicBlock *BB); + static int computeBBInlineCost(BasicBlock *BB); std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F); @@ -396,19 +411,19 @@ static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) { return false; } -BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq( - Function *F, FunctionOutliningInfo *OI, Function *DuplicateFunction, - BlockFrequencyInfo *BFI, BasicBlock *OutliningCallBB) { +BranchProbability +PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) { auto EntryFreq = - BFI->getBlockFreq(&DuplicateFunction->getEntryBlock()); - auto OutliningCallFreq = BFI->getBlockFreq(OutliningCallBB); + Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock()); + auto OutliningCallFreq = + Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB); auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(), EntryFreq.getFrequency()); - if (hasProfileData(F, OI)) + if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get())) return OutlineRegionRelFreq; // When profile data is not available, we need to be conservative in @@ -433,15 +448,17 @@ BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq( } bool PartialInlinerImpl::shouldPartialInline( - CallSite CS, Function *F /* Original Callee */, FunctionOutliningInfo *OI, - BlockFrequencyInfo *CalleeBFI, BasicBlock *OutliningCallBB, - int NonWeightedOutliningRcost, OptimizationRemarkEmitter &ORE) { + CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost, + OptimizationRemarkEmitter &ORE) { + using namespace ore; if (SkipCostAnalysis) return true; Instruction *Call = CS.getInstruction(); Function *Callee = CS.getCalledFunction(); + assert(Callee == Cloner.ClonedFunc); + Function *Caller = CS.getCaller(); auto &CalleeTTI = (*GetTTI)(*Callee); InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI, @@ -449,14 +466,14 @@ bool PartialInlinerImpl::shouldPartialInline( if (IC.isAlways()) { ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) - << NV("Callee", F) + << NV("Callee", Cloner.OrigFunc) << " should always be fully inlined, not partially"); return false; } if (IC.isNever()) { ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) - << NV("Callee", F) << " not partially inlined into " + << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " because it should never be inlined (cost=never)"); return false; @@ -464,29 +481,25 @@ bool PartialInlinerImpl::shouldPartialInline( if (!IC) { ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call) - << NV("Callee", F) << " not partially inlined into " + << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " because too costly to inline (cost=" << NV("Cost", IC.getCost()) << ", threshold=" << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); return false; } const DataLayout &DL = 
Caller->getParent()->getDataLayout(); + // The savings of eliminating the call: int NonWeightedSavings = getCallsiteCost(CS, DL); BlockFrequency NormWeightedSavings(NonWeightedSavings); - auto RelativeFreq = - getOutliningCallBBRelativeFreq(F, OI, Callee, CalleeBFI, OutliningCallBB); - auto NormWeightedRcost = - BlockFrequency(NonWeightedOutliningRcost) * RelativeFreq; - // Weighted saving is smaller than weighted cost, return false - if (NormWeightedSavings < NormWeightedRcost) { + if (NormWeightedSavings < WeightedOutliningRcost) { ORE.emit( OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call) - << NV("Callee", F) << " not partially inlined into " + << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " runtime overhead (overhead=" - << NV("Overhead", (unsigned)NormWeightedRcost.getFrequency()) + << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency()) << ", savings=" << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")" << " of making the outlined call is too high"); @@ -495,7 +508,7 @@ bool PartialInlinerImpl::shouldPartialInline( } ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call) - << NV("Callee", F) << " can be partially inlined into " + << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into " << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost()) << " (threshold=" << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); @@ -551,50 +564,32 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) { return InlineCost; } -std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts( - Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction, - BasicBlock *OutliningCallBB) { - // First compute the cost of the outlined region 'OI' in the original - // function 'F'. - // FIXME: The code extractor (outliner) can now do code sinking/hoisting - // to reduce outlining cost. The hoisted/sunk code currently do not - // incur any runtime cost so it is still OK to compare the outlined - // function cost with the outlined region in the original function. - // If this ever changes, we will need to introduce new extractor api - // to pass the information. - int OutlinedRegionCost = 0; - for (BasicBlock &BB : *F) { - if (&BB != OI->ReturnBlock && - // Assuming Entry set is small -- do a linear search here: - std::find(OI->Entries.begin(), OI->Entries.end(), &BB) == - OI->Entries.end()) { - OutlinedRegionCost += computeBBInlineCost(&BB); - } - } +std::tuple<int, int> +PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) { // Now compute the cost of the call sequence to the outlined function // 'OutlinedFunction' in BB 'OutliningCallBB': - int OutliningFuncCallCost = computeBBInlineCost(OutliningCallBB); + int OutliningFuncCallCost = computeBBInlineCost(Cloner.OutliningCallBB); // Now compute the cost of the extracted/outlined function itself: int OutlinedFunctionCost = 0; - for (BasicBlock &BB : *OutlinedFunction) { + for (BasicBlock &BB : *Cloner.OutlinedFunc) { OutlinedFunctionCost += computeBBInlineCost(&BB); } - assert(OutlinedFunctionCost >= OutlinedRegionCost && + assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost && "Outlined function cost should be no less than the outlined region"); // The code extractor introduces a new root and exit stub blocks with // additional unconditional branches. Those branches will be eliminated // later with bb layout. 
The cost should be adjusted accordingly: OutlinedFunctionCost -= 2 * InlineConstants::InstrCost; - int OutliningRuntimeOverhead = OutliningFuncCallCost + - (OutlinedFunctionCost - OutlinedRegionCost) + - ExtraOutliningPenalty; + int OutliningRuntimeOverhead = + OutliningFuncCallCost + + (OutlinedFunctionCost - Cloner.OutlinedRegionCost) + + ExtraOutliningPenalty; - return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead, - OutlinedRegionCost); + return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead); } // Create the callsite to profile count map which is @@ -641,42 +636,30 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap( } } -Function *PartialInlinerImpl::unswitchFunction(Function *F) { - - if (F->hasAddressTaken()) - return nullptr; - - // Let inliner handle it - if (F->hasFnAttribute(Attribute::AlwaysInline)) - return nullptr; - - if (F->hasFnAttribute(Attribute::NoInline)) - return nullptr; - - if (PSI->isFunctionEntryCold(F)) - return nullptr; - - if (F->user_begin() == F->user_end()) - return nullptr; - - std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F); - - if (!OI) - return nullptr; +PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F, + FunctionOutliningInfo *OI) + : OrigFunc(F) { + ClonedOI = llvm::make_unique<FunctionOutliningInfo>(); // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; - Function *DuplicateFunction = CloneFunction(F, VMap); - BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]); - BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]); - DenseSet<BasicBlock *> NewEntries; + ClonedFunc = CloneFunction(F, VMap); + + ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]); + ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]); for (BasicBlock *BB : OI->Entries) { - NewEntries.insert(cast<BasicBlock>(VMap[BB])); + ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB])); + } + for (BasicBlock *E : OI->ReturnBlockPreds) { + BasicBlock *NewE = cast<BasicBlock>(VMap[E]); + ClonedOI->ReturnBlockPreds.push_back(NewE); } - // Go ahead and update all uses to the duplicate, so that we can just // use the inliner functionality when we're done hacking. - F->replaceAllUsesWith(DuplicateFunction); + F->replaceAllUsesWith(ClonedFunc); +} + +void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() { auto getFirstPHI = [](BasicBlock *BB) { BasicBlock::iterator I = BB->begin(); @@ -692,14 +675,19 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { } return FirstPhi; }; + // Special hackery is needed with PHI nodes that have inputs from more than // one extracted block. For simplicity, just split the PHIs into a two-level // sequence of PHIs, some of which will go in the extracted region, and some // of which will go outside. 
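A rough scalar analogy of the two-level split described in the comment above (plain C++, not LLVM code; all names are illustrative): a value merged once per predecessor is re-merged in two stages, so that the first-stage merge can live inside the extracted region while the second stays with the caller, and both forms compute the same value.

    #include <cassert>

    // Single merge point: one incoming value per predecessor.
    int threeWayMerge(bool p0, bool p1, int v0, int v1, int v2) {
      return p0 ? v0 : (p1 ? v1 : v2);
    }

    // Two-level form: inner merge over the extracted predecessors,
    // outer merge with the remaining predecessor stays outside.
    int twoLevelMerge(bool p0, bool p1, int v0, int v1, int v2) {
      int inner = p0 ? v0 : v1;      // merge of extracted predecessors
      bool fromInner = p0 || p1;     // did control come from the region?
      return fromInner ? inner : v2; // outer merge in the caller
    }

    int main() {
      for (int p0 = 0; p0 < 2; ++p0)
        for (int p1 = 0; p1 < 2; ++p1)
          assert(threeWayMerge(p0, p1, 1, 2, 3) ==
                 twoLevelMerge(p0, p1, 1, 2, 3));
      return 0;
    }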
- BasicBlock *PreReturn = NewReturnBlock; + BasicBlock *PreReturn = ClonedOI->ReturnBlock; // only split block when necessary: PHINode *FirstPhi = getFirstPHI(PreReturn); - unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size(); + unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size(); + + if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1) + return; + auto IsTrivialPhi = [](PHINode *PN) -> Value * { Value *CommonValue = PN->getIncomingValue(0); if (all_of(PN->incoming_values(), @@ -708,143 +696,185 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { return nullptr; }; - if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) { - - NewReturnBlock = NewReturnBlock->splitBasicBlock( - NewReturnBlock->getFirstNonPHI()->getIterator()); - BasicBlock::iterator I = PreReturn->begin(); - Instruction *Ins = &NewReturnBlock->front(); - SmallVector<Instruction *, 4> DeadPhis; - while (I != PreReturn->end()) { - PHINode *OldPhi = dyn_cast<PHINode>(I); - if (!OldPhi) - break; - - PHINode *RetPhi = - PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins); - OldPhi->replaceAllUsesWith(RetPhi); - Ins = NewReturnBlock->getFirstNonPHI(); + ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock( + ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator()); + BasicBlock::iterator I = PreReturn->begin(); + Instruction *Ins = &ClonedOI->ReturnBlock->front(); + SmallVector<Instruction *, 4> DeadPhis; + while (I != PreReturn->end()) { + PHINode *OldPhi = dyn_cast<PHINode>(I); + if (!OldPhi) + break; - RetPhi->addIncoming(&*I, PreReturn); - for (BasicBlock *E : OI->ReturnBlockPreds) { - BasicBlock *NewE = cast<BasicBlock>(VMap[E]); - RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE); - OldPhi->removeIncomingValue(NewE); - } + PHINode *RetPhi = + PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins); + OldPhi->replaceAllUsesWith(RetPhi); + Ins = ClonedOI->ReturnBlock->getFirstNonPHI(); - // After incoming values splitting, the old phi may become trivial. - // Keeping the trivial phi can introduce definition inside the outline - // region which is live-out, causing necessary overhead (load, store - // arg passing etc). - if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) { - OldPhi->replaceAllUsesWith(OldPhiVal); - DeadPhis.push_back(OldPhi); - } - - ++I; + RetPhi->addIncoming(&*I, PreReturn); + for (BasicBlock *E : ClonedOI->ReturnBlockPreds) { + RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E); + OldPhi->removeIncomingValue(E); } + // After incoming values splitting, the old phi may become trivial. + // Keeping the trivial phi can introduce definition inside the outline + // region which is live-out, causing necessary overhead (load, store + // arg passing etc). + if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) { + OldPhi->replaceAllUsesWith(OldPhiVal); + DeadPhis.push_back(OldPhi); + } + ++I; + } for (auto *DP : DeadPhis) DP->eraseFromParent(); - for (auto E : OI->ReturnBlockPreds) { - BasicBlock *NewE = cast<BasicBlock>(VMap[E]); - NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock); + for (auto E : ClonedOI->ReturnBlockPreds) { + E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock); } - } +} +Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() { // Returns true if the block is to be partial inlined into the caller // (i.e. 
not to be extracted to the out of line function) - auto ToBeInlined = [&](BasicBlock *BB) { - return BB == NewReturnBlock || NewEntries.count(BB); + auto ToBeInlined = [&, this](BasicBlock *BB) { + return BB == ClonedOI->ReturnBlock || + (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) != + ClonedOI->Entries.end()); }; + // Gather up the blocks that we're going to extract. std::vector<BasicBlock *> ToExtract; - ToExtract.push_back(NewNonReturnBlock); - for (BasicBlock &BB : *DuplicateFunction) - if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock) + ToExtract.push_back(ClonedOI->NonReturnBlock); + OutlinedRegionCost += + PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock); + for (BasicBlock &BB : *ClonedFunc) + if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) { ToExtract.push_back(&BB); + // FIXME: the code extractor may hoist/sink more code + // into the outlined function which may make the outlining + // overhead (the difference of the outlined function cost + // and OutliningRegionCost) look larger. + OutlinedRegionCost += computeBBInlineCost(&BB); + } // The CodeExtractor needs a dominator tree. DominatorTree DT; - DT.recalculate(*DuplicateFunction); + DT.recalculate(*ClonedFunc); // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo. LoopInfo LI(DT); - BranchProbabilityInfo BPI(*DuplicateFunction, LI); - BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI); + BranchProbabilityInfo BPI(*ClonedFunc, LI); + ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI)); // Extract the body of the if. - Function *OutlinedFunction = - CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI) - .extractCodeRegion(); + OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, + ClonedFuncBFI.get(), &BPI) + .extractCodeRegion(); + + if (OutlinedFunc) { + OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc) + .getInstruction() + ->getParent(); + assert(OutliningCallBB->getParent() == ClonedFunc); + } - bool AnyInline = - tryPartialInline(DuplicateFunction, F, OI.get(), OutlinedFunction, &BFI); + return OutlinedFunc; +} +PartialInlinerImpl::FunctionCloner::~FunctionCloner() { // Ditch the duplicate, since we're done with it, and rewrite all remaining // users (function pointers, etc.) back to the original function. - DuplicateFunction->replaceAllUsesWith(F); - DuplicateFunction->eraseFromParent(); + ClonedFunc->replaceAllUsesWith(OrigFunc); + ClonedFunc->eraseFromParent(); + if (!IsFunctionInlined) { + // Remove the function that is speculatively created if there is no + // reference. 
+    if (OutlinedFunc)
+      OutlinedFunc->eraseFromParent();
+  }
+}
+
+Function *PartialInlinerImpl::unswitchFunction(Function *F) {
+
+  if (F->hasAddressTaken())
+    return nullptr;
+
+  // Let inliner handle it
+  if (F->hasFnAttribute(Attribute::AlwaysInline))
+    return nullptr;
+
+  if (F->hasFnAttribute(Attribute::NoInline))
+    return nullptr;
+
+  if (PSI->isFunctionEntryCold(F))
+    return nullptr;
+
+  if (F->user_begin() == F->user_end())
+    return nullptr;
+
+  std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
+
+  if (!OI)
+    return nullptr;
+
+  FunctionCloner Cloner(F, OI.get());
+  Cloner.NormalizeReturnBlock();
+  Function *OutlinedFunction = Cloner.doFunctionOutlining();
+
+  bool AnyInline = tryPartialInline(Cloner);
 
   if (AnyInline)
     return OutlinedFunction;
 
-  // Remove the function that is speculatively created:
-  if (OutlinedFunction)
-    OutlinedFunction->eraseFromParent();
-
   return nullptr;
 }
 
-bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction,
-                                          Function *F,
-                                          FunctionOutliningInfo *OI,
-                                          Function *OutlinedFunction,
-                                          BlockFrequencyInfo *CalleeBFI) {
-  if (OutlinedFunction == nullptr)
-    return false;
-
+bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
   int NonWeightedRcost;
   int SizeCost;
-  int OutlinedRegionSizeCost;
-
-  auto OutliningCallBB =
-      getOneCallSiteTo(OutlinedFunction).getInstruction()->getParent();
+  if (Cloner.OutlinedFunc == nullptr)
+    return false;
+
+  std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
 
-  std::tie(SizeCost, NonWeightedRcost, OutlinedRegionSizeCost) =
-      computeOutliningCosts(F, OI, OutlinedFunction, OutliningCallBB);
+  auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
+  auto WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
 
   // The call sequence to the outlined function is larger than the original
   // outlined region size, it does not increase the chances of inlining
-  // 'F' with outlining (The inliner usies the size increase to model the
-  // the cost of inlining a callee).
+  // the function with outlining (The inliner uses the size increase to
+  // model the cost of inlining a callee).
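A minimal standalone sketch of the two profitability tests applied here (plain C++, not the LLVM API; the struct and field names are illustrative only): outlining must shrink the caller, and the frequency-weighted overhead of calling the outlined code must not exceed the savings from eliminating the original callsite.

    #include <cstdint>

    struct Costs {
      int64_t SizeCost;           // size of the new call sequence
      int64_t OutlinedRegionCost; // size of the region removed from the caller
      double RelFreq;             // relative frequency of the outlining call BB
      int64_t NonWeightedRcost;   // runtime overhead of the outlined call
      int64_t CallsiteSavings;    // cost of the callsite being inlined away
    };

    bool worthPartialInlining(const Costs &C) {
      // Test 1: outlining must not grow the caller, or the inliner's
      // size-based model sees no benefit.
      if (C.OutlinedRegionCost < C.SizeCost)
        return false;
      // Test 2: weighted runtime overhead must not exceed the savings of
      // removing the original call.
      return C.CallsiteSavings >= C.RelFreq * C.NonWeightedRcost;
    }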
+ if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) { + OptimizationRemarkEmitter ORE(Cloner.OrigFunc); DebugLoc DLoc; BasicBlock *Block; - std::tie(DLoc, Block) = getOneDebugLoc(DuplicateFunction); + std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc); ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall", DLoc, Block) - << ore::NV("Function", F) + << ore::NV("Function", Cloner.OrigFunc) << " not partially inlined into callers (Original Size = " - << ore::NV("OutlinedRegionOriginalSize", OutlinedRegionSizeCost) + << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost) << ", Size of call sequence to outlined function = " << ore::NV("NewSize", SizeCost) << ")"); return false; } - assert(F->user_begin() == F->user_end() && + assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() && "F's users should all be replaced!"); - std::vector<User *> Users(DuplicateFunction->user_begin(), - DuplicateFunction->user_end()); + + std::vector<User *> Users(Cloner.ClonedFunc->user_begin(), + Cloner.ClonedFunc->user_end()); DenseMap<User *, uint64_t> CallSiteToProfCountMap; - if (F->getEntryCount()) - computeCallsiteToProfCountMap(DuplicateFunction, CallSiteToProfCountMap); + if (Cloner.OrigFunc->getEntryCount()) + computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap); - auto CalleeEntryCount = F->getEntryCount(); + auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount(); uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0); + bool AnyInline = false; for (User *User : Users) { CallSite CS = getCallSite(User); @@ -854,13 +884,12 @@ bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction, OptimizationRemarkEmitter ORE(CS.getCaller()); - if (!shouldPartialInline(CS, F, OI, CalleeBFI, OutliningCallBB, - NonWeightedRcost, ORE)) + if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE)) continue; ORE.emit( OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction()) - << ore::NV("Callee", F) << " partially inlined into " + << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into " << ore::NV("Caller", CS.getCaller())); InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI); @@ -878,8 +907,11 @@ bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction, NumPartialInlined++; } - if (AnyInline && CalleeEntryCount) - F->setEntryCount(CalleeEntryCountV); + if (AnyInline) { + Cloner.IsFunctionInlined = true; + if (CalleeEntryCount) + Cloner.OrigFunc->setEntryCount(CalleeEntryCountV); + } return AnyInline; } diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 16fba32e9805..4bc64ab698ff 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -141,6 +141,10 @@ static cl::opt<int> PreInlineThreshold( cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)")); +static cl::opt<bool> EnableEarlyCSEMemSSA( + "enable-earlycse-memssa", cl::init(false), cl::Hidden, + cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = off)")); + static cl::opt<bool> EnableGVNHoist( "enable-gvn-hoist", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass (default = off)")); @@ -308,7 +312,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // Start of function pass. // Break up aggregate allocas, using SSAUpdater. 
MPM.add(createSROAPass()); - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + MPM.add(createEarlyCSEPass(EnableEarlyCSEMemSSA)); // Catch trivial redundancies if (EnableGVNHoist) MPM.add(createGVNHoistPass()); if (EnableGVNSink) { diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index a7bcc7cc5532..802f470ffe1f 100644 --- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -32,7 +32,8 @@ namespace { // Promote each local-linkage entity defined by ExportM and used by ImportM by // changing visibility and appending the given ModuleId. -void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { +void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId, + SetVector<GlobalValue *> &PromoteExtra) { DenseMap<const Comdat *, Comdat *> RenamedComdats; for (auto &ExportGV : ExportM.global_values()) { if (!ExportGV.hasLocalLinkage()) @@ -40,7 +41,7 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { auto Name = ExportGV.getName(); GlobalValue *ImportGV = ImportM.getNamedValue(Name); - if (!ImportGV || ImportGV->use_empty()) + if ((!ImportGV || ImportGV->use_empty()) && !PromoteExtra.count(&ExportGV)) continue; std::string NewName = (Name + ModuleId).str(); @@ -53,8 +54,10 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { ExportGV.setLinkage(GlobalValue::ExternalLinkage); ExportGV.setVisibility(GlobalValue::HiddenVisibility); - ImportGV->setName(NewName); - ImportGV->setVisibility(GlobalValue::HiddenVisibility); + if (ImportGV) { + ImportGV->setName(NewName); + ImportGV->setVisibility(GlobalValue::HiddenVisibility); + } } if (!RenamedComdats.empty()) @@ -296,6 +299,11 @@ void splitAndWriteThinLTOBitcode( F.setComdat(nullptr); } + SetVector<GlobalValue *> CfiFunctions; + for (auto &F : M) + if ((!F.hasLocalLinkage() || F.hasAddressTaken()) && HasTypeMetadata(&F)) + CfiFunctions.insert(&F); + // Remove all globals with type metadata, globals with comdats that live in // MergedM, and aliases pointing to such globals from the thin LTO module. filterModule(&M, [&](const GlobalValue *GV) { @@ -308,11 +316,39 @@ void splitAndWriteThinLTOBitcode( return true; }); - promoteInternals(*MergedM, M, ModuleId); - promoteInternals(M, *MergedM, ModuleId); + promoteInternals(*MergedM, M, ModuleId, CfiFunctions); + promoteInternals(M, *MergedM, ModuleId, CfiFunctions); + + SmallVector<MDNode *, 8> CfiFunctionMDs; + for (auto V : CfiFunctions) { + Function &F = *cast<Function>(V); + SmallVector<MDNode *, 2> Types; + F.getMetadata(LLVMContext::MD_type, Types); + + auto &Ctx = MergedM->getContext(); + SmallVector<Metadata *, 4> Elts; + Elts.push_back(MDString::get(Ctx, F.getName())); + CfiFunctionLinkage Linkage; + if (!F.isDeclarationForLinker()) + Linkage = CFL_Definition; + else if (F.isWeakForLinker()) + Linkage = CFL_WeakDeclaration; + else + Linkage = CFL_Declaration; + Elts.push_back(ConstantAsMetadata::get( + llvm::ConstantInt::get(Type::getInt8Ty(Ctx), Linkage))); + for (auto Type : Types) + Elts.push_back(Type); + CfiFunctionMDs.push_back(MDTuple::get(Ctx, Elts)); + } - simplifyExternals(*MergedM); + if(!CfiFunctionMDs.empty()) { + NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("cfi.functions"); + for (auto MD : CfiFunctionMDs) + NMD->addOperand(MD); + } + simplifyExternals(*MergedM); // FIXME: Try to re-use BSI and PFI from the original module here. 
ProfileSummaryInfo PSI(M); diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 4fe3225a2172..a881bda5ba98 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -763,8 +763,54 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS, return nullptr; } +// Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) +// Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2) +Value *InstCombiner::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS, + bool JoinedByAnd, + Instruction &CxtI) { + ICmpInst::Predicate Pred = LHS->getPredicate(); + if (Pred != RHS->getPredicate()) + return nullptr; + if (JoinedByAnd && Pred != ICmpInst::ICMP_NE) + return nullptr; + if (!JoinedByAnd && Pred != ICmpInst::ICMP_EQ) + return nullptr; + + // TODO support vector splats + ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1)); + ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS->getOperand(1)); + if (!LHSC || !RHSC || !LHSC->isZero() || !RHSC->isZero()) + return nullptr; + + Value *A, *B, *C, *D; + if (match(LHS->getOperand(0), m_And(m_Value(A), m_Value(B))) && + match(RHS->getOperand(0), m_And(m_Value(C), m_Value(D)))) { + if (A == D || B == D) + std::swap(C, D); + if (B == C) + std::swap(A, B); + + if (A == C && + isKnownToBeAPowerOfTwo(B, false, 0, &CxtI) && + isKnownToBeAPowerOfTwo(D, false, 0, &CxtI)) { + Value *Mask = Builder->CreateOr(B, D); + Value *Masked = Builder->CreateAnd(A, Mask); + auto NewPred = JoinedByAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; + return Builder->CreateICmp(NewPred, Masked, Mask); + } + } + + return nullptr; +} + /// Fold (icmp)&(icmp) if possible. -Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { +Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, + Instruction &CxtI) { + // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2) + // if K1 and K2 are a one-bit mask. + if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, true, CxtI)) + return V; + ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) @@ -1127,8 +1173,8 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) { ICmpInst *ICmp0 = dyn_cast<ICmpInst>(Cast0Src); ICmpInst *ICmp1 = dyn_cast<ICmpInst>(Cast1Src); if (ICmp0 && ICmp1) { - Value *Res = LogicOpc == Instruction::And ? foldAndOfICmps(ICmp0, ICmp1) - : foldOrOfICmps(ICmp0, ICmp1, &I); + Value *Res = LogicOpc == Instruction::And ? 
foldAndOfICmps(ICmp0, ICmp1, I) + : foldOrOfICmps(ICmp0, ICmp1, I); if (Res) return CastInst::Create(CastOpcode, Res, DestTy); return nullptr; @@ -1426,7 +1472,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { ICmpInst *LHS = dyn_cast<ICmpInst>(Op0); ICmpInst *RHS = dyn_cast<ICmpInst>(Op1); if (LHS && RHS) - if (Value *Res = foldAndOfICmps(LHS, RHS)) + if (Value *Res = foldAndOfICmps(LHS, RHS, I)) return replaceInstUsesWith(I, Res); // TODO: Make this recursive; it's a little tricky because an arbitrary @@ -1434,18 +1480,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { Value *X, *Y; if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast<ICmpInst>(X)) - if (Value *Res = foldAndOfICmps(LHS, Cmp)) + if (Value *Res = foldAndOfICmps(LHS, Cmp, I)) return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y)); if (auto *Cmp = dyn_cast<ICmpInst>(Y)) - if (Value *Res = foldAndOfICmps(LHS, Cmp)) + if (Value *Res = foldAndOfICmps(LHS, Cmp, I)) return replaceInstUsesWith(I, Builder->CreateAnd(Res, X)); } if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast<ICmpInst>(X)) - if (Value *Res = foldAndOfICmps(Cmp, RHS)) + if (Value *Res = foldAndOfICmps(Cmp, RHS, I)) return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y)); if (auto *Cmp = dyn_cast<ICmpInst>(Y)) - if (Value *Res = foldAndOfICmps(Cmp, RHS)) + if (Value *Res = foldAndOfICmps(Cmp, RHS, I)) return replaceInstUsesWith(I, Builder->CreateAnd(Res, X)); } } @@ -1591,41 +1637,16 @@ static Value *matchSelectFromAndOr(Value *A, Value *C, Value *B, Value *D, /// Fold (icmp)|(icmp) if possible. Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, - Instruction *CxtI) { - ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); - + Instruction &CxtI) { // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) // if K1 and K2 are a one-bit mask. 
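Both directions of this rewrite can be sanity-checked exhaustively outside of LLVM. A minimal standalone program (plain C++, not part of the patch; the one-bit mask values are chosen arbitrarily for the test):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t K1 = 0x04, K2 = 0x20; // one-bit masks (powers of two)
      for (uint32_t A = 0; A < 256; ++A) {
        // (iszero(A & K1) | iszero(A & K2))  ==  ((A & (K1|K2)) != (K1|K2))
        bool OrForm = ((A & K1) == 0) || ((A & K2) == 0);
        assert(OrForm == ((A & (K1 | K2)) != (K1 | K2)));
        // (!iszero(A & K1) & !iszero(A & K2))  ==  ((A & (K1|K2)) == (K1|K2))
        bool AndForm = ((A & K1) != 0) && ((A & K2) != 0);
        assert(AndForm == ((A & (K1 | K2)) == (K1 | K2)));
      }
      return 0;
    }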
- ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1)); - ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS->getOperand(1)); + if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, false, CxtI)) + return V; - if (LHS->getPredicate() == ICmpInst::ICMP_EQ && LHSC && LHSC->isZero() && - RHS->getPredicate() == ICmpInst::ICMP_EQ && RHSC && RHSC->isZero()) { - - BinaryOperator *LAnd = dyn_cast<BinaryOperator>(LHS->getOperand(0)); - BinaryOperator *RAnd = dyn_cast<BinaryOperator>(RHS->getOperand(0)); - if (LAnd && RAnd && LAnd->hasOneUse() && RHS->hasOneUse() && - LAnd->getOpcode() == Instruction::And && - RAnd->getOpcode() == Instruction::And) { - - Value *Mask = nullptr; - Value *Masked = nullptr; - if (LAnd->getOperand(0) == RAnd->getOperand(0) && - isKnownToBeAPowerOfTwo(LAnd->getOperand(1), false, 0, CxtI) && - isKnownToBeAPowerOfTwo(RAnd->getOperand(1), false, 0, CxtI)) { - Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1)); - Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask); - } else if (LAnd->getOperand(1) == RAnd->getOperand(1) && - isKnownToBeAPowerOfTwo(LAnd->getOperand(0), false, 0, CxtI) && - isKnownToBeAPowerOfTwo(RAnd->getOperand(0), false, 0, CxtI)) { - Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0)); - Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask); - } + ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); - if (Masked) - return Builder->CreateICmp(ICmpInst::ICMP_NE, Masked, Mask); - } - } + ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1)); + ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS->getOperand(1)); // Fold (icmp ult/ule (A + C1), C3) | (icmp ult/ule (A + C2), C3) // --> (icmp ult/ule ((A & ~(C1 ^ C2)) + max(C1, C2)), C3) @@ -2117,12 +2138,16 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } // (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C + // FIXME: The two hasOneUse calls here are the same call, maybe we were + // supposed to check Op1->operand(0)? if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) if (Op1->hasOneUse() || cast<BinaryOperator>(Op1)->hasOneUse()) return BinaryOperator::CreateOr(Op0, C); // ((A ^ C) ^ B) | (B ^ A) -> (B ^ A) | C + // FIXME: The two hasOneUse calls here are the same call, maybe we were + // supposed to check Op0->operand(0)? 
if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B)))) if (match(Op1, m_Xor(m_Specific(B), m_Specific(A)))) if (Op0->hasOneUse() || cast<BinaryOperator>(Op0)->hasOneUse()) @@ -2194,7 +2219,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { ICmpInst *LHS = dyn_cast<ICmpInst>(Op0); ICmpInst *RHS = dyn_cast<ICmpInst>(Op1); if (LHS && RHS) - if (Value *Res = foldOrOfICmps(LHS, RHS, &I)) + if (Value *Res = foldOrOfICmps(LHS, RHS, I)) return replaceInstUsesWith(I, Res); // TODO: Make this recursive; it's a little tricky because an arbitrary @@ -2202,18 +2227,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Value *X, *Y; if (LHS && match(Op1, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast<ICmpInst>(X)) - if (Value *Res = foldOrOfICmps(LHS, Cmp, &I)) + if (Value *Res = foldOrOfICmps(LHS, Cmp, I)) return replaceInstUsesWith(I, Builder->CreateOr(Res, Y)); if (auto *Cmp = dyn_cast<ICmpInst>(Y)) - if (Value *Res = foldOrOfICmps(LHS, Cmp, &I)) + if (Value *Res = foldOrOfICmps(LHS, Cmp, I)) return replaceInstUsesWith(I, Builder->CreateOr(Res, X)); } if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast<ICmpInst>(X)) - if (Value *Res = foldOrOfICmps(Cmp, RHS, &I)) + if (Value *Res = foldOrOfICmps(Cmp, RHS, I)) return replaceInstUsesWith(I, Builder->CreateOr(Res, Y)); if (auto *Cmp = dyn_cast<ICmpInst>(Y)) - if (Value *Res = foldOrOfICmps(Cmp, RHS, &I)) + if (Value *Res = foldOrOfICmps(Cmp, RHS, I)) return replaceInstUsesWith(I, Builder->CreateOr(Res, X)); } } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index d29ed49eca0b..c0830a5d2112 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -94,75 +94,80 @@ static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) { return ConstantVector::get(BoolVec); } -Instruction * -InstCombiner::SimplifyElementAtomicMemCpy(ElementAtomicMemCpyInst *AMI) { +Instruction *InstCombiner::SimplifyElementUnorderedAtomicMemCpy( + ElementUnorderedAtomicMemCpyInst *AMI) { // Try to unfold this intrinsic into sequence of explicit atomic loads and // stores. // First check that number of elements is compile time constant. - auto *NumElementsCI = dyn_cast<ConstantInt>(AMI->getNumElements()); - if (!NumElementsCI) + auto *LengthCI = dyn_cast<ConstantInt>(AMI->getLength()); + if (!LengthCI) return nullptr; // Check that there are not too many elements. - uint64_t NumElements = NumElementsCI->getZExtValue(); + uint64_t LengthInBytes = LengthCI->getZExtValue(); + uint32_t ElementSizeInBytes = AMI->getElementSizeInBytes(); + uint64_t NumElements = LengthInBytes / ElementSizeInBytes; if (NumElements >= UnfoldElementAtomicMemcpyMaxElements) return nullptr; - // Don't unfold into illegal integers - uint64_t ElementSizeInBytes = AMI->getElementSizeInBytes() * 8; - if (!getDataLayout().isLegalInteger(ElementSizeInBytes)) - return nullptr; + // Only expand if there are elements to copy. + if (NumElements > 0) { + // Don't unfold into illegal integers + uint64_t ElementSizeInBits = ElementSizeInBytes * 8; + if (!getDataLayout().isLegalInteger(ElementSizeInBits)) + return nullptr; - // Cast source and destination to the correct type. Intrinsic input arguments - // are usually represented as i8*. - // Often operands will be explicitly casted to i8* and we can just strip - // those casts instead of inserting new ones. 
However it's easier to rely on
-  // other InstCombine rules which will cover trivial cases anyway.
-  Value *Src = AMI->getRawSource();
-  Value *Dst = AMI->getRawDest();
-  Type *ElementPointerType = Type::getIntNPtrTy(
-      AMI->getContext(), ElementSizeInBytes, Src->getType()->getPointerAddressSpace());
-
-  Value *SrcCasted = Builder->CreatePointerCast(Src, ElementPointerType,
-                                                "memcpy_unfold.src_casted");
-  Value *DstCasted = Builder->CreatePointerCast(Dst, ElementPointerType,
-                                                "memcpy_unfold.dst_casted");
-
-  for (uint64_t i = 0; i < NumElements; ++i) {
-    // Get current element addresses
-    ConstantInt *ElementIdxCI =
-        ConstantInt::get(AMI->getContext(), APInt(64, i));
-    Value *SrcElementAddr =
-        Builder->CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr");
-    Value *DstElementAddr =
-        Builder->CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr");
-
-    // Load from the source. Transfer alignment information and mark load as
-    // unordered atomic.
-    LoadInst *Load = Builder->CreateLoad(SrcElementAddr, "memcpy_unfold.val");
-    Load->setOrdering(AtomicOrdering::Unordered);
-    // We know alignment of the first element. It is also guaranteed by the
-    // verifier that element size is less or equal than first element alignment
-    // and both of this values are powers of two.
-    // This means that all subsequent accesses are at least element size
-    // aligned.
-    // TODO: We can infer better alignment but there is no evidence that this
-    // will matter.
-    Load->setAlignment(i == 0 ? AMI->getSrcAlignment()
-                              : AMI->getElementSizeInBytes());
-    Load->setDebugLoc(AMI->getDebugLoc());
-
-    // Store loaded value via unordered atomic store.
-    StoreInst *Store = Builder->CreateStore(Load, DstElementAddr);
-    Store->setOrdering(AtomicOrdering::Unordered);
-    Store->setAlignment(i == 0 ? AMI->getDstAlignment()
-                               : AMI->getElementSizeInBytes());
-    Store->setDebugLoc(AMI->getDebugLoc());
+    // Cast source and destination to the correct type. Intrinsic input
+    // arguments are usually represented as i8*. Often operands will be
+    // explicitly casted to i8* and we can just strip those casts instead of
+    // inserting new ones. However it's easier to rely on other InstCombine
+    // rules which will cover trivial cases anyway.
+    Value *Src = AMI->getRawSource();
+    Value *Dst = AMI->getRawDest();
+    Type *ElementPointerType =
+        Type::getIntNPtrTy(AMI->getContext(), ElementSizeInBits,
+                           Src->getType()->getPointerAddressSpace());
+
+    Value *SrcCasted = Builder->CreatePointerCast(Src, ElementPointerType,
+                                                  "memcpy_unfold.src_casted");
+    Value *DstCasted = Builder->CreatePointerCast(Dst, ElementPointerType,
+                                                  "memcpy_unfold.dst_casted");
+
+    for (uint64_t i = 0; i < NumElements; ++i) {
+      // Get current element addresses
+      ConstantInt *ElementIdxCI =
+          ConstantInt::get(AMI->getContext(), APInt(64, i));
+      Value *SrcElementAddr =
+          Builder->CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr");
+      Value *DstElementAddr =
+          Builder->CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr");
+
+      // Load from the source. Transfer alignment information and mark load as
+      // unordered atomic.
+      LoadInst *Load = Builder->CreateLoad(SrcElementAddr, "memcpy_unfold.val");
+      Load->setOrdering(AtomicOrdering::Unordered);
+      // We know alignment of the first element. It is also guaranteed by the
+      // verifier that element size is less than or equal to the first element
+      // alignment and both of these values are powers of two. This means that
+      // all subsequent accesses are at least element size aligned.
+ // TODO: We can infer better alignment but there is no evidence that this + // will matter. + Load->setAlignment(i == 0 ? AMI->getParamAlignment(1) + : ElementSizeInBytes); + Load->setDebugLoc(AMI->getDebugLoc()); + + // Store loaded value via unordered atomic store. + StoreInst *Store = Builder->CreateStore(Load, DstElementAddr); + Store->setOrdering(AtomicOrdering::Unordered); + Store->setAlignment(i == 0 ? AMI->getParamAlignment(0) + : ElementSizeInBytes); + Store->setDebugLoc(AMI->getDebugLoc()); + } } // Set the number of elements of the copy to 0, it will be deleted on the // next iteration. - AMI->setNumElements(Constant::getNullValue(NumElementsCI->getType())); + AMI->setLength(Constant::getNullValue(LengthCI->getType())); return AMI; } @@ -1888,12 +1893,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Changed) return II; } - if (auto *AMI = dyn_cast<ElementAtomicMemCpyInst>(II)) { - if (Constant *C = dyn_cast<Constant>(AMI->getNumElements())) + if (auto *AMI = dyn_cast<ElementUnorderedAtomicMemCpyInst>(II)) { + if (Constant *C = dyn_cast<Constant>(AMI->getLength())) if (C->isNullValue()) return eraseInstFromFunction(*AMI); - if (Instruction *I = SimplifyElementAtomicMemCpy(AMI)) + if (Instruction *I = SimplifyElementUnorderedAtomicMemCpy(AMI)) return I; } diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index fd0a64a5bbb5..1a7db146df42 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -447,12 +447,14 @@ private: Instruction::CastOps isEliminableCastPair(const CastInst *CI1, const CastInst *CI2); - Value *foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS); + Value *foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI); Value *foldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS); - Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction *CxtI); + Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI); Value *foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS); Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS); + Value *foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS, + bool JoinedByAnd, Instruction &CxtI); public: /// \brief Inserts an instruction \p New before instruction \p Old /// @@ -724,7 +726,8 @@ private: Instruction *MatchBSwap(BinaryOperator &I); bool SimplifyStoreAtEndOfBlock(StoreInst &SI); - Instruction *SimplifyElementAtomicMemCpy(ElementAtomicMemCpyInst *AMI); + Instruction * + SimplifyElementUnorderedAtomicMemCpy(ElementUnorderedAtomicMemCpyInst *AMI); Instruction *SimplifyMemTransfer(MemIntrinsic *MI); Instruction *SimplifyMemSet(MemSetInst *MI); diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 3f2ddcacce2b..8cec865c6422 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -682,11 +682,11 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask)); } - if (match(Op0, m_SExt(m_Value(X)))) { + if (match(Op0, m_SExt(m_Value(X))) && + (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) { // Are we moving the sign bit to the low bit and widening with high zeros? 
unsigned SrcTyBitWidth = X->getType()->getScalarSizeInBits(); - if (ShAmt == BitWidth - 1 && - (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) { + if (ShAmt == BitWidth - 1) { // lshr (sext i1 X to iN), N-1 --> zext X to iN if (SrcTyBitWidth == 1) return new ZExtInst(X, Ty); @@ -698,7 +698,13 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { } } - // TODO: Convert to ashr+zext if the shift equals the extension amount. + // lshr (sext iM X to iN), N-M --> zext (ashr X, min(N-M, M-1)) to iN + if (ShAmt == BitWidth - SrcTyBitWidth && Op0->hasOneUse()) { + // The new shift amount can't be more than the narrow source type. + unsigned NewShAmt = std::min(ShAmt, SrcTyBitWidth - 1); + Value *AShr = Builder->CreateAShr(X, NewShAmt); + return new ZExtInst(AShr, Ty); + } } if (match(Op0, m_LShr(m_Value(X), m_APInt(ShOp1)))) { diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 7ff69b9eb7f4..f2806e278e6e 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_library(LLVMInstrumentation Instrumentation.cpp InstrProfiling.cpp PGOInstrumentation.cpp + PGOMemOPSizeOpt.cpp SanitizerCoverage.cpp ThreadSanitizer.cpp EfficiencySanitizer.cpp diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index 96027bc3d0a9..0d308810009d 100644 --- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -56,8 +56,6 @@ using namespace llvm; STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions."); STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites."); -STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized."); -STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated."); // Command line option to disable indirect-call promotion with the default as // false. This is for debug purpose. @@ -111,44 +109,6 @@ static cl::opt<bool> ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens")); -// The minimum call count to optimize memory intrinsic calls. -static cl::opt<unsigned> - MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore, - cl::init(1000), - cl::desc("The minimum count to optimize memory " - "intrinsic calls")); - -// Command line option to disable memory intrinsic optimization. The default is -// false. This is for debug purpose. -static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false), - cl::Hidden, cl::desc("Disable optimize")); - -// The percent threshold to optimize memory intrinsic calls. -static cl::opt<unsigned> - MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), - cl::Hidden, cl::ZeroOrMore, - cl::desc("The percentage threshold for the " - "memory intrinsic calls optimization")); - -// Maximum number of versions for optimizing memory intrinsic call. -static cl::opt<unsigned> - MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, - cl::ZeroOrMore, - cl::desc("The max version for the optimized memory " - " intrinsic calls")); - -// Scale the counts from the annotation using the BB count value. 
-static cl::opt<bool> - MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, - cl::desc("Scale the memop size counts using the basic " - " block count value")); - -// This option sets the rangge of precise profile memop sizes. -extern cl::opt<std::string> MemOPSizeRange; - -// This option sets the value that groups large memop sizes -extern cl::opt<unsigned> MemOPSizeLarge; - namespace { class PGOIndirectCallPromotionLegacyPass : public ModulePass { public: @@ -173,24 +133,6 @@ private: // the promoted direct call. bool SamplePGO; }; - -class PGOMemOPSizeOptLegacyPass : public FunctionPass { -public: - static char ID; - - PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) { - initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { return "PGOMemOPSize"; } - -private: - bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<BlockFrequencyInfoWrapperPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - } -}; } // end anonymous namespace char PGOIndirectCallPromotionLegacyPass::ID = 0; @@ -204,19 +146,6 @@ ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO, return new PGOIndirectCallPromotionLegacyPass(InLTO, SamplePGO); } -char PGOMemOPSizeOptLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", - "Optimize memory intrinsic using its size value profile", - false, false) -INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) -INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", - "Optimize memory intrinsic using its size value profile", - false, false) - -FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() { - return new PGOMemOPSizeOptLegacyPass(); -} - namespace { // The class for main data structure to promote indirect calls to conditional // direct calls. @@ -749,285 +678,3 @@ PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, return PreservedAnalyses::none(); } - -namespace { -class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> { -public: - MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI) - : Func(Func), BFI(BFI), Changed(false) { - ValueDataArray = - llvm::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2); - // Get the MemOPSize range information from option MemOPSizeRange, - getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart, - PreciseRangeLast); - } - bool isChanged() const { return Changed; } - void perform() { - WorkList.clear(); - visit(Func); - - for (auto &MI : WorkList) { - ++NumOfPGOMemOPAnnotate; - if (perform(MI)) { - Changed = true; - ++NumOfPGOMemOPOpt; - DEBUG(dbgs() << "MemOP call: " << MI->getCalledFunction()->getName() - << "is Transformed.\n"); - } - } - } - - void visitMemIntrinsic(MemIntrinsic &MI) { - Value *Length = MI.getLength(); - // Not perform on constant length calls. - if (dyn_cast<ConstantInt>(Length)) - return; - WorkList.push_back(&MI); - } - -private: - Function &Func; - BlockFrequencyInfo &BFI; - bool Changed; - std::vector<MemIntrinsic *> WorkList; - // Start of the previse range. - int64_t PreciseRangeStart; - // Last value of the previse range. - int64_t PreciseRangeLast; - // The space to read the profile annotation. - std::unique_ptr<InstrProfValueData[]> ValueDataArray; - bool perform(MemIntrinsic *MI); - - // This kind shows which group the value falls in. For PreciseValue, we have - // the profile count for that value. LargeGroup groups the values that are in - // range [LargeValue, +inf). 
NonLargeGroup groups the rest of values. - enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup }; - - MemOPSizeKind getMemOPSizeKind(int64_t Value) const { - if (Value == MemOPSizeLarge && MemOPSizeLarge != 0) - return LargeGroup; - if (Value == PreciseRangeLast + 1) - return NonLargeGroup; - return PreciseValue; - } -}; - -static const char *getMIName(const MemIntrinsic *MI) { - switch (MI->getIntrinsicID()) { - case Intrinsic::memcpy: - return "memcpy"; - case Intrinsic::memmove: - return "memmove"; - case Intrinsic::memset: - return "memset"; - default: - return "unknown"; - } -} - -static bool isProfitable(uint64_t Count, uint64_t TotalCount) { - assert(Count <= TotalCount); - if (Count < MemOPCountThreshold) - return false; - if (Count < TotalCount * MemOPPercentThreshold / 100) - return false; - return true; -} - -static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num, - uint64_t Denom) { - if (!MemOPScaleCount) - return Count; - bool Overflowed; - uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed); - return ScaleCount / Denom; -} - -bool MemOPSizeOpt::perform(MemIntrinsic *MI) { - assert(MI); - if (MI->getIntrinsicID() == Intrinsic::memmove) - return false; - - uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2; - uint64_t TotalCount; - if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions, - ValueDataArray.get(), NumVals, TotalCount)) - return false; - - uint64_t ActualCount = TotalCount; - uint64_t SavedTotalCount = TotalCount; - if (MemOPScaleCount) { - auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent()); - if (!BBEdgeCount) - return false; - ActualCount = *BBEdgeCount; - } - - ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals); - DEBUG(dbgs() << "Read one memory intrinsic profile with count " << ActualCount - << "\n"); - DEBUG( - for (auto &VD - : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; }); - - if (ActualCount < MemOPCountThreshold) - return false; - // Skip if the total value profiled count is 0, in which case we can't - // scale up the counts properly (and there is no profitable transformation). - if (TotalCount == 0) - return false; - - TotalCount = ActualCount; - if (MemOPScaleCount) - DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount - << " denominator = " << SavedTotalCount << "\n"); - - // Keeping track of the count of the default case: - uint64_t RemainCount = TotalCount; - SmallVector<uint64_t, 16> SizeIds; - SmallVector<uint64_t, 16> CaseCounts; - uint64_t MaxCount = 0; - unsigned Version = 0; - // Default case is in the front -- save the slot here. - CaseCounts.push_back(0); - for (auto &VD : VDs) { - int64_t V = VD.Value; - uint64_t C = VD.Count; - if (MemOPScaleCount) - C = getScaledCount(C, ActualCount, SavedTotalCount); - - // Only care precise value here. - if (getMemOPSizeKind(V) != PreciseValue) - continue; - - // ValueCounts are sorted on the count. Break at the first un-profitable - // value. 
- if (!isProfitable(C, RemainCount)) - break; - - SizeIds.push_back(V); - CaseCounts.push_back(C); - if (C > MaxCount) - MaxCount = C; - - assert(RemainCount >= C); - RemainCount -= C; - - if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0) - break; - } - - if (Version == 0) - return false; - - CaseCounts[0] = RemainCount; - if (RemainCount > MaxCount) - MaxCount = RemainCount; - - uint64_t SumForOpt = TotalCount - RemainCount; - - DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version - << " Versions (covering " << SumForOpt << " out of " - << TotalCount << ")\n"); - - // mem_op(..., size) - // ==> - // switch (size) { - // case s1: - // mem_op(..., s1); - // goto merge_bb; - // case s2: - // mem_op(..., s2); - // goto merge_bb; - // ... - // default: - // mem_op(..., size); - // goto merge_bb; - // } - // merge_bb: - - BasicBlock *BB = MI->getParent(); - DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); - DEBUG(dbgs() << *BB << "\n"); - auto OrigBBFreq = BFI.getBlockFreq(BB); - - BasicBlock *DefaultBB = SplitBlock(BB, MI); - BasicBlock::iterator It(*MI); - ++It; - assert(It != DefaultBB->end()); - BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It)); - MergeBB->setName("MemOP.Merge"); - BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency()); - DefaultBB->setName("MemOP.Default"); - - auto &Ctx = Func.getContext(); - IRBuilder<> IRB(BB); - BB->getTerminator()->eraseFromParent(); - Value *SizeVar = MI->getLength(); - SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size()); - - // Clear the value profile data. - MI->setMetadata(LLVMContext::MD_prof, nullptr); - - DEBUG(dbgs() << "\n\n== Basic Block After==\n"); - - for (uint64_t SizeId : SizeIds) { - ConstantInt *CaseSizeId = ConstantInt::get(Type::getInt64Ty(Ctx), SizeId); - BasicBlock *CaseBB = BasicBlock::Create( - Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB); - Instruction *NewInst = MI->clone(); - // Fix the argument. 
- dyn_cast<MemIntrinsic>(NewInst)->setLength(CaseSizeId); - CaseBB->getInstList().push_back(NewInst); - IRBuilder<> IRBCase(CaseBB); - IRBCase.CreateBr(MergeBB); - SI->addCase(CaseSizeId, CaseBB); - DEBUG(dbgs() << *CaseBB << "\n"); - } - setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount); - - DEBUG(dbgs() << *BB << "\n"); - DEBUG(dbgs() << *DefaultBB << "\n"); - DEBUG(dbgs() << *MergeBB << "\n"); - - emitOptimizationRemark(Func.getContext(), "memop-opt", Func, - MI->getDebugLoc(), - Twine("optimize ") + getMIName(MI) + " with count " + - Twine(SumForOpt) + " out of " + Twine(TotalCount) + - " for " + Twine(Version) + " versions"); - - return true; -} -} // namespace - -static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI) { - if (DisableMemOPOPT) - return false; - - if (F.hasFnAttribute(Attribute::OptimizeForSize)) - return false; - MemOPSizeOpt MemOPSizeOpt(F, BFI); - MemOPSizeOpt.perform(); - return MemOPSizeOpt.isChanged(); -} - -bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) { - BlockFrequencyInfo &BFI = - getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); - return PGOMemOPSizeOptImpl(F, BFI); -} - -namespace llvm { -char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID; - -PreservedAnalyses PGOMemOPSizeOpt::run(Function &F, - FunctionAnalysisManager &FAM) { - auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); - bool Changed = PGOMemOPSizeOptImpl(F, BFI); - if (!Changed) - return PreservedAnalyses::all(); - auto PA = PreservedAnalyses(); - PA.preserve<GlobalsAA>(); - return PA; -} -} // namespace llvm diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index f83c930ca61b..37f88d5f95f1 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -343,14 +343,24 @@ static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) { static inline bool shouldRecordFunctionAddr(Function *F) { // Check the linkage + bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage(); if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() && - !F->hasAvailableExternallyLinkage()) + !HasAvailableExternallyLinkage) return true; + + // A function marked 'alwaysinline' with available_externally linkage can't + // have its address taken. Doing so would create an undefined external ref to + // the function, which would fail to link. + if (HasAvailableExternallyLinkage && + F->hasFnAttribute(Attribute::AlwaysInline)) + return false; + // Prohibit function address recording if the function is both internal and // COMDAT. This avoids the profile data variable referencing internal symbols // in COMDAT. if (F->hasLocalLinkage() && F->hasComdat()) return false; + // Check uses of this function for other than direct calls or invokes to it. // Inline virtual functions have linkeOnceODR linkage. When a key method // exists, the vtable will only be emitted in the TU where the key method diff --git a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp new file mode 100644 index 000000000000..0bc9ddfbe4d3 --- /dev/null +++ b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -0,0 +1,419 @@ +//===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the transformation that optimizes memory intrinsics
+// such as memcpy using the size value profile. When memory intrinsic size
+// value profile metadata is available, a single memory intrinsic is expanded
+// to a sequence of guarded specialized versions that are called with the
+// hottest size(s), for later expansion into more optimal inline sequences.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/PGOInstrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <cassert>
+#include <cstdint>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pgo-memop-opt"
+
+STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized.");
+STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");
+
+// The minimum call count to optimize memory intrinsic calls.
+static cl::opt<unsigned>
+    MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore,
+                        cl::init(1000),
+                        cl::desc("The minimum count to optimize memory "
+                                 "intrinsic calls"));
+
+// Command line option to disable memory intrinsic optimization. The default is
+// false. This is for debug purposes.
+static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false),
+                                     cl::Hidden, cl::desc("Disable optimize"));
+
+// The percent threshold to optimize memory intrinsic calls.
+static cl::opt<unsigned>
+    MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40),
+                          cl::Hidden, cl::ZeroOrMore,
+                          cl::desc("The percentage threshold for the "
+                                   "memory intrinsic calls optimization"));
+
+// Maximum number of versions for optimizing memory intrinsic calls.
+static cl::opt<unsigned>
+    MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden,
+                    cl::ZeroOrMore,
+                    cl::desc("The max version for the optimized memory "
+                             "intrinsic calls"));
+
+// Scale the counts from the annotation using the BB count value.
+static cl::opt<bool>
+    MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden,
+                    cl::desc("Scale the memop size counts using the basic "
+                             "block count value"));
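The two thresholds above feed a single profitability gate, isProfitable(), defined further down in this file: a candidate size must clear both the absolute count threshold and the percentage of the remaining total. A minimal standalone sketch of that gate with made-up numbers and the default values (1000 and 40); this is an illustration, not part of the commit:

#include <cassert>
#include <cstdint>

static bool profitable(uint64_t Count, uint64_t TotalCount,
                       uint64_t CountThreshold = 1000,
                       uint64_t PercentThreshold = 40) {
  if (Count < CountThreshold)
    return false; // below pgo-memop-count-threshold
  if (Count < TotalCount * PercentThreshold / 100)
    return false; // below pgo-memop-percent-threshold
  return true;
}

int main() {
  assert(profitable(1200, 2000));  // 1200 >= 1000 and 1200 >= 40% of 2000
  assert(!profitable(900, 2000));  // fails the absolute count threshold
  assert(!profitable(1000, 4000)); // only 25% of the total count
  return 0;
}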
+// This option sets the range of precise profile memop sizes.
+extern cl::opt<std::string> MemOPSizeRange;
+
+// This option sets the value that groups large memop sizes.
+extern cl::opt<unsigned> MemOPSizeLarge;
+
+namespace {
+class PGOMemOPSizeOptLegacyPass : public FunctionPass {
+public:
+  static char ID;
+
+  PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) {
+    initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "PGOMemOPSize"; }
+
+private:
+  bool runOnFunction(Function &F) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<BlockFrequencyInfoWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
+  }
+};
+} // end anonymous namespace
+
+char PGOMemOPSizeOptLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
+                      "Optimize memory intrinsic using its size value profile",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
+                    "Optimize memory intrinsic using its size value profile",
+                    false, false)
+
+FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() {
+  return new PGOMemOPSizeOptLegacyPass();
+}
+
+namespace {
+class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
+public:
+  MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI)
+      : Func(Func), BFI(BFI), Changed(false) {
+    ValueDataArray =
+        llvm::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
+    // Get the MemOPSize range information from option MemOPSizeRange.
+    getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart,
+                                PreciseRangeLast);
+  }
+  bool isChanged() const { return Changed; }
+  void perform() {
+    WorkList.clear();
+    visit(Func);
+
+    for (auto &MI : WorkList) {
+      ++NumOfPGOMemOPAnnotate;
+      if (perform(MI)) {
+        Changed = true;
+        ++NumOfPGOMemOPOpt;
+        DEBUG(dbgs() << "MemOP call: " << MI->getCalledFunction()->getName()
+                     << " is transformed.\n");
+      }
+    }
+  }
+
+  void visitMemIntrinsic(MemIntrinsic &MI) {
+    Value *Length = MI.getLength();
+    // Don't perform the optimization on constant-length calls.
+    if (isa<ConstantInt>(Length))
+      return;
+    WorkList.push_back(&MI);
+  }
+
+private:
+  Function &Func;
+  BlockFrequencyInfo &BFI;
+  bool Changed;
+  std::vector<MemIntrinsic *> WorkList;
+  // Start of the precise range.
+  int64_t PreciseRangeStart;
+  // Last value of the precise range.
+  int64_t PreciseRangeLast;
+  // The space to read the profile annotation.
+  std::unique_ptr<InstrProfValueData[]> ValueDataArray;
+  bool perform(MemIntrinsic *MI);
+
+  // This kind shows which group the value falls in. For PreciseValue, we have
+  // the profile count for that value. LargeGroup groups the values that are in
+  // range [LargeValue, +inf). NonLargeGroup groups the rest of values.
+  enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup };
+
+  MemOPSizeKind getMemOPSizeKind(int64_t Value) const {
+    if (Value == MemOPSizeLarge && MemOPSizeLarge != 0)
+      return LargeGroup;
+    if (Value == PreciseRangeLast + 1)
+      return NonLargeGroup;
+    return PreciseValue;
+  }
+};
+
+static const char *getMIName(const MemIntrinsic *MI) {
+  switch (MI->getIntrinsicID()) {
+  case Intrinsic::memcpy:
+    return "memcpy";
+  case Intrinsic::memmove:
+    return "memmove";
+  case Intrinsic::memset:
+    return "memset";
+  default:
+    return "unknown";
+  }
+}
+
+static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
+  assert(Count <= TotalCount);
+  if (Count < MemOPCountThreshold)
+    return false;
+  if (Count < TotalCount * MemOPPercentThreshold / 100)
+    return false;
+  return true;
+}
+
+static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num,
+                                      uint64_t Denom) {
+  if (!MemOPScaleCount)
+    return Count;
+  bool Overflowed;
+  uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed);
+  return ScaleCount / Denom;
+}
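To make the scaling rule above concrete, here is a hedged, self-contained sketch; SaturatingMultiply is replaced by plain multiplication, which is safe for these small made-up values:

#include <cassert>
#include <cstdint>

// A value count is rescaled by ActualCount/SavedTotalCount so that counts
// recorded at instrumentation time line up with the BFI block count.
static uint64_t scaledCount(uint64_t Count, uint64_t Num, uint64_t Denom) {
  return Count * Num / Denom;
}

int main() {
  // Size 8 was recorded 600 times out of 1000 annotated samples, but BFI
  // reports 2000 executions of the block: the count scales up to 1200.
  assert(scaledCount(600, 2000, 1000) == 1200);
  return 0;
}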
+bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
+  assert(MI);
+  if (MI->getIntrinsicID() == Intrinsic::memmove)
+    return false;
+
+  uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2;
+  uint64_t TotalCount;
+  if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions,
+                                ValueDataArray.get(), NumVals, TotalCount))
+    return false;
+
+  uint64_t ActualCount = TotalCount;
+  uint64_t SavedTotalCount = TotalCount;
+  if (MemOPScaleCount) {
+    auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent());
+    if (!BBEdgeCount)
+      return false;
+    ActualCount = *BBEdgeCount;
+  }
+
+  ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
+  DEBUG(dbgs() << "Read one memory intrinsic profile with count "
+               << ActualCount << "\n");
+  DEBUG(for (auto &VD : VDs) {
+    dbgs() << " (" << VD.Value << "," << VD.Count << ")\n";
+  });
+
+  if (ActualCount < MemOPCountThreshold)
+    return false;
+  // Skip if the total value profiled count is 0, in which case we can't
+  // scale up the counts properly (and there is no profitable transformation).
+  if (TotalCount == 0)
+    return false;
+
+  TotalCount = ActualCount;
+  if (MemOPScaleCount)
+    DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount
+                 << " denominator = " << SavedTotalCount << "\n");
+
+  // Keep track of the count of the default case:
+  uint64_t RemainCount = TotalCount;
+  uint64_t SavedRemainCount = SavedTotalCount;
+  SmallVector<uint64_t, 16> SizeIds;
+  SmallVector<uint64_t, 16> CaseCounts;
+  uint64_t MaxCount = 0;
+  unsigned Version = 0;
+  // Default case is in the front -- save the slot here.
+  CaseCounts.push_back(0);
+  for (auto &VD : VDs) {
+    int64_t V = VD.Value;
+    uint64_t C = VD.Count;
+    if (MemOPScaleCount)
+      C = getScaledCount(C, ActualCount, SavedTotalCount);
+
+    // Only care about precise values here.
+    if (getMemOPSizeKind(V) != PreciseValue)
+      continue;
+
+    // ValueCounts are sorted on the count. Break at the first un-profitable
+    // value.
+    if (!isProfitable(C, RemainCount))
+      break;
+
+    SizeIds.push_back(V);
+    CaseCounts.push_back(C);
+    if (C > MaxCount)
+      MaxCount = C;
+
+    assert(RemainCount >= C);
+    RemainCount -= C;
+    assert(SavedRemainCount >= VD.Count);
+    SavedRemainCount -= VD.Count;
+
+    if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0)
+      break;
+  }
+
+  if (Version == 0)
+    return false;
+
+  CaseCounts[0] = RemainCount;
+  if (RemainCount > MaxCount)
+    MaxCount = RemainCount;
+
+  uint64_t SumForOpt = TotalCount - RemainCount;
+
+  DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
+               << " Versions (covering " << SumForOpt << " out of "
+               << TotalCount << ")\n");
+
+  // mem_op(..., size)
+  // ==>
+  // switch (size) {
+  //   case s1:
+  //      mem_op(..., s1);
+  //      goto merge_bb;
+  //   case s2:
+  //      mem_op(..., s2);
+  //      goto merge_bb;
+  //   ...
+  //   default:
+  //      mem_op(..., size);
+  //      goto merge_bb;
+  // }
+  // merge_bb:
+
+  BasicBlock *BB = MI->getParent();
+  DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
+  DEBUG(dbgs() << *BB << "\n");
+  auto OrigBBFreq = BFI.getBlockFreq(BB);
+
+  BasicBlock *DefaultBB = SplitBlock(BB, MI);
+  BasicBlock::iterator It(*MI);
+  ++It;
+  assert(It != DefaultBB->end());
+  BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It));
+  MergeBB->setName("MemOP.Merge");
+  BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
+  DefaultBB->setName("MemOP.Default");
+
+  auto &Ctx = Func.getContext();
+  IRBuilder<> IRB(BB);
+  BB->getTerminator()->eraseFromParent();
+  Value *SizeVar = MI->getLength();
+  SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size());
+
+  // Clear the value profile data.
+  MI->setMetadata(LLVMContext::MD_prof, nullptr);
+  // If all versions were promoted, we don't need the MD.prof metadata.
+  if (SavedRemainCount > 0 || Version != NumVals)
+    // Otherwise we need to update it with the un-promoted records.
+    annotateValueSite(*Func.getParent(), *MI, VDs.slice(Version),
+                      SavedRemainCount, IPVK_MemOPSize, NumVals);
+
+  DEBUG(dbgs() << "\n\n== Basic Block After ==\n");
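The VDs.slice(Version) call above writes back only the records that were not promoted. A small illustration of the ArrayRef::slice() semantics with made-up counts (assumes an LLVM source tree on the include path; not part of the commit):

#include "llvm/ADT/ArrayRef.h"
#include <cassert>

int main() {
  int Counts[] = {900, 500, 100, 7}; // value-profile records, sorted by count
  llvm::ArrayRef<int> VDs(Counts);
  // With Version == 2 versions promoted, slice(2) drops the first two
  // records and yields the leftovers {100, 7} that get re-annotated.
  llvm::ArrayRef<int> Rest = VDs.slice(2);
  assert(Rest.size() == 2 && Rest[0] == 100 && Rest[1] == 7);
  return 0;
}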
+  for (uint64_t SizeId : SizeIds) {
+    ConstantInt *CaseSizeId = ConstantInt::get(Type::getInt64Ty(Ctx), SizeId);
+    BasicBlock *CaseBB = BasicBlock::Create(
+        Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
+    Instruction *NewInst = MI->clone();
+    // Fix the argument.
+    cast<MemIntrinsic>(NewInst)->setLength(CaseSizeId);
+    CaseBB->getInstList().push_back(NewInst);
+    IRBuilder<> IRBCase(CaseBB);
+    IRBCase.CreateBr(MergeBB);
+    SI->addCase(CaseSizeId, CaseBB);
+    DEBUG(dbgs() << *CaseBB << "\n");
+  }
+  setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
+
+  DEBUG(dbgs() << *BB << "\n");
+  DEBUG(dbgs() << *DefaultBB << "\n");
+  DEBUG(dbgs() << *MergeBB << "\n");
+
+  emitOptimizationRemark(Func.getContext(), "memop-opt", Func,
+                         MI->getDebugLoc(),
+                         Twine("optimize ") + getMIName(MI) + " with count " +
+                             Twine(SumForOpt) + " out of " + Twine(TotalCount) +
+                             " for " + Twine(Version) + " versions");
+
+  return true;
+}
+} // namespace
+
+static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI) {
+  if (DisableMemOPOPT)
+    return false;
+
+  if (F.hasFnAttribute(Attribute::OptimizeForSize))
+    return false;
+  MemOPSizeOpt MemOPSizeOpt(F, BFI);
+  MemOPSizeOpt.perform();
+  return MemOPSizeOpt.isChanged();
+}
+
+bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) {
+  BlockFrequencyInfo &BFI =
+      getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
+  return PGOMemOPSizeOptImpl(F, BFI);
+}
+
+namespace llvm {
+char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID;
+
+PreservedAnalyses PGOMemOPSizeOpt::run(Function &F,
+                                       FunctionAnalysisManager &FAM) {
+  auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+  bool Changed = PGOMemOPSizeOptImpl(F, BFI);
+  if (!Changed)
+    return PreservedAnalyses::all();
+  auto PA = PreservedAnalyses();
+  PA.preserve<GlobalsAA>();
+  return PA;
+}
+} // namespace llvm
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 8aa40d1759de..e3c36c98ab0d 100644
--- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -61,7 +61,7 @@ static const char *const SanCov8bitCountersInitName =
     "__sanitizer_cov_8bit_counters_init";
 static const char *const SanCovGuardsSectionName = "sancov_guards";
-static const char *const SanCovCountersSectionName = "sancov_counters";
+static const char *const SanCovCountersSectionName = "sancov_cntrs";
 
 static cl::opt<int> ClCoverageLevel(
     "sanitizer-coverage-level",
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 7b625b9b136e..2a4c9526dfcd 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -551,7 +551,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, const SimplifyQuery &SQ) {
         BBChanged = true;
       }
     }
-  };
+  }
 
   FnChanged |= BBChanged;
 }
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index c4f450949e6d..0f92760a874b 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Transforms/Scalar/EarlyCSE.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/GlobalsModRef.h"
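The next EarlyCSE hunk swaps SmallVector for SmallSetVector so that a MemoryPhi reachable through several users is queued for checking only once. A minimal sketch of that deduplicating behavior (again assuming LLVM's ADT headers on the include path; this snippet is illustrative only):

#include "llvm/ADT/SetVector.h"
#include <cassert>

int main() {
  llvm::SmallSetVector<int, 4> PhisToCheck;
  PhisToCheck.insert(42);
  PhisToCheck.insert(42); // duplicate insert is a no-op, unlike push_back
  assert(PhisToCheck.size() == 1);
  // Iteration still follows insertion order, so the FIFO processing below
  // is preserved while re-processing is avoided.
  return 0;
}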
@@ -506,7 +507,7 @@ private:
     if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst)) {
       // Optimize MemoryPhi nodes that may become redundant by having all the
       // same input values once MA is removed.
-      SmallVector<MemoryPhi *, 4> PhisToCheck;
+      SmallSetVector<MemoryPhi *, 4> PhisToCheck;
       SmallVector<MemoryAccess *, 8> WorkQueue;
       WorkQueue.push_back(MA);
       // Process MemoryPhi nodes in FIFO order using an ever-growing vector since
@@ -517,7 +518,7 @@
 
         for (auto *U : WI->users())
           if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U))
-            PhisToCheck.push_back(MP);
+            PhisToCheck.insert(MP);
 
         MSSAUpdater->removeMemoryAccess(WI);
diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp
index 8634816e702f..5fd2dfc118b4 100644
--- a/lib/Transforms/Scalar/GVNSink.cpp
+++ b/lib/Transforms/Scalar/GVNSink.cpp
@@ -64,6 +64,17 @@ using namespace llvm;
 
 STATISTIC(NumRemoved, "Number of instructions removed");
 
+namespace llvm {
+namespace GVNExpression {
+
+LLVM_DUMP_METHOD void Expression::dump() const {
+  print(dbgs());
+  dbgs() << "\n";
+}
+
+} // namespace GVNExpression
+} // namespace llvm
+
 namespace {
 
 static bool isMemoryInst(const Instruction *I) {
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index b706152f30c8..8b435050ac76 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -983,21 +983,21 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
   const SCEV *NumBytesS =
       SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
+  if (StoreSize != 1)
+    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
+                               SCEV::FlagNUW);
+
+  Value *NumBytes =
+      Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+
   unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
   CallInst *NewCall = nullptr;
   // Check whether to generate an unordered atomic memcpy:
   //  If the load or store are atomic, then they must necessarily be unordered
   //  by previous checks.
-  if (!SI->isAtomic() && !LI->isAtomic()) {
-    if (StoreSize != 1)
-      NumBytesS = SE->getMulExpr(
-          NumBytesS, SE->getConstant(IntPtrTy, StoreSize), SCEV::FlagNUW);
-
-    Value *NumBytes =
-        Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
-
+  if (!SI->isAtomic() && !LI->isAtomic())
     NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align);
-  } else {
+  else {
     // We cannot allow unaligned ops for unordered load/store, so reject
     // anything where the alignment isn't at least the element size.
     if (Align < StoreSize)
@@ -1010,11 +1010,9 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
     if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
       return false;
 
-    Value *NumElements =
-        Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+    NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
+        StoreBasePtr, LoadBasePtr, NumBytes, StoreSize);
 
-    NewCall = Builder.CreateElementAtomicMemCpy(StoreBasePtr, LoadBasePtr,
-                                                NumElements, StoreSize);
     // Propagate alignment info onto the pointer args. Note that unordered
     // atomic loads/stores are *required* by the spec to have an alignment
     // but non-atomic loads/stores may not.
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 6926aae37963..cbbd55512c9f 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -2195,7 +2195,7 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
 
 // For a given expression, mark the phi of ops instructions that could have
 // changed as a result.
void NewGVN::markPhiOfOpsChanged(const Expression *E) { - touchAndErase(ExpressionToPhiOfOps, E); + touchAndErase(ExpressionToPhiOfOps, ExactEqualsExpression(*E)); } // Perform congruence finding on a given value numbering expression. @@ -3561,7 +3561,7 @@ bool NewGVN::eliminateInstructions(Function &F) { // TODO: It would be faster to use getNumIncomingBlocks() on a phi node in // the block and subtract the pred count, but it's more complicated. if (ReachablePredCount.lookup(BB) != - std::distance(pred_begin(BB), pred_end(BB))) { + unsigned(std::distance(pred_begin(BB), pred_end(BB)))) { for (auto II = BB->begin(); isa<PHINode>(II); ++II) { auto &PHI = cast<PHINode>(*II); ReplaceUnreachablePHIArgs(PHI, BB); diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index bae7911d222c..a52739bb76f7 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -89,10 +89,10 @@ struct RewriteStatepointsForGC : public ModulePass { Changed |= runOnFunction(F); if (Changed) { - // stripNonValidAttributes asserts that shouldRewriteStatepointsIn + // stripNonValidAttributesAndMetadata asserts that shouldRewriteStatepointsIn // returns true for at least one function in the module. Since at least // one function changed, we know that the precondition is satisfied. - stripNonValidAttributes(M); + stripNonValidAttributesAndMetadata(M); } return Changed; @@ -105,20 +105,24 @@ struct RewriteStatepointsForGC : public ModulePass { AU.addRequired<TargetTransformInfoWrapperPass>(); } - /// The IR fed into RewriteStatepointsForGC may have had attributes implying - /// dereferenceability that are no longer valid/correct after - /// RewriteStatepointsForGC has run. This is because semantically, after + /// The IR fed into RewriteStatepointsForGC may have had attributes and + /// metadata implying dereferenceability that are no longer valid/correct after + /// RewriteStatepointsForGC has run. This is because semantically, after /// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire - /// heap. stripNonValidAttributes (conservatively) restores correctness - /// by erasing all attributes in the module that externally imply - /// dereferenceability. - /// Similar reasoning also applies to the noalias attributes. gc.statepoint - /// can touch the entire heap including noalias objects. - void stripNonValidAttributes(Module &M); - - // Helpers for stripNonValidAttributes - void stripNonValidAttributesFromBody(Function &F); + /// heap. stripNonValidAttributesAndMetadata (conservatively) restores + /// correctness by erasing all attributes in the module that externally imply + /// dereferenceability. Similar reasoning also applies to the noalias + /// attributes and metadata. gc.statepoint can touch the entire heap including + /// noalias objects. + void stripNonValidAttributesAndMetadata(Module &M); + + // Helpers for stripNonValidAttributesAndMetadata + void stripNonValidAttributesAndMetadataFromBody(Function &F); void stripNonValidAttributesFromPrototype(Function &F); + // Certain metadata on instructions are invalid after running RS4GC. + // Optimizations that run after RS4GC can incorrectly use this metadata to + // optimize functions. We drop such metadata on the instruction. 
+ void stripInvalidMetadataFromInstruction(Instruction &I); }; } // namespace @@ -2306,13 +2310,44 @@ RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) { RemoveNonValidAttrAtIndex(Ctx, F, AttributeList::ReturnIndex); } -void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) { +void RewriteStatepointsForGC::stripInvalidMetadataFromInstruction(Instruction &I) { + + if (!isa<LoadInst>(I) && !isa<StoreInst>(I)) + return; + // These are the attributes that are still valid on loads and stores after + // RS4GC. + // The metadata implying dereferenceability and noalias are (conservatively) + // dropped. This is because semantically, after RewriteStatepointsForGC runs, + // all calls to gc.statepoint "free" the entire heap. Also, gc.statepoint can + // touch the entire heap including noalias objects. Note: The reasoning is + // same as stripping the dereferenceability and noalias attributes that are + // analogous to the metadata counterparts. + // We also drop the invariant.load metadata on the load because that metadata + // implies the address operand to the load points to memory that is never + // changed once it became dereferenceable. This is no longer true after RS4GC. + // Similar reasoning applies to invariant.group metadata, which applies to + // loads within a group. + unsigned ValidMetadataAfterRS4GC[] = {LLVMContext::MD_tbaa, + LLVMContext::MD_range, + LLVMContext::MD_alias_scope, + LLVMContext::MD_nontemporal, + LLVMContext::MD_nonnull, + LLVMContext::MD_align, + LLVMContext::MD_type}; + + // Drops all metadata on the instruction other than ValidMetadataAfterRS4GC. + I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC); + +} + +void RewriteStatepointsForGC::stripNonValidAttributesAndMetadataFromBody(Function &F) { if (F.empty()) return; LLVMContext &Ctx = F.getContext(); MDBuilder Builder(Ctx); + for (Instruction &I : instructions(F)) { if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) { assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!"); @@ -2333,6 +2368,8 @@ void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) { I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA); } + stripInvalidMetadataFromInstruction(I); + if (CallSite CS = CallSite(&I)) { for (int i = 0, e = CS.arg_size(); i != e; i++) if (isa<PointerType>(CS.getArgument(i)->getType())) @@ -2357,7 +2394,7 @@ static bool shouldRewriteStatepointsIn(Function &F) { return false; } -void RewriteStatepointsForGC::stripNonValidAttributes(Module &M) { +void RewriteStatepointsForGC::stripNonValidAttributesAndMetadata(Module &M) { #ifndef NDEBUG assert(any_of(M, shouldRewriteStatepointsIn) && "precondition!"); #endif @@ -2366,7 +2403,7 @@ void RewriteStatepointsForGC::stripNonValidAttributes(Module &M) { stripNonValidAttributesFromPrototype(F); for (Function &F : M) - stripNonValidAttributesFromBody(F); + stripNonValidAttributesAndMetadataFromBody(F); } bool RewriteStatepointsForGC::runOnFunction(Function &F) { diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 815492ac354c..c6929c33b3e9 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -515,10 +515,6 @@ private: void visitCmpInst(CmpInst &I); void visitExtractValueInst(ExtractValueInst &EVI); void visitInsertValueInst(InsertValueInst &IVI); - void visitLandingPadInst(LandingPadInst &I) { markOverdefined(&I); } - void visitFuncletPadInst(FuncletPadInst &FPI) { - markOverdefined(&FPI); - } void visitCatchSwitchInst(CatchSwitchInst 
&CPI) { markOverdefined(&CPI); visitTerminatorInst(CPI); @@ -539,13 +535,6 @@ private: void visitResumeInst (TerminatorInst &I) { /*returns void*/ } void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ } void visitFenceInst (FenceInst &I) { /*returns void*/ } - void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { - markOverdefined(&I); - } - void visitAtomicRMWInst (AtomicRMWInst &I) { markOverdefined(&I); } - void visitAllocaInst (Instruction &I) { markOverdefined(&I); } - void visitVAArgInst (Instruction &I) { markOverdefined(&I); } - void visitInstruction(Instruction &I) { // If a new instruction is added to LLVM that we don't handle. DEBUG(dbgs() << "SCCP: Don't know how to handle: " << I << '\n'); diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 24d28a6c2831..5d57ed9718fb 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -142,8 +142,139 @@ static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) { return false; } -void CodeExtractor::findAllocas(ValueSet &SinkCands) const { +static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) { + BasicBlock *CommonExitBlock = nullptr; + auto hasNonCommonExitSucc = [&](BasicBlock *Block) { + for (auto *Succ : successors(Block)) { + // Internal edges, ok. + if (Blocks.count(Succ)) + continue; + if (!CommonExitBlock) { + CommonExitBlock = Succ; + continue; + } + if (CommonExitBlock == Succ) + continue; + + return true; + } + return false; + }; + + if (any_of(Blocks, hasNonCommonExitSucc)) + return nullptr; + + return CommonExitBlock; +} + +bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers( + Instruction *Addr) const { + AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets()); + Function *Func = (*Blocks.begin())->getParent(); + for (BasicBlock &BB : *Func) { + if (Blocks.count(&BB)) + continue; + for (Instruction &II : BB) { + + if (isa<DbgInfoIntrinsic>(II)) + continue; + + unsigned Opcode = II.getOpcode(); + Value *MemAddr = nullptr; + switch (Opcode) { + case Instruction::Store: + case Instruction::Load: { + if (Opcode == Instruction::Store) { + StoreInst *SI = cast<StoreInst>(&II); + MemAddr = SI->getPointerOperand(); + } else { + LoadInst *LI = cast<LoadInst>(&II); + MemAddr = LI->getPointerOperand(); + } + // Global variable can not be aliased with locals. + if (dyn_cast<Constant>(MemAddr)) + break; + Value *Base = MemAddr->stripInBoundsConstantOffsets(); + if (!dyn_cast<AllocaInst>(Base) || Base == AI) + return false; + break; + } + default: { + IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II); + if (IntrInst) { + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start || + IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) + break; + return false; + } + // Treat all the other cases conservatively if it has side effects. 
+ if (II.mayHaveSideEffects()) + return false; + } + } + } + } + + return true; +} + +BasicBlock * +CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) { + BasicBlock *SinglePredFromOutlineRegion = nullptr; + assert(!Blocks.count(CommonExitBlock) && + "Expect a block outside the region!"); + for (auto *Pred : predecessors(CommonExitBlock)) { + if (!Blocks.count(Pred)) + continue; + if (!SinglePredFromOutlineRegion) { + SinglePredFromOutlineRegion = Pred; + } else if (SinglePredFromOutlineRegion != Pred) { + SinglePredFromOutlineRegion = nullptr; + break; + } + } + + if (SinglePredFromOutlineRegion) + return SinglePredFromOutlineRegion; + +#ifndef NDEBUG + auto getFirstPHI = [](BasicBlock *BB) { + BasicBlock::iterator I = BB->begin(); + PHINode *FirstPhi = nullptr; + while (I != BB->end()) { + PHINode *Phi = dyn_cast<PHINode>(I); + if (!Phi) + break; + if (!FirstPhi) { + FirstPhi = Phi; + break; + } + } + return FirstPhi; + }; + // If there are any phi nodes, the single pred either exists or has already + // be created before code extraction. + assert(!getFirstPHI(CommonExitBlock) && "Phi not expected"); +#endif + + BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock( + CommonExitBlock->getFirstNonPHI()->getIterator()); + + for (auto *Pred : predecessors(CommonExitBlock)) { + if (Blocks.count(Pred)) + continue; + Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock); + } + // Now add the old exit block to the outline region. + Blocks.insert(CommonExitBlock); + return CommonExitBlock; +} + +void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, + BasicBlock *&ExitBlock) const { Function *Func = (*Blocks.begin())->getParent(); + ExitBlock = getCommonExitBlock(Blocks); + for (BasicBlock &BB : *Func) { if (Blocks.count(&BB)) continue; @@ -152,49 +283,96 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands) const { if (!AI) continue; - // Returns true if matching life time markers are found within - // the outlined region. - auto GetLifeTimeMarkers = [&](Instruction *Addr) { + // Find the pair of life time markers for address 'Addr' that are either + // defined inside the outline region or can legally be shrinkwrapped into + // the outline region. If there are not other untracked uses of the + // address, return the pair of markers if found; otherwise return a pair + // of nullptr. + auto GetLifeTimeMarkers = + [&](Instruction *Addr, bool &SinkLifeStart, + bool &HoistLifeEnd) -> std::pair<Instruction *, Instruction *> { Instruction *LifeStart = nullptr, *LifeEnd = nullptr; - for (User *U : Addr->users()) { - if (!definedInRegion(Blocks, U)) - return false; + for (User *U : Addr->users()) { IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U); if (IntrInst) { - if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) { + // Do not handle the case where AI has multiple start markers. + if (LifeStart) + return std::make_pair<Instruction *>(nullptr, nullptr); LifeStart = IntrInst; - if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) + } + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) { + if (LifeEnd) + return std::make_pair<Instruction *>(nullptr, nullptr); LifeEnd = IntrInst; + } + continue; } + // Find untracked uses of the address, bail. 
+ if (!definedInRegion(Blocks, U)) + return std::make_pair<Instruction *>(nullptr, nullptr); } - return LifeStart && LifeEnd; + + if (!LifeStart || !LifeEnd) + return std::make_pair<Instruction *>(nullptr, nullptr); + + SinkLifeStart = !definedInRegion(Blocks, LifeStart); + HoistLifeEnd = !definedInRegion(Blocks, LifeEnd); + // Do legality Check. + if ((SinkLifeStart || HoistLifeEnd) && + !isLegalToShrinkwrapLifetimeMarkers(Addr)) + return std::make_pair<Instruction *>(nullptr, nullptr); + + // Check to see if we have a place to do hoisting, if not, bail. + if (HoistLifeEnd && !ExitBlock) + return std::make_pair<Instruction *>(nullptr, nullptr); + + return std::make_pair(LifeStart, LifeEnd); }; - if (GetLifeTimeMarkers(AI)) { + bool SinkLifeStart = false, HoistLifeEnd = false; + auto Markers = GetLifeTimeMarkers(AI, SinkLifeStart, HoistLifeEnd); + + if (Markers.first) { + if (SinkLifeStart) + SinkCands.insert(Markers.first); SinkCands.insert(AI); + if (HoistLifeEnd) + HoistCands.insert(Markers.second); continue; } - // Follow the bitcast: + // Follow the bitcast. Instruction *MarkerAddr = nullptr; for (User *U : AI->users()) { - if (U->stripPointerCasts() == AI) { + + if (U->stripInBoundsConstantOffsets() == AI) { + SinkLifeStart = false; + HoistLifeEnd = false; Instruction *Bitcast = cast<Instruction>(U); - if (GetLifeTimeMarkers(Bitcast)) { + Markers = GetLifeTimeMarkers(Bitcast, SinkLifeStart, HoistLifeEnd); + if (Markers.first) { MarkerAddr = Bitcast; continue; } } + + // Found unknown use of AI. if (!definedInRegion(Blocks, U)) { MarkerAddr = nullptr; break; } } + if (MarkerAddr) { + if (SinkLifeStart) + SinkCands.insert(Markers.first); if (!definedInRegion(Blocks, MarkerAddr)) SinkCands.insert(MarkerAddr); SinkCands.insert(AI); + if (HoistLifeEnd) + HoistCands.insert(Markers.second); } } } @@ -780,7 +958,8 @@ Function *CodeExtractor::extractCodeRegion() { if (!isEligible()) return nullptr; - ValueSet inputs, outputs, SinkingCands; + ValueSet inputs, outputs, SinkingCands, HoistingCands; + BasicBlock *CommonExit = nullptr; // Assumption: this is a single-entry code region, and the header is the first // block in the region. @@ -819,7 +998,8 @@ Function *CodeExtractor::extractCodeRegion() { "newFuncRoot"); newFuncRoot->getInstList().push_back(BranchInst::Create(header)); - findAllocas(SinkingCands); + findAllocas(SinkingCands, HoistingCands, CommonExit); + assert(HoistingCands.empty() || CommonExit); // Find inputs to, outputs from the code region. findInputsOutputs(inputs, outputs, SinkingCands); @@ -829,6 +1009,13 @@ Function *CodeExtractor::extractCodeRegion() { cast<Instruction>(II)->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); + if (!HoistingCands.empty()) { + auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); + Instruction *TI = HoistToBlock->getTerminator(); + for (auto *II : HoistingCands) + cast<Instruction>(II)->moveBefore(TI); + } + // Calculate the exit blocks for the extracted region and the total exit // weights for each of those blocks. DenseMap<BasicBlock *, BlockFrequency> ExitWeights; diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp index 9e71cba4f1b7..1260e35e934d 100644 --- a/lib/Transforms/Utils/PredicateInfo.cpp +++ b/lib/Transforms/Utils/PredicateInfo.cpp @@ -460,6 +460,9 @@ void PredicateInfo::buildPredicateInfo() { if (auto *BI = dyn_cast<BranchInst>(BranchBB->getTerminator())) { if (!BI->isConditional()) continue; + // Can't insert conditional information if they all go to the same place. 
+      if (BI->getSuccessor(0) == BI->getSuccessor(1))
+        continue;
       processBranch(BI, BranchBB, OpsToRename);
     } else if (auto *SI = dyn_cast<SwitchInst>(BranchBB->getTerminator())) {
       processSwitch(SI, BranchBB, OpsToRename);
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 02a5d3dbeadf..faa14046b1e3 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -352,7 +352,7 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
     return false;
 
   typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)(
-      const SCEV *, const SCEV *, SCEV::NoWrapFlags);
+      const SCEV *, const SCEV *, SCEV::NoWrapFlags, unsigned);
   typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)(
       const SCEV *, Type *);
 
@@ -406,10 +406,11 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
       IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
 
   const SCEV *A =
-      (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap), WideTy);
+      (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0u),
+                       WideTy);
   const SCEV *B = (SE->*Operation)((SE->*Extension)(LHS, WideTy),
-                                   (SE->*Extension)(RHS, WideTy), SCEV::FlagAnyWrap);
+                                   (SE->*Extension)(RHS, WideTy),
+                                   SCEV::FlagAnyWrap, 0u);
 
   if (A != B)
     return false;
@@ -530,8 +531,7 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
     return false;
 
   const SCEV *(ScalarEvolution::*GetExprForBO)(const SCEV *, const SCEV *,
-                                               SCEV::NoWrapFlags);
-
+                                               SCEV::NoWrapFlags, unsigned);
   switch (BO->getOpcode()) {
   default:
     return false;
@@ -560,7 +560,7 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
   const SCEV *ExtendAfterOp = SE->getZeroExtendExpr(SE->getSCEV(BO), WideTy);
   const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
       SE->getZeroExtendExpr(LHS, WideTy), SE->getZeroExtendExpr(RHS, WideTy),
-      SCEV::FlagAnyWrap);
+      SCEV::FlagAnyWrap, 0u);
   if (ExtendAfterOp == OpAfterExtend) {
     BO->setHasNoUnsignedWrap();
     SE->forgetValue(BO);
@@ -572,7 +572,7 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
   const SCEV *ExtendAfterOp = SE->getSignExtendExpr(SE->getSCEV(BO), WideTy);
   const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
       SE->getSignExtendExpr(LHS, WideTy), SE->getSignExtendExpr(RHS, WideTy),
-      SCEV::FlagAnyWrap);
+      SCEV::FlagAnyWrap, 0u);
   if (ExtendAfterOp == OpAfterExtend) {
     BO->setHasNoSignedWrap();
     SE->forgetValue(BO);
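The SimplifyIndVar change above threads a new depth parameter through pointers-to-member of ScalarEvolution, so every typedef and every indirect call site must be updated in lockstep (the patch passes 0u everywhere). A toy analogue of that signature change; the names here are illustrative, not LLVM's:

#include <cassert>

struct Toy {
  int add(int A, int B, unsigned Depth) {
    (void)Depth; // recursion limiter in the real API, unused in this toy
    return A + B;
  }
};
// The extra 'unsigned' must appear in the pointer-to-member type as well.
typedef int (Toy::*OperationFn)(int, int, unsigned);

int main() {
  Toy T;
  OperationFn Op = &Toy::add;
  assert((T.*Op)(1, 2, 0u) == 3); // mirrors (SE->*Operation)(LHS, RHS, Flags, 0u)
  return 0;
}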