Diffstat (limited to 'lib')
336 files changed, 8803 insertions, 4902 deletions
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index ee17ad3ba5863..4dfa25490d00d 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -218,8 +218,8 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst, return false; for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) { - if (auto *Inst = getUnknownInst(i)) { - ImmutableCallSite C1(Inst), C2(Inst); + if (auto *UnknownInst = getUnknownInst(i)) { + ImmutableCallSite C1(UnknownInst), C2(Inst); if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef || AA.getModRefInfo(C2, C1) != MRI_NoModRef) return true; diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp index 0468c794e81dd..3ff27890dc385 100644 --- a/lib/Analysis/AssumptionCache.cpp +++ b/lib/Analysis/AssumptionCache.cpp @@ -84,18 +84,11 @@ void AssumptionCache::updateAffectedValues(CallInst *CI) { Value *B; ConstantInt *C; // (A & B) or (A | B) or (A ^ B). - if (match(V, - m_CombineOr(m_And(m_Value(A), m_Value(B)), - m_CombineOr(m_Or(m_Value(A), m_Value(B)), - m_Xor(m_Value(A), m_Value(B)))))) { + if (match(V, m_BitwiseLogic(m_Value(A), m_Value(B)))) { AddAffected(A); AddAffected(B); // (A << C) or (A >>_s C) or (A >>_u C) where C is some constant. - } else if (match(V, - m_CombineOr(m_Shl(m_Value(A), m_ConstantInt(C)), - m_CombineOr(m_LShr(m_Value(A), m_ConstantInt(C)), - m_AShr(m_Value(A), - m_ConstantInt(C)))))) { + } else if (match(V, m_Shift(m_Value(A), m_ConstantInt(C)))) { AddAffected(A); } }; diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index dbb1b01b94ac2..b52a1d7b24d62 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -1021,11 +1021,14 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, // asking about values from different loop iterations. See PR32314. // TODO: We may be able to change the check so we only do this when // we definitely looked through a PHINode. - KnownBits Known1 = computeKnownBits(GEP1LastIdx, DL); - KnownBits Known2 = computeKnownBits(GEP2LastIdx, DL); - if (Known1.Zero.intersects(Known2.One) || - Known1.One.intersects(Known2.Zero)) - return NoAlias; + if (GEP1LastIdx != GEP2LastIdx && + GEP1LastIdx->getType() == GEP2LastIdx->getType()) { + KnownBits Known1 = computeKnownBits(GEP1LastIdx, DL); + KnownBits Known2 = computeKnownBits(GEP2LastIdx, DL); + if (Known1.Zero.intersects(Known2.One) || + Known1.One.intersects(Known2.Zero)) + return NoAlias; + } } else if (isKnownNonEqual(GEP1LastIdx, GEP2LastIdx, DL)) return NoAlias; } @@ -1345,11 +1348,7 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // Statically, we can see that the base objects are the same, but the // pointers have dynamic offsets which we can't resolve. And none of our // little tricks above worked. - // - // TODO: Returning PartialAlias instead of MayAlias is a mild hack; the - // practical effect of this is protecting TBAA in the case of dynamic - // indices into arrays of unions or malloc'd memory. 
- return PartialAlias; + return MayAlias; } static AliasResult MergeAliasResults(AliasResult A, AliasResult B) { diff --git a/lib/Analysis/CFLSteensAliasAnalysis.cpp b/lib/Analysis/CFLSteensAliasAnalysis.cpp index dde24ef5fdd57..6e4263920e586 100644 --- a/lib/Analysis/CFLSteensAliasAnalysis.cpp +++ b/lib/Analysis/CFLSteensAliasAnalysis.cpp @@ -80,9 +80,6 @@ public: const AliasSummary &getAliasSummary() const { return Summary; } }; -/// Try to go from a Value* to a Function*. Never returns nullptr. -static Optional<Function *> parentFunctionOfValue(Value *); - const StratifiedIndex StratifiedLink::SetSentinel = std::numeric_limits<StratifiedIndex>::max(); diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp index 8f808f3e78719..926b28d6094a5 100644 --- a/lib/Analysis/DemandedBits.cpp +++ b/lib/Analysis/DemandedBits.cpp @@ -107,6 +107,8 @@ void DemandedBits::determineLiveOperandBits( AB = AOut.byteSwap(); break; case Intrinsic::bitreverse: + // The alive bits of the input are the reversed alive bits of + // the output. AB = AOut.reverseBits(); break; case Intrinsic::ctlz: diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 6ff5938a3175a..77ad6f1e166fd 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -1022,12 +1022,15 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { // inlining those. It will prevent inlining in cases where the optimization // does not (yet) fire. + // Maximum valid cost increased in this function. + int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1; + // Exit early for a large switch, assuming one case needs at least one // instruction. // FIXME: This is not true for a bit test, but ignore such case for now to // save compile-time. int64_t CostLowerBound = - std::min((int64_t)INT_MAX, + std::min((int64_t)CostUpperBound, (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); if (CostLowerBound > Threshold) { @@ -1044,7 +1047,8 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { if (JumpTableSize) { int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost + 4 * InlineConstants::InstrCost; - Cost = std::min((int64_t)INT_MAX, JTCost + Cost); + + Cost = std::min((int64_t)CostUpperBound, JTCost + Cost); return false; } @@ -1068,10 +1072,12 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { Cost += NumCaseCluster * 2 * InlineConstants::InstrCost; return false; } - int64_t ExpectedNumberOfCompare = 3 * (uint64_t)NumCaseCluster / 2 - 1; - uint64_t SwitchCost = + + int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1; + int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; - Cost = std::min((uint64_t)INT_MAX, SwitchCost + Cost); + + Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost); return false; } diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index a975be79619b7..d9e32a3c417e0 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -2688,16 +2688,14 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, } // icmp pred (and X, Y), X - if (LBO && match(LBO, m_CombineOr(m_And(m_Value(), m_Specific(RHS)), - m_And(m_Specific(RHS), m_Value())))) { + if (LBO && match(LBO, m_c_And(m_Value(), m_Specific(RHS)))) { if (Pred == ICmpInst::ICMP_UGT) return getFalse(ITy); if (Pred == ICmpInst::ICMP_ULE) return getTrue(ITy); } // icmp pred X, (and X, Y) - if (RBO && match(RBO, m_CombineOr(m_And(m_Value(), m_Specific(LHS)), 
- m_And(m_Specific(LHS), m_Value())))) { + if (RBO && match(RBO, m_c_And(m_Value(), m_Specific(LHS)))) { if (Pred == ICmpInst::ICMP_UGE) return getTrue(ITy); if (Pred == ICmpInst::ICMP_ULT) diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 3ed61a79478ad..102081e721ac6 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -1324,12 +1324,12 @@ getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest, return getValueFromICmpCondition(Val, ICI, isTrueDest); // Handle conditions in the form of (cond1 && cond2), we know that on the - // true dest path both of the conditions hold. - if (!isTrueDest) - return LVILatticeVal::getOverdefined(); - + // true dest path both of the conditions hold. Similarly for conditions of + // the form (cond1 || cond2), we know that on the false dest path neither + // condition holds. BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond); - if (!BO || BO->getOpcode() != BinaryOperator::And) + if (!BO || (isTrueDest && BO->getOpcode() != BinaryOperator::And) || + (!isTrueDest && BO->getOpcode() != BinaryOperator::Or)) return LVILatticeVal::getOverdefined(); auto RHS = getValueFromCondition(Val, BO->getOperand(0), isTrueDest, Visited); @@ -1660,6 +1660,26 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, return nullptr; } +ConstantRange LazyValueInfo::getConstantRangeOnEdge(Value *V, + BasicBlock *FromBB, + BasicBlock *ToBB, + Instruction *CxtI) { + unsigned Width = V->getType()->getIntegerBitWidth(); + const DataLayout &DL = FromBB->getModule()->getDataLayout(); + LVILatticeVal Result = + getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); + + if (Result.isUndefined()) + return ConstantRange(Width, /*isFullSet=*/false); + if (Result.isConstantRange()) + return Result.getConstantRange(); + // We represent ConstantInt constants as constant ranges but other kinds + // of integer constants, i.e. 
ConstantExpr will be tagged as constants + assert(!(Result.isConstant() && isa<ConstantInt>(Result.getConstant())) && + "ConstantInt value must be represented as constantrange"); + return ConstantRange(Width, /*isFullSet=*/true); +} + static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, const LVILatticeVal &Val, const DataLayout &DL, diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 96799a459bfc4..591b0fc481d24 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -117,6 +117,16 @@ static bool isDereferenceableAndAlignedPointer( } bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, + const APInt &Size, + const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT) { + SmallPtrSet<const Value *, 32> Visited; + return ::isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT, + Visited); +} + +bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 3fdedbb0ab3c2..263cf42ebe271 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -310,11 +310,11 @@ unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize( } static bool isVolatile(Instruction *Inst) { - if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) + if (auto *LI = dyn_cast<LoadInst>(Inst)) return LI->isVolatile(); - else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + if (auto *SI = dyn_cast<StoreInst>(Inst)) return SI->isVolatile(); - else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst)) + if (auto *AI = dyn_cast<AtomicCmpXchgInst>(Inst)) return AI->isVolatile(); return false; } diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index aebc80a0a8851..73a95ec405c7b 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -126,11 +126,11 @@ static cl::opt<bool> static cl::opt<unsigned> MulOpsInlineThreshold( "scev-mulops-inline-threshold", cl::Hidden, cl::desc("Threshold for inlining multiplication operands into a SCEV"), - cl::init(1000)); + cl::init(32)); static cl::opt<unsigned> AddOpsInlineThreshold( "scev-addops-inline-threshold", cl::Hidden, - cl::desc("Threshold for inlining multiplication operands into a SCEV"), + cl::desc("Threshold for inlining addition operands into a SCEV"), cl::init(500)); static cl::opt<unsigned> MaxSCEVCompareDepth( @@ -1259,12 +1259,12 @@ static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step, if (SE->isKnownPositive(Step)) { *Pred = ICmpInst::ICMP_SLT; return SE->getConstant(APInt::getSignedMinValue(BitWidth) - - SE->getSignedRange(Step).getSignedMax()); + SE->getSignedRangeMax(Step)); } if (SE->isKnownNegative(Step)) { *Pred = ICmpInst::ICMP_SGT; return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - - SE->getSignedRange(Step).getSignedMin()); + SE->getSignedRangeMin(Step)); } return nullptr; } @@ -1279,7 +1279,7 @@ static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step, *Pred = ICmpInst::ICMP_ULT; return SE->getConstant(APInt::getMinValue(BitWidth) - - SE->getUnsignedRange(Step).getUnsignedMax()); + SE->getUnsignedRangeMax(Step)); } namespace { @@ -1670,7 +1670,7 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, // is safe. 
if (isKnownPositive(Step)) { const SCEV *N = getConstant(APInt::getMinValue(BitWidth) - - getUnsignedRange(Step).getUnsignedMax()); + getUnsignedRangeMax(Step)); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) || (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, @@ -1686,7 +1686,7 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, } } else if (isKnownNegative(Step)) { const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - - getSignedRange(Step).getSignedMin()); + getSignedRangeMin(Step)); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) || (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, @@ -3745,7 +3745,7 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, // makes it so that we cannot make much use of NUW. auto AddFlags = SCEV::FlagAnyWrap; const bool RHSIsNotMinSigned = - !getSignedRange(RHS).getSignedMin().isMinSignedValue(); + !getSignedRangeMin(RHS).isMinSignedValue(); if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) { // Let M be the minimum representable signed value. Then (-1)*RHS // signed-wraps if and only if RHS is M. That can happen even for @@ -4758,9 +4758,9 @@ static Optional<ConstantRange> GetRangeFromMetadata(Value *V) { /// Determine the range for a particular SCEV. If SignHint is /// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges /// with a "cleaner" unsigned (resp. signed) representation. -ConstantRange -ScalarEvolution::getRange(const SCEV *S, - ScalarEvolution::RangeSignHint SignHint) { +const ConstantRange & +ScalarEvolution::getRangeRef(const SCEV *S, + ScalarEvolution::RangeSignHint SignHint) { DenseMap<const SCEV *, ConstantRange> &Cache = SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? 
UnsignedRanges : SignedRanges; @@ -4791,54 +4791,54 @@ ScalarEvolution::getRange(const SCEV *S, } if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { - ConstantRange X = getRange(Add->getOperand(0), SignHint); + ConstantRange X = getRangeRef(Add->getOperand(0), SignHint); for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) - X = X.add(getRange(Add->getOperand(i), SignHint)); + X = X.add(getRangeRef(Add->getOperand(i), SignHint)); return setRange(Add, SignHint, ConservativeResult.intersectWith(X)); } if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { - ConstantRange X = getRange(Mul->getOperand(0), SignHint); + ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint); for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) - X = X.multiply(getRange(Mul->getOperand(i), SignHint)); + X = X.multiply(getRangeRef(Mul->getOperand(i), SignHint)); return setRange(Mul, SignHint, ConservativeResult.intersectWith(X)); } if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) { - ConstantRange X = getRange(SMax->getOperand(0), SignHint); + ConstantRange X = getRangeRef(SMax->getOperand(0), SignHint); for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) - X = X.smax(getRange(SMax->getOperand(i), SignHint)); + X = X.smax(getRangeRef(SMax->getOperand(i), SignHint)); return setRange(SMax, SignHint, ConservativeResult.intersectWith(X)); } if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) { - ConstantRange X = getRange(UMax->getOperand(0), SignHint); + ConstantRange X = getRangeRef(UMax->getOperand(0), SignHint); for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) - X = X.umax(getRange(UMax->getOperand(i), SignHint)); + X = X.umax(getRangeRef(UMax->getOperand(i), SignHint)); return setRange(UMax, SignHint, ConservativeResult.intersectWith(X)); } if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { - ConstantRange X = getRange(UDiv->getLHS(), SignHint); - ConstantRange Y = getRange(UDiv->getRHS(), SignHint); + ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint); + ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint); return setRange(UDiv, SignHint, ConservativeResult.intersectWith(X.udiv(Y))); } if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) { - ConstantRange X = getRange(ZExt->getOperand(), SignHint); + ConstantRange X = getRangeRef(ZExt->getOperand(), SignHint); return setRange(ZExt, SignHint, ConservativeResult.intersectWith(X.zeroExtend(BitWidth))); } if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) { - ConstantRange X = getRange(SExt->getOperand(), SignHint); + ConstantRange X = getRangeRef(SExt->getOperand(), SignHint); return setRange(SExt, SignHint, ConservativeResult.intersectWith(X.signExtend(BitWidth))); } if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { - ConstantRange X = getRange(Trunc->getOperand(), SignHint); + ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint); return setRange(Trunc, SignHint, ConservativeResult.intersectWith(X.truncate(BitWidth))); } @@ -5005,8 +5005,7 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start, "Precondition!"); MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType()); - ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount); - APInt MaxBECountValue = MaxBECountRange.getUnsignedMax(); + APInt MaxBECountValue = getUnsignedRangeMax(MaxBECount); // First, consider step signed. 
ConstantRange StartSRange = getSignedRange(Start); @@ -5023,7 +5022,7 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start, // Next, consider step unsigned. ConstantRange UR = getRangeForAffineARHelper( - getUnsignedRange(Step).getUnsignedMax(), getUnsignedRange(Start), + getUnsignedRangeMax(Step), getUnsignedRange(Start), MaxBECountValue, BitWidth, /* Signed = */ false); // Finally, intersect signed and unsigned ranges. @@ -6373,7 +6372,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( // to not. if (isa<SCEVCouldNotCompute>(MaxBECount) && !isa<SCEVCouldNotCompute>(BECount)) - MaxBECount = getConstant(getUnsignedRange(BECount).getUnsignedMax()); + MaxBECount = getConstant(getUnsignedRangeMax(BECount)); return ExitLimit(BECount, MaxBECount, false, {&EL0.Predicates, &EL1.Predicates}); @@ -7647,7 +7646,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, // 1*N = -Start; -1*N = Start (mod 2^BW), so: // N = Distance (as unsigned) if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) { - APInt MaxBECount = getUnsignedRange(Distance).getUnsignedMax(); + APInt MaxBECount = getUnsignedRangeMax(Distance); // When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated, // we end up with a loop whose backedge-taken count is n - 1. Detect this @@ -7680,7 +7679,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, const SCEV *Max = Exact == getCouldNotCompute() ? Exact - : getConstant(getUnsignedRange(Exact).getUnsignedMax()); + : getConstant(getUnsignedRangeMax(Exact)); return ExitLimit(Exact, Max, false, Predicates); } @@ -7689,7 +7688,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, getNegativeSCEV(Start), *this); const SCEV *M = E == getCouldNotCompute() ? E - : getConstant(getUnsignedRange(E).getUnsignedMax()); + : getConstant(getUnsignedRangeMax(E)); return ExitLimit(E, M, false, Predicates); } @@ -7886,12 +7885,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, // adding or subtracting 1 from one of the operands. 
switch (Pred) { case ICmpInst::ICMP_SLE: - if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) { + if (!getSignedRangeMax(RHS).isMaxSignedValue()) { RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, SCEV::FlagNSW); Pred = ICmpInst::ICMP_SLT; Changed = true; - } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) { + } else if (!getSignedRangeMin(LHS).isMinSignedValue()) { LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, SCEV::FlagNSW); Pred = ICmpInst::ICMP_SLT; @@ -7899,12 +7898,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, } break; case ICmpInst::ICMP_SGE: - if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) { + if (!getSignedRangeMin(RHS).isMinSignedValue()) { RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, SCEV::FlagNSW); Pred = ICmpInst::ICMP_SGT; Changed = true; - } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) { + } else if (!getSignedRangeMax(LHS).isMaxSignedValue()) { LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, SCEV::FlagNSW); Pred = ICmpInst::ICMP_SGT; @@ -7912,23 +7911,23 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, } break; case ICmpInst::ICMP_ULE: - if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) { + if (!getUnsignedRangeMax(RHS).isMaxValue()) { RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, SCEV::FlagNUW); Pred = ICmpInst::ICMP_ULT; Changed = true; - } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) { + } else if (!getUnsignedRangeMin(LHS).isMinValue()) { LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS); Pred = ICmpInst::ICMP_ULT; Changed = true; } break; case ICmpInst::ICMP_UGE: - if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) { + if (!getUnsignedRangeMin(RHS).isMinValue()) { RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS); Pred = ICmpInst::ICMP_UGT; Changed = true; - } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) { + } else if (!getUnsignedRangeMax(LHS).isMaxValue()) { LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, SCEV::FlagNUW); Pred = ICmpInst::ICMP_UGT; @@ -7962,19 +7961,19 @@ trivially_false: } bool ScalarEvolution::isKnownNegative(const SCEV *S) { - return getSignedRange(S).getSignedMax().isNegative(); + return getSignedRangeMax(S).isNegative(); } bool ScalarEvolution::isKnownPositive(const SCEV *S) { - return getSignedRange(S).getSignedMin().isStrictlyPositive(); + return getSignedRangeMin(S).isStrictlyPositive(); } bool ScalarEvolution::isKnownNonNegative(const SCEV *S) { - return !getSignedRange(S).getSignedMin().isNegative(); + return !getSignedRangeMin(S).isNegative(); } bool ScalarEvolution::isKnownNonPositive(const SCEV *S) { - return !getSignedRange(S).getSignedMax().isStrictlyPositive(); + return !getSignedRangeMax(S).isStrictlyPositive(); } bool ScalarEvolution::isKnownNonZero(const SCEV *S) { @@ -8560,7 +8559,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, // predicate we're interested in folding. APInt Min = ICmpInst::isSigned(Pred) ? - getSignedRange(V).getSignedMin() : getUnsignedRange(V).getUnsignedMin(); + getSignedRangeMin(V) : getUnsignedRangeMin(V); if (Min == C->getAPInt()) { // Given (V >= Min && V != Min) we conclude V >= (Min + 1). 
@@ -9115,19 +9114,17 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, const SCEV *One = getOne(Stride->getType()); if (IsSigned) { - APInt MaxRHS = getSignedRange(RHS).getSignedMax(); + APInt MaxRHS = getSignedRangeMax(RHS); APInt MaxValue = APInt::getSignedMaxValue(BitWidth); - APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One)) - .getSignedMax(); + APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One)); // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow! return (std::move(MaxValue) - MaxStrideMinusOne).slt(MaxRHS); } - APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax(); + APInt MaxRHS = getUnsignedRangeMax(RHS); APInt MaxValue = APInt::getMaxValue(BitWidth); - APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One)) - .getUnsignedMax(); + APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One)); // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow! return (std::move(MaxValue) - MaxStrideMinusOne).ult(MaxRHS); @@ -9141,19 +9138,17 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, const SCEV *One = getOne(Stride->getType()); if (IsSigned) { - APInt MinRHS = getSignedRange(RHS).getSignedMin(); + APInt MinRHS = getSignedRangeMin(RHS); APInt MinValue = APInt::getSignedMinValue(BitWidth); - APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One)) - .getSignedMax(); + APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One)); // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow! return (std::move(MinValue) + MaxStrideMinusOne).sgt(MinRHS); } - APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin(); + APInt MinRHS = getUnsignedRangeMin(RHS); APInt MinValue = APInt::getMinValue(BitWidth); - APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One)) - .getUnsignedMax(); + APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One)); // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow! return (std::move(MinValue) + MaxStrideMinusOne).ugt(MinRHS); @@ -9292,8 +9287,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, } else { // Calculate the maximum backedge count based on the range of values // permitted by Start, End, and Stride. - APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin() - : getUnsignedRange(Start).getUnsignedMin(); + APInt MinStart = IsSigned ? getSignedRangeMin(Start) + : getUnsignedRangeMin(Start); unsigned BitWidth = getTypeSizeInBits(LHS->getType()); @@ -9301,8 +9296,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, if (PositiveStride) StrideForMaxBECount = - IsSigned ? getSignedRange(Stride).getSignedMin() - : getUnsignedRange(Stride).getUnsignedMin(); + IsSigned ? getSignedRangeMin(Stride) + : getUnsignedRangeMin(Stride); else // Using a stride of 1 is safe when computing max backedge taken count for // a loop with unknown stride. @@ -9316,8 +9311,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, // the case End = RHS. This is safe because in the other case (End - Start) // is zero, leading to a zero maximum backedge taken count. APInt MaxEnd = - IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit) - : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit); + IsSigned ? 
APIntOps::smin(getSignedRangeMax(RHS), Limit) + : APIntOps::umin(getUnsignedRangeMax(RHS), Limit); MaxBECount = computeBECount(getConstant(MaxEnd - MinStart), getConstant(StrideForMaxBECount), false); @@ -9325,7 +9320,7 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, if (isa<SCEVCouldNotCompute>(MaxBECount) && !isa<SCEVCouldNotCompute>(BECount)) - MaxBECount = getConstant(getUnsignedRange(BECount).getUnsignedMax()); + MaxBECount = getConstant(getUnsignedRangeMax(BECount)); return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates); } @@ -9376,11 +9371,11 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false); - APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax() - : getUnsignedRange(Start).getUnsignedMax(); + APInt MaxStart = IsSigned ? getSignedRangeMax(Start) + : getUnsignedRangeMax(Start); - APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin() - : getUnsignedRange(Stride).getUnsignedMin(); + APInt MinStride = IsSigned ? getSignedRangeMin(Stride) + : getUnsignedRangeMin(Stride); unsigned BitWidth = getTypeSizeInBits(LHS->getType()); APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1) @@ -9390,8 +9385,8 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, // the case End = RHS. This is safe because in the other case (Start - End) // is zero, leading to a zero maximum backedge taken count. APInt MinEnd = - IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit) - : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit); + IsSigned ? APIntOps::smax(getSignedRangeMin(RHS), Limit) + : APIntOps::umax(getUnsignedRangeMin(RHS), Limit); const SCEV *MaxBECount = getCouldNotCompute(); diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index f9b9df2bc707d..47bdac00ae1f3 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -748,18 +748,56 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { // Emit instructions to mul all the operands. Hoist as much as possible // out of loops. Value *Prod = nullptr; - for (const auto &I : OpsAndLoops) { - const SCEV *Op = I.second; + auto I = OpsAndLoops.begin(); + + // Expand the calculation of X pow N in the following manner: + // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then: + // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK). + const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty]() { + auto E = I; + // Calculate how many times the same operand from the same loop is included + // into this power. + uint64_t Exponent = 0; + const uint64_t MaxExponent = UINT64_MAX >> 1; + // No one sane will ever try to calculate such huge exponents, but if we + // need this, we stop on UINT64_MAX / 2 because we need to exit the loop + // below when the power of 2 exceeds our Exponent, and we want it to be + // 1u << 31 at most to not deal with unsigned overflow. + while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) { + ++Exponent; + ++E; + } + assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?"); + + // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them + // that are needed into the result. 
+ Value *P = expandCodeFor(I->second, Ty); + Value *Result = nullptr; + if (Exponent & 1) + Result = P; + for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) { + P = InsertBinop(Instruction::Mul, P, P); + if (Exponent & BinExp) + Result = Result ? InsertBinop(Instruction::Mul, Result, P) : P; + } + + I = E; + assert(Result && "Nothing was expanded?"); + return Result; + }; + + while (I != OpsAndLoops.end()) { if (!Prod) { // This is the first operand. Just expand it. - Prod = expand(Op); - } else if (Op->isAllOnesValue()) { + Prod = ExpandOpBinPowN(); + } else if (I->second->isAllOnesValue()) { // Instead of doing a multiply by negative one, just do a negate. Prod = InsertNoopCastOfTo(Prod, Ty); Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod); + ++I; } else { // A simple mul. - Value *W = expandCodeFor(Op, Ty); + Value *W = ExpandOpBinPowN(); Prod = InsertNoopCastOfTo(Prod, Ty); // Canonicalize a constant to the RHS. if (isa<Constant>(Prod)) std::swap(Prod, W); diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index e920c4c4e6b2b..cd9972ab56a68 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -58,7 +58,7 @@ // // The struct type node has a name and a list of pairs, one pair for each member // of the struct. The first element of each pair is a type node (a struct type -// node or a sclar type node), specifying the type of the member, the second +// node or a scalar type node), specifying the type of the member, the second // element of each pair is the offset of the member. // // Given an example diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index b065f427b06cb..fd6e3a643bf03 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -686,8 +686,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, Known.One |= RHSKnown.Zero; // assume(v >> c = a) } else if (match(Arg, - m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)), - m_AShr(m_V, m_ConstantInt(C))), + m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { @@ -698,9 +697,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, Known.Zero |= RHSKnown.Zero << C->getZExtValue(); Known.One |= RHSKnown.One << C->getZExtValue(); // assume(~(v >> c) = a) - } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_CombineOr( - m_LShr(m_V, m_ConstantInt(C)), - m_AShr(m_V, m_ConstantInt(C)))), + } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { diff --git a/lib/BinaryFormat/Magic.cpp b/lib/BinaryFormat/Magic.cpp index ca4d93f99d92d..f24f22c88a8aa 100644 --- a/lib/BinaryFormat/Magic.cpp +++ b/lib/BinaryFormat/Magic.cpp @@ -51,7 +51,8 @@ file_magic llvm::identify_magic(StringRef Magic) { return file_magic::coff_import_library; } // Windows resource file - if (startswith(Magic, "\0\0\0\0\x20\0\0\0\xFF")) + if (Magic.size() >= sizeof(COFF::WinResMagic) && + memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0) return file_magic::windows_resource; // 0x0000 = COFF unknown machine type if (Magic[1] == 0) diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index ad348d723bae0..c48fcaa7b0d1d 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ 
-2801,26 +2801,24 @@ void AsmPrinter::emitXRayTable() { } // Before we switch over, we force a reference to a label inside the - // xray_instr_map and xray_fn_idx sections. Since this function is always - // called just before the function's end, we assume that this is happening - // after the last return instruction. We also use the synthetic label in the - // xray_inster_map as a delimeter for the range of sleds for this function in - // the index. + // xray_fn_idx sections. This makes sure that the xray_fn_idx section is kept + // live by the linker if the function is not garbage-collected. Since this + // function is always called just before the function's end, we assume that + // this is happening after the last return instruction. auto WordSizeBytes = MAI->getCodePointerSize(); - MCSymbol *SledsStart = OutContext.createTempSymbol("xray_synthetic_", true); MCSymbol *IdxRef = OutContext.createTempSymbol("xray_fn_idx_synth_", true); OutStreamer->EmitCodeAlignment(16); - OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes, false); OutStreamer->EmitSymbolValue(IdxRef, WordSizeBytes, false); // Now we switch to the instrumentation map section. Because this is done // per-function, we are able to create an index entry that will represent the // range of sleds associated with a function. + MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true); OutStreamer->SwitchSection(InstMap); OutStreamer->EmitLabel(SledsStart); for (const auto &Sled : Sleds) Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym); - MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_synthetic_end", true); + MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_sleds_end", true); OutStreamer->EmitLabel(SledsEnd); // We then emit a single entry in the index per function. We use the symbols diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 30bfd7c94e68b..886e6e264b3ec 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -105,7 +105,7 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const { } LLVM_DUMP_METHOD -void DIEAbbrev::print(raw_ostream &O) { +void DIEAbbrev::print(raw_ostream &O) const { O << "Abbreviation @" << format("0x%lx", (long)(intptr_t)this) << " " @@ -128,7 +128,7 @@ void DIEAbbrev::print(raw_ostream &O) { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void DIEAbbrev::dump() { +LLVM_DUMP_METHOD void DIEAbbrev::dump() const { print(dbgs()); } #endif @@ -268,7 +268,7 @@ void DIE::print(raw_ostream &O, unsigned IndentCount) const { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void DIE::dump() { +LLVM_DUMP_METHOD void DIE::dump() const { print(dbgs()); } #endif diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 75eb355bfb543..f1b4d9f20ca96 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -972,16 +972,62 @@ DbgVariable *DwarfDebug::createConcreteVariable(DwarfCompileUnit &TheCU, return ConcreteVariables.back().get(); } -// Determine whether this DBG_VALUE is valid at the beginning of the function. -static bool validAtEntry(const MachineInstr *MInsn) { - auto MBB = MInsn->getParent(); - // Is it in the entry basic block? - if (!MBB->pred_empty()) +/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its +/// enclosing lexical scope. 
The check ensures there are no other instructions +/// in the same lexical scope preceding the DBG_VALUE and that its range is +/// either open or otherwise rolls off the end of the scope. +static bool validThroughout(LexicalScopes &LScopes, + const MachineInstr *DbgValue, + const MachineInstr *RangeEnd) { + assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location"); + auto MBB = DbgValue->getParent(); + auto DL = DbgValue->getDebugLoc(); + auto *LScope = LScopes.findLexicalScope(DL); + // Scope doesn't exist; this is a dead DBG_VALUE. + if (!LScope) return false; - for (MachineBasicBlock::const_reverse_iterator I(MInsn); I != MBB->rend(); ++I) - if (!(I->isDebugValue() || I->getFlag(MachineInstr::FrameSetup))) + auto &LSRange = LScope->getRanges(); + if (LSRange.size() == 0) + return false; + + // Determine if the DBG_VALUE is valid at the beginning of its lexical block. + const MachineInstr *LScopeBegin = LSRange.front().first; + // Early exit if the lexical scope begins outside of the current block. + if (LScopeBegin->getParent() != MBB) + return false; + MachineBasicBlock::const_reverse_iterator Pred(DbgValue); + for (++Pred; Pred != MBB->rend(); ++Pred) { + if (Pred->getFlag(MachineInstr::FrameSetup)) + break; + auto PredDL = Pred->getDebugLoc(); + if (!PredDL || Pred->isMetaInstruction()) + continue; + // Check whether the instruction preceding the DBG_VALUE is in the same + // (sub)scope as the DBG_VALUE. + if (DL->getScope() == PredDL->getScope()) return false; - return true; + auto *PredScope = LScopes.findLexicalScope(PredDL); + if (!PredScope || LScope->dominates(PredScope)) + return false; + } + + // If the range of the DBG_VALUE is open-ended, report success. + if (!RangeEnd) + return true; + + // Fail if there are instructions belonging to our scope in another block. + const MachineInstr *LScopeEnd = LSRange.back().second; + if (LScopeEnd->getParent() != MBB) + return false; + + // Single, constant DBG_VALUEs in the prologue are promoted to be live + // throughout the function. This is a hack, presumably for DWARF v2 and not + // necessarily correct. It would be much better to use a dbg.declare instead + // if we know the constant is live throughout the scope. + if (DbgValue->getOperand(0).isImm() && MBB->pred_empty()) + return true; + + return false; } // Find variables for each lexical scope. @@ -1016,11 +1062,9 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, const MachineInstr *MInsn = Ranges.front().first; assert(MInsn->isDebugValue() && "History must begin with debug value"); - // Check if there is a single DBG_VALUE, valid throughout the function. - // A single constant is also considered valid for the entire function. + // Check if there is a single DBG_VALUE, valid throughout the var's scope. 
if (Ranges.size() == 1 && - (MInsn->getOperand(0).isImm() || - (validAtEntry(MInsn) && Ranges.front().second == nullptr))) { + validThroughout(LScopes, MInsn, Ranges.front().second)) { RegVar->initializeDbgValue(MInsn); continue; } diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 55a27e2fb79e5..7f3c6da912687 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -92,6 +92,7 @@ add_llvm_library(LLVMCodeGen PatchableFunction.cpp MIRPrinter.cpp MIRPrintingPass.cpp + MacroFusion.cpp OptimizePHIs.cpp ParallelCG.cpp PeepholeOptimizer.cpp diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 37e176099ea7a..cb31c21293f44 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -1663,17 +1663,18 @@ class MemCmpExpansion { bool IsUsedForZeroCmp; const DataLayout &DL; - int calculateNumBlocks(unsigned Size); + unsigned calculateNumBlocks(unsigned Size); void createLoadCmpBlocks(); void createResultBlock(); void setupResultBlockPHINodes(); void setupEndBlockPHINodes(); - void emitLoadCompareBlock(unsigned Index, int LoadSize, int GEPIndex); + void emitLoadCompareBlock(unsigned Index, unsigned LoadSize, + unsigned GEPIndex); Value *getCompareLoadPairs(unsigned Index, unsigned Size, unsigned &NumBytesProcessed, IRBuilder<> &Builder); void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size, unsigned &NumBytesProcessed); - void emitLoadCompareByteBlock(unsigned Index, int GEPIndex); + void emitLoadCompareByteBlock(unsigned Index, unsigned GEPIndex); void emitMemCmpResultBlock(); Value *getMemCmpExpansionZeroCase(unsigned Size); Value *getMemCmpEqZeroOneBlock(unsigned Size); @@ -1751,7 +1752,8 @@ void MemCmpExpansion::createResultBlock() { // It loads 1 byte from each source of the memcmp parameters with the given // GEPIndex. It then subtracts the two loaded values and adds this result to the // final phi node for selecting the memcmp result. -void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, int GEPIndex) { +void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, + unsigned GEPIndex) { IRBuilder<> Builder(CI->getContext()); Value *Source1 = CI->getArgOperand(0); @@ -1833,6 +1835,7 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size, Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + assert(LoadSize <= MaxLoadSize && "Unexpected load type"); Value *Source1 = CI->getArgOperand(0); Value *Source2 = CI->getArgOperand(1); @@ -1851,18 +1854,28 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size, ConstantInt::get(LoadSizeType, GEPIndex)); } - // Load LoadSizeType from the base address. - Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + // Get a constant or load a value for each source address. 
+ Value *LoadSrc1 = nullptr; + if (auto *Source1C = dyn_cast<Constant>(Source1)) + LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL); + if (!LoadSrc1) + LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + + Value *LoadSrc2 = nullptr; + if (auto *Source2C = dyn_cast<Constant>(Source2)) + LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL); + if (!LoadSrc2) + LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + if (NumLoads != 1) { if (LoadSizeType != MaxLoadType) { - LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType); - LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType); + LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); } // If we have multiple loads per block, we need to generate a composite // comparison using xor+or. Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); - Diff = Builder.CreateZExtOrTrunc(Diff, MaxLoadType); + Diff = Builder.CreateZExt(Diff, MaxLoadType); XorList.push_back(Diff); } else { // If there's only one load per block, we just compare the loaded values. @@ -1926,8 +1939,8 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( // the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with // a special case through emitLoadCompareByteBlock. The special handling can // simply subtract the loaded values and add it to the result phi node. -void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize, - int GEPIndex) { +void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize, + unsigned GEPIndex) { if (LoadSize == 1) { MemCmpExpansion::emitLoadCompareByteBlock(Index, GEPIndex); return; @@ -1937,6 +1950,7 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize, Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + assert(LoadSize <= MaxLoadSize && "Unexpected load type"); Value *Source1 = CI->getArgOperand(0); Value *Source2 = CI->getArgOperand(1); @@ -1970,8 +1984,8 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize, } if (LoadSizeType != MaxLoadType) { - LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType); - LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType); + LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); } // Add the loaded values to the phi nodes for calculating memcmp result only @@ -2034,8 +2048,8 @@ void MemCmpExpansion::emitMemCmpResultBlock() { PhiRes->addIncoming(Res, ResBlock.BB); } -int MemCmpExpansion::calculateNumBlocks(unsigned Size) { - int NumBlocks = 0; +unsigned MemCmpExpansion::calculateNumBlocks(unsigned Size) { + unsigned NumBlocks = 0; bool HaveOneByteLoad = false; unsigned RemainingSize = Size; unsigned LoadSize = MaxLoadSize; @@ -2104,13 +2118,13 @@ Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) { // memcmp sources. It starts with loading using the maximum load size set by // the target. It processes any remaining bytes using a load size which is the // next smallest power of 2. - int LoadSize = MaxLoadSize; - int NumBytesToBeProcessed = Size; + unsigned LoadSize = MaxLoadSize; + unsigned NumBytesToBeProcessed = Size; unsigned Index = 0; while (NumBytesToBeProcessed) { // Calculate how many blocks we can create with the current load size. 
- int NumBlocks = NumBytesToBeProcessed / LoadSize; - int GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize; + unsigned NumBlocks = NumBytesToBeProcessed / LoadSize; + unsigned GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize; NumBytesToBeProcessed = NumBytesToBeProcessed % LoadSize; // For each NumBlocks, populate the instruction sequence for loading and diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index dccd8e0706ca6..239bad2f53557 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -582,7 +582,7 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op, MIB.addUse(Zero); } - MIRBuilder.buildSequence(getOrCreateVReg(CI), Res, 0, Overflow, Width); + MIRBuilder.buildSequence(getOrCreateVReg(CI), {Res, Overflow}, {0, Width}); return true; } @@ -686,6 +686,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, .addUse(getOrCreateVReg(*CI.getArgOperand(0))) .addUse(getOrCreateVReg(*CI.getArgOperand(1))); return true; + case Intrinsic::fma: + MIRBuilder.buildInstr(TargetOpcode::G_FMA) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))) + .addUse(getOrCreateVReg(*CI.getArgOperand(1))) + .addUse(getOrCreateVReg(*CI.getArgOperand(2))); + return true; case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 4c0b06dffd216..5466efd7e90f4 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -25,6 +25,18 @@ using namespace llvm; InstructionSelector::InstructionSelector() {} +bool InstructionSelector::constrainOperandRegToRegClass( + MachineInstr &I, unsigned OpIdx, const TargetRegisterClass &RC, + const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) const { + MachineBasicBlock &MBB = *I.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + return llvm::constrainRegToClass(MRI, TII, RBI, I, + I.getOperand(OpIdx).getReg(), RC); +} + bool InstructionSelector::constrainSelectedInstRegOperands( MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const { diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp index 1b50489deeba9..b699156c568b4 100644 --- a/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -50,72 +50,9 @@ void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const { void Legalizer::init(MachineFunction &MF) { } -bool Legalizer::combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI, - const TargetInstrInfo &TII) { - bool Changed = false; - if (MI.getOpcode() != TargetOpcode::G_EXTRACT) - return Changed; - - unsigned NumDefs = (MI.getNumOperands() - 1) / 2; - unsigned SrcReg = MI.getOperand(NumDefs).getReg(); - MachineInstr &SeqI = *MRI.def_instr_begin(SrcReg); - if (SeqI.getOpcode() != TargetOpcode::G_SEQUENCE) - return Changed; - - unsigned NumSeqSrcs = (SeqI.getNumOperands() - 1) / 2; - bool AllDefsReplaced = true; - - // Try to match each register extracted with a corresponding insertion formed - // by the G_SEQUENCE. 
- for (unsigned Idx = 0, SeqIdx = 0; Idx < NumDefs; ++Idx) { - MachineOperand &ExtractMO = MI.getOperand(Idx); - assert(ExtractMO.isReg() && ExtractMO.isDef() && - "unexpected extract operand"); - - unsigned ExtractReg = ExtractMO.getReg(); - unsigned ExtractPos = MI.getOperand(NumDefs + Idx + 1).getImm(); - - while (SeqIdx < NumSeqSrcs && - SeqI.getOperand(2 * SeqIdx + 2).getImm() < ExtractPos) - ++SeqIdx; - - if (SeqIdx == NumSeqSrcs) { - AllDefsReplaced = false; - continue; - } - - unsigned OrigReg = SeqI.getOperand(2 * SeqIdx + 1).getReg(); - if (SeqI.getOperand(2 * SeqIdx + 2).getImm() != ExtractPos || - MRI.getType(OrigReg) != MRI.getType(ExtractReg)) { - AllDefsReplaced = false; - continue; - } - - assert(!TargetRegisterInfo::isPhysicalRegister(OrigReg) && - "unexpected physical register in G_SEQUENCE"); - - // Finally we can replace the uses. - MRI.replaceRegWith(ExtractReg, OrigReg); - } - - if (AllDefsReplaced) { - // If SeqI was the next instruction in the BB and we removed it, we'd break - // the outer iteration. - assert(std::next(MachineBasicBlock::iterator(MI)) != SeqI && - "G_SEQUENCE does not dominate G_EXTRACT"); - - MI.eraseFromParent(); - - if (MRI.use_empty(SrcReg)) - SeqI.eraseFromParent(); - Changed = true; - } - - return Changed; -} - bool Legalizer::combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI, - const TargetInstrInfo &TII) { + const TargetInstrInfo &TII, + MachineIRBuilder &MIRBuilder) { if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES) return false; @@ -125,18 +62,62 @@ bool Legalizer::combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI, if (MergeI.getOpcode() != TargetOpcode::G_MERGE_VALUES) return false; - if (MergeI.getNumOperands() - 1 != NumDefs) - return false; + const unsigned NumMergeRegs = MergeI.getNumOperands() - 1; - // FIXME: is a COPY appropriate if the types mismatch? We know both registers - // are allocatable by now. - if (MRI.getType(MI.getOperand(0).getReg()) != - MRI.getType(MergeI.getOperand(1).getReg())) - return false; + if (NumMergeRegs < NumDefs) { + if (NumDefs % NumMergeRegs != 0) + return false; + + MIRBuilder.setInstr(MI); + // Transform to UNMERGEs, for example + // %1 = G_MERGE_VALUES %4, %5 + // %9, %10, %11, %12 = G_UNMERGE_VALUES %1 + // to + // %9, %10 = G_UNMERGE_VALUES %4 + // %11, %12 = G_UNMERGE_VALUES %5 + + const unsigned NewNumDefs = NumDefs / NumMergeRegs; + for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) { + SmallVector<unsigned, 2> DstRegs; + for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs; + ++j, ++DefIdx) + DstRegs.push_back(MI.getOperand(DefIdx).getReg()); + + MIRBuilder.buildUnmerge(DstRegs, MergeI.getOperand(Idx + 1).getReg()); + } + + } else if (NumMergeRegs > NumDefs) { + if (NumMergeRegs % NumDefs != 0) + return false; + + MIRBuilder.setInstr(MI); + // Transform to MERGEs + // %6 = G_MERGE_VALUES %17, %18, %19, %20 + // %7, %8 = G_UNMERGE_VALUES %6 + // to + // %7 = G_MERGE_VALUES %17, %18 + // %8 = G_MERGE_VALUES %19, %20 + + const unsigned NumRegs = NumMergeRegs / NumDefs; + for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) { + SmallVector<unsigned, 2> Regs; + for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs; ++j, ++Idx) + Regs.push_back(MergeI.getOperand(Idx).getReg()); + + MIRBuilder.buildMerge(MI.getOperand(DefIdx).getReg(), Regs); + } - for (unsigned Idx = 0; Idx < NumDefs; ++Idx) - MRI.replaceRegWith(MI.getOperand(Idx).getReg(), - MergeI.getOperand(Idx + 1).getReg()); + } else { + // FIXME: is a COPY appropriate if the types mismatch? 
We know both + // registers are allocatable by now. + if (MRI.getType(MI.getOperand(0).getReg()) != + MRI.getType(MergeI.getOperand(1).getReg())) + return false; + + for (unsigned Idx = 0; Idx < NumDefs; ++Idx) + MRI.replaceRegWith(MI.getOperand(Idx).getReg(), + MergeI.getOperand(Idx + 1).getReg()); + } MI.eraseFromParent(); if (MRI.use_empty(MergeI.getOperand(0).getReg())) @@ -226,13 +207,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // Get the next Instruction before we try to legalize, because there's a // good chance MI will be deleted. NextMI = std::next(MI); - - // combineExtracts erases MI. - if (combineExtracts(*MI, MRI, TII)) { - Changed = true; - continue; - } - Changed |= combineMerges(*MI, MRI, TII); + Changed |= combineMerges(*MI, MRI, TII, Helper.MIRBuilder); } } diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 4d45910422967..595802f2228b9 100644 --- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -75,8 +75,7 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const { // FIXME: the long-term plan calls for expansion in terms of load/store (if // they're not legal). - if (Aspect.Opcode == TargetOpcode::G_SEQUENCE || - Aspect.Opcode == TargetOpcode::G_EXTRACT || + if (Aspect.Opcode == TargetOpcode::G_EXTRACT || Aspect.Opcode == TargetOpcode::G_MERGE_VALUES || Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES) return std::make_pair(Legal, Aspect.Type); diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 79d312fb52ca4..3c70013ea296b 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -425,10 +425,8 @@ MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src, .addImm(Index); } -MachineInstrBuilder -MachineIRBuilder::buildSequence(unsigned Res, - ArrayRef<unsigned> Ops, - ArrayRef<uint64_t> Indices) { +void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops, + ArrayRef<uint64_t> Indices) { #ifndef NDEBUG assert(Ops.size() == Indices.size() && "incompatible args"); assert(!Ops.empty() && "invalid trivial sequence"); @@ -440,13 +438,31 @@ MachineIRBuilder::buildSequence(unsigned Res, assert(MRI->getType(Op).isValid() && "invalid operand type"); #endif - MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_SEQUENCE); - MIB.addDef(Res); + LLT ResTy = MRI->getType(Res); + LLT OpTy = MRI->getType(Ops[0]); + unsigned OpSize = OpTy.getSizeInBits(); + bool MaybeMerge = true; for (unsigned i = 0; i < Ops.size(); ++i) { - MIB.addUse(Ops[i]); - MIB.addImm(Indices[i]); + if (MRI->getType(Ops[i]) != OpTy || Indices[i] != i * OpSize) { + MaybeMerge = false; + break; + } + } + + if (MaybeMerge && Ops.size() * OpSize == ResTy.getSizeInBits()) { + buildMerge(Res, Ops); + return; + } + + unsigned ResIn = MRI->createGenericVirtualRegister(ResTy); + buildUndef(ResIn); + + for (unsigned i = 0; i < Ops.size(); ++i) { + unsigned ResOut = + i + 1 == Ops.size() ? 
Res : MRI->createGenericVirtualRegister(ResTy); + buildInsert(ResOut, ResIn, Ops[i], Indices[i]); + ResIn = ResOut; } - return MIB; } MachineInstrBuilder MachineIRBuilder::buildUndef(unsigned Res) { diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp index 254bdf10d804f..5ecaf5c563f82 100644 --- a/lib/CodeGen/GlobalISel/Utils.cpp +++ b/lib/CodeGen/GlobalISel/Utils.cpp @@ -26,6 +26,23 @@ using namespace llvm; +unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI, + const TargetInstrInfo &TII, + const RegisterBankInfo &RBI, + MachineInstr &InsertPt, unsigned Reg, + const TargetRegisterClass &RegClass) { + if (!RBI.constrainGenericRegister(Reg, RegClass, MRI)) { + unsigned NewReg = MRI.createVirtualRegister(&RegClass); + BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(), + TII.get(TargetOpcode::COPY), NewReg) + .addReg(Reg); + return NewReg; + } + + return Reg; +} + + unsigned llvm::constrainOperandRegClass( const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, @@ -36,16 +53,7 @@ unsigned llvm::constrainOperandRegClass( "PhysReg not implemented"); const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF); - - if (!RBI.constrainGenericRegister(Reg, *RegClass, MRI)) { - unsigned NewReg = MRI.createVirtualRegister(RegClass); - BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(), - TII.get(TargetOpcode::COPY), NewReg) - .addReg(Reg); - return NewReg; - } - - return Reg; + return constrainRegToClass(MRI, TII, RBI, InsertPt, Reg, *RegClass); } bool llvm::isTriviallyDead(const MachineInstr &MI, diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index c98c9b68ac0e4..ff8405366173e 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -1474,8 +1474,11 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { DontKill.addLiveIns(NextMBB); } + // Remove the branches from the entry so we can add the contents of the true + // block to it. + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); + if (CvtMBB.pred_size() > 1) { - BBI.NonPredSize -= TII->removeBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond); @@ -1484,11 +1487,11 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // explicitly remove CvtBBI as a successor. BBI.BB->removeSuccessor(&CvtMBB, true); } else { + // Predicate the instructions in the true block. RemoveKills(CvtMBB.begin(), CvtMBB.end(), DontKill, *TRI); PredicateBlock(*CvtBBI, CvtMBB.end(), Cond); // Merge converted block into entry block. - BBI.NonPredSize -= TII->removeBranch(*BBI.BB); MergeBlocks(BBI, *CvtBBI); } @@ -1588,8 +1591,11 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { BBCvt = MBPI->getEdgeProbability(BBI.BB, &CvtMBB); } + // Remove the branches from the entry so we can add the contents of the true + // block to it. + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); + if (CvtMBB.pred_size() > 1) { - BBI.NonPredSize -= TII->removeBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); @@ -1603,7 +1609,6 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { PredicateBlock(*CvtBBI, CvtMBB.end(), Cond); // Now merge the entry of the triangle with the true block. 
- BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
MergeBlocks(BBI, *CvtBBI, false);
}
diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp
index b831ddfa601a6..e308f49ec4e85 100644
--- a/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/lib/CodeGen/ImplicitNullChecks.cpp
@@ -359,30 +359,15 @@ ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
Offset < PageSize))
return SR_Unsuitable;
- // Finally, we need to make sure that the access instruction actually is
- // accessing from PointerReg, and there isn't some re-definition of PointerReg
- // between the compare and the memory access.
- // If PointerReg has been redefined before then there is no sense to continue
- // lookup due to this condition will fail for any further instruction.
- SuitabilityResult Suitable = SR_Suitable;
- for (auto *PrevMI : PrevInsts)
- for (auto &PrevMO : PrevMI->operands()) {
- if (PrevMO.isReg() && PrevMO.getReg() && PrevMO.isDef() &&
- TRI->regsOverlap(PrevMO.getReg(), PointerReg))
- return SR_Impossible;
-
- // Check whether the current memory access aliases with previous one.
- // If we already found that it aliases then no need to continue.
- // But we continue base pointer check as it can result in SR_Impossible.
- if (Suitable == SR_Suitable) {
- AliasResult AR = areMemoryOpsAliased(MI, PrevMI);
- if (AR == AR_WillAliasEverything)
- return SR_Impossible;
- if (AR == AR_MayAlias)
- Suitable = SR_Unsuitable;
- }
- }
- return Suitable;
+ // Finally, check whether the current memory access aliases with a previous one.
+ for (auto *PrevMI : PrevInsts) {
+ AliasResult AR = areMemoryOpsAliased(MI, PrevMI);
+ if (AR == AR_WillAliasEverything)
+ return SR_Impossible;
+ if (AR == AR_MayAlias)
+ return SR_Unsuitable;
+ }
+ return SR_Suitable;
}
bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
@@ -569,6 +554,12 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
return true;
}
+ // If MI re-defines the PointerReg, we cannot move further.
+ if (any_of(MI.operands(), [&](MachineOperand &MO) {
+ return MO.isReg() && MO.getReg() && MO.isDef() &&
+ TRI->regsOverlap(MO.getReg(), PointerReg);
+ }))
+ return false;
InstsSeenSoFar.push_back(&MI);
}
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index bbd783367c9e8..0c76478af551f 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -1006,7 +1006,7 @@ bool LiveDebugVariables::doInitialization(Module &M) {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void LiveDebugVariables::dump() {
+LLVM_DUMP_METHOD void LiveDebugVariables::dump() const {
if (pImpl)
static_cast<LDVImpl*>(pImpl)->print(dbgs());
}
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index afe87a52544d8..1d7e3d4371a24 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -59,7 +59,7 @@ public:
void emitDebugValues(VirtRegMap *VRM);
/// dump - Print data structures to dbgs().
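The ImplicitNullChecks change above splits one responsibility in two: isSuitableMemoryOp now only answers the aliasing question, while the scan loop in analyzeBlockForNullChecks gives up outright once anything redefines PointerReg, since no instruction past a redefinition can ever be hoisted. A toy model of that cutoff (simplified stand-in types, not the LLVM API):

    #include <cstddef>
    #include <vector>

    struct Operand { bool IsDef; unsigned Reg; };
    struct Instr { std::vector<Operand> Operands; };

    // Returns how many leading instructions are still worth considering: the
    // scan stops at the first redefinition of the tracked pointer register.
    static std::size_t usableWindow(const std::vector<Instr> &Block,
                                    unsigned PointerReg) {
      for (std::size_t I = 0; I != Block.size(); ++I)
        for (const Operand &Op : Block[I].Operands)
          if (Op.IsDef && Op.Reg == PointerReg)
            return I; // nothing after this point can be hoisted
      return Block.size();
    }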
- void dump();
+ void dump() const;
private:
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 590acc01008a6..81597afe6b02b 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -228,6 +228,12 @@ LLVM_DUMP_METHOD void MachineBasicBlock::dump() const {
}
#endif
+bool MachineBasicBlock::isLegalToHoistInto() const {
+ if (isReturnBlock() || hasEHPadSuccessor())
+ return false;
+ return true;
+}
+
StringRef MachineBasicBlock::getName() const {
if (const BasicBlock *LBB = getBasicBlock())
return LBB->getName();
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 2a6cb07dbd2da..81c6dace92e04 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -558,6 +559,23 @@ unsigned MachinePointerInfo::getAddrSpace() const {
return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace();
}
+/// isDereferenceable - Return true if V is always dereferenceable for
+/// Offset + Size bytes.
+bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
+ const DataLayout &DL) const {
+ if (!V.is<const Value*>())
+ return false;
+
+ const Value *BasePtr = V.get<const Value*>();
+ if (BasePtr == nullptr)
+ return false;
+
+ return isDereferenceableAndAlignedPointer(BasePtr, 1,
+ APInt(DL.getPointerSize(),
+ Offset + Size),
+ DL);
+}
+
/// getConstantPool - Return a MachinePointerInfo record that refers to the
/// constant pool.
MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) {
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
index 4c81fd91cb829..22d519e5d88fa 100644
--- a/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -23,7 +23,6 @@ using namespace llvm;
// Out of line virtual method.
void MachineModuleInfoMachO::anchor() {}
void MachineModuleInfoELF::anchor() {}
-void MachineModuleInfoWasm::anchor() {}
static int SortSymbolPair(const void *LHS, const void *RHS) {
typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 01a2286b8d66a..eaba9a58557c3 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -542,10 +542,10 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void ReadyQueue::dump() {
+LLVM_DUMP_METHOD void ReadyQueue::dump() const {
dbgs() << "Queue " << Name << ": ";
- for (unsigned i = 0, e = Queue.size(); i < e; ++i)
- dbgs() << Queue[i]->NodeNum << " ";
+ for (const SUnit *SU : Queue)
+ dbgs() << SU->NodeNum << " ";
dbgs() << "\n";
}
#endif
@@ -609,10 +609,8 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- releaseSucc(SU, &*I);
- }
+ for (SDep &Succ : SU->Succs)
+ releaseSucc(SU, &Succ);
}
/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
When @@ -648,10 +646,8 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { /// releasePredecessors - Call releasePred on each of SU's predecessors. void ScheduleDAGMI::releasePredecessors(SUnit *SU) { - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - releasePred(SU, &*I); - } + for (SDep &Pred : SU->Preds) + releasePred(SU, &Pred); } /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after @@ -724,8 +720,8 @@ void ScheduleDAGMI::schedule() { DEBUG( if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this); - for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this); + for (const SUnit &SU : SUnits) + SU.dumpAll(this); if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this); ); @@ -786,28 +782,25 @@ void ScheduleDAGMI::schedule() { /// Apply each ScheduleDAGMutation step in order. void ScheduleDAGMI::postprocessDAG() { - for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { - Mutations[i]->apply(this); - } + for (auto &m : Mutations) + m->apply(this); } void ScheduleDAGMI:: findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, SmallVectorImpl<SUnit*> &BotRoots) { - for (std::vector<SUnit>::iterator - I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { - SUnit *SU = &(*I); - assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits"); + for (SUnit &SU : SUnits) { + assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits"); // Order predecessors so DFSResult follows the critical path. - SU->biasCriticalPath(); + SU.biasCriticalPath(); // A SUnit is ready to top schedule if it has no predecessors. - if (!I->NumPredsLeft) - TopRoots.push_back(SU); + if (!SU.NumPredsLeft) + TopRoots.push_back(&SU); // A SUnit is ready to bottom schedule if it has no successors. - if (!I->NumSuccsLeft) - BotRoots.push_back(SU); + if (!SU.NumSuccsLeft) + BotRoots.push_back(&SU); } ExitSU.biasCriticalPath(); } @@ -822,10 +815,9 @@ void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, // // Nodes with unreleased weak edges can still be roots. // Release top roots in forward order. - for (SmallVectorImpl<SUnit*>::const_iterator - I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) { - SchedImpl->releaseTopNode(*I); - } + for (SUnit *SU : TopRoots) + SchedImpl->releaseTopNode(SU); + // Release bottom roots in reverse order so the higher priority nodes appear // first. This is more natural and slightly more efficient. 
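Most of the MachineScheduler hunks in this section are mechanical cleanups with no functional change; the recurring pattern is the replacement of explicit iterator loops with range-based for, as in this pair lifted from the hunks above:

    // Before: manual iterator bookkeeping.
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I)
      releaseSucc(SU, &*I);

    // After: the element type (SDep) is named directly and the loop body
    // cannot mis-increment or forget to dereference the iterator.
    for (SDep &Succ : SU->Succs)
      releaseSucc(SU, &Succ);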
for (SmallVectorImpl<SUnit*>::const_reverse_iterator @@ -1029,9 +1021,9 @@ void ScheduleDAGMILive::initRegPressure() { } } DEBUG(dbgs() << "Excess PSets: "; - for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i) + for (const PressureChange &RCPS : RegionCriticalPSets) dbgs() << TRI->getRegPressureSetName( - RegionCriticalPSets[i].getPSet()) << " "; + RCPS.getPSet()) << " "; dbgs() << "\n"); } @@ -1040,11 +1032,10 @@ updateScheduledPressure(const SUnit *SU, const std::vector<unsigned> &NewMaxPressure) { const PressureDiff &PDiff = getPressureDiff(SU); unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size(); - for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end(); - I != E; ++I) { - if (!I->isValid()) + for (const PressureChange &PC : PDiff) { + if (!PC.isValid()) break; - unsigned ID = I->getPSet(); + unsigned ID = PC.getPSet(); while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID) ++CritIdx; if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) { @@ -1508,8 +1499,7 @@ createStoreClusterDAGMutation(const TargetInstrInfo *TII, void BaseMemOpClusterMutation::clusterNeighboringMemOps( ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) { SmallVector<MemOpInfo, 32> MemOpRecords; - for (unsigned Idx = 0, End = MemOps.size(); Idx != End; ++Idx) { - SUnit *SU = MemOps[Idx]; + for (SUnit *SU : MemOps) { unsigned BaseReg; int64_t Offset; if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI)) @@ -1537,12 +1527,11 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( // dependent on SUa can prevent load combining due to register reuse. // Predecessor edges do not need to be copied from SUb to SUa since nearby // loads should have effectively the same inputs. - for (SUnit::const_succ_iterator - SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) { - if (SI->getSUnit() == SUb) + for (const SDep &Succ : SUa->Succs) { + if (Succ.getSUnit() == SUb) continue; - DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n"); - DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial)); + DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n"); + DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial)); } ++ClusterLength; } else @@ -1559,17 +1548,15 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) { DenseMap<unsigned, unsigned> StoreChainIDs; // Map each store chain to a set of dependent MemOps. SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents; - for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { - SUnit *SU = &DAG->SUnits[Idx]; - if ((IsLoad && !SU->getInstr()->mayLoad()) || - (!IsLoad && !SU->getInstr()->mayStore())) + for (SUnit &SU : DAG->SUnits) { + if ((IsLoad && !SU.getInstr()->mayLoad()) || + (!IsLoad && !SU.getInstr()->mayStore())) continue; unsigned ChainPredID = DAG->SUnits.size(); - for (SUnit::const_pred_iterator - PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { - if (PI->isCtrl()) { - ChainPredID = PI->getSUnit()->NodeNum; + for (const SDep &Pred : SU.Preds) { + if (Pred.isCtrl()) { + ChainPredID = Pred.getSUnit()->NodeNum; break; } } @@ -1580,12 +1567,12 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) { StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains)); if (Result.second) StoreChainDependents.resize(NumChains + 1); - StoreChainDependents[Result.first->second].push_back(SU); + StoreChainDependents[Result.first->second].push_back(&SU); } // Iterate over the store chains. 
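The StoreChainIDs bookkeeping in the hunk above leans on the insert-or-lookup idiom: DenseMap::insert returns an (iterator, inserted) pair, so one call both assigns a dense index to a newly seen chain and finds the index of a known one. The same idiom with std::map as a stand-in, runnable in isolation:

    #include <map>
    #include <vector>

    int main() {
      std::map<unsigned, unsigned> ChainIDs;     // chain pred -> dense index
      std::vector<std::vector<unsigned>> Chains; // per-chain members
      for (unsigned Pred : {7u, 3u, 7u}) {
        auto Result =
            ChainIDs.insert({Pred, static_cast<unsigned>(Chains.size())});
        if (Result.second)                  // first sighting of this chain
          Chains.resize(Chains.size() + 1);
        Chains[Result.first->second].push_back(Pred); // both 7s share a bucket
      }
      return 0;
    }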
- for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx) - clusterNeighboringMemOps(StoreChainDependents[Idx], DAG); + for (auto &SCD : StoreChainDependents) + clusterNeighboringMemOps(SCD, DAG); } //===----------------------------------------------------------------------===// @@ -1728,16 +1715,14 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex()); MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def); SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef); - for (SUnit::const_succ_iterator - I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end(); - I != E; ++I) { - if (I->getKind() != SDep::Data || I->getReg() != LocalReg) + for (const SDep &Succ : LastLocalSU->Succs) { + if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg) continue; - if (I->getSUnit() == GlobalSU) + if (Succ.getSUnit() == GlobalSU) continue; - if (!DAG->canAddEdge(GlobalSU, I->getSUnit())) + if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit())) return; - LocalUses.push_back(I->getSUnit()); + LocalUses.push_back(Succ.getSUnit()); } // Open the top of the GlobalLI hole by constraining any earlier global uses // to precede the start of LocalLI. @@ -1745,15 +1730,14 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { MachineInstr *FirstLocalDef = LIS->getInstructionFromIndex(LocalLI->beginIndex()); SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef); - for (SUnit::const_pred_iterator - I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) { - if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg) + for (const SDep &Pred : GlobalSU->Preds) { + if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg) continue; - if (I->getSUnit() == FirstLocalSU) + if (Pred.getSUnit() == FirstLocalSU) continue; - if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit())) + if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit())) return; - GlobalUses.push_back(I->getSUnit()); + GlobalUses.push_back(Pred.getSUnit()); } DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n"); // Add the weak edges. 
@@ -1784,12 +1768,11 @@ void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) { RegionEndIdx = DAG->getLIS()->getInstructionIndex( *priorNonDebug(DAG->end(), DAG->begin())); - for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { - SUnit *SU = &DAG->SUnits[Idx]; - if (!SU->getInstr()->isCopy()) + for (SUnit &SU : DAG->SUnits) { + if (!SU.getInstr()->isCopy()) continue; - constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG)); + constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG)); } } @@ -1840,10 +1823,9 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { if (!SchedModel->hasInstrSchedModel()) return; RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); - for (std::vector<SUnit>::iterator - I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { - const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); - RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC) + for (SUnit &SU : DAG->SUnits) { + const MCSchedClassDesc *SC = DAG->getSchedClass(&SU); + RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC) * SchedModel->getMicroOpFactor(); for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), @@ -1957,12 +1939,11 @@ unsigned SchedBoundary:: findMaxLatency(ArrayRef<SUnit*> ReadySUs) { SUnit *LateSU = nullptr; unsigned RemLatency = 0; - for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end(); - I != E; ++I) { - unsigned L = getUnscheduledLatency(*I); + for (SUnit *SU : ReadySUs) { + unsigned L = getUnscheduledLatency(SU); if (L > RemLatency) { RemLatency = L; - LateSU = *I; + LateSU = SU; } } if (LateSU) { @@ -2328,7 +2309,7 @@ SUnit *SchedBoundary::pickOnlyChoice() { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) // This is useful information to dump after bumpNode. // Note that the Queue contents are more useful before pickNodeFromQueue. -LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() { +LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const { unsigned ResFactor; unsigned ResCount; if (ZoneCritResIdx) { @@ -2667,7 +2648,7 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, } } -void GenericScheduler::dumpPolicy() { +void GenericScheduler::dumpPolicy() const { // Cannot completely remove virtual function even in release mode. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) dbgs() << "GenericScheduler RegionPolicy: " @@ -2719,10 +2700,9 @@ void GenericScheduler::registerRoots() { Rem.CriticalPath = DAG->ExitSU.getDepth(); // Some roots may not feed into ExitSU. Check all of them in case. - for (std::vector<SUnit*>::const_iterator - I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { - if ((*I)->getDepth() > Rem.CriticalPath) - Rem.CriticalPath = (*I)->getDepth(); + for (const SUnit *SU : Bot.Available) { + if (SU->getDepth() > Rem.CriticalPath) + Rem.CriticalPath = SU->getDepth(); } DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n'); if (DumpCriticalPathLength) { @@ -2969,10 +2949,10 @@ void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); ReadyQueue &Q = Zone.Available; - for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + for (SUnit *SU : Q) { SchedCandidate TryCand(ZonePolicy); - initCandidate(TryCand, *I, Zone.isTop(), RPTracker, TempTracker); + initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker); // Pass SchedBoundary only when comparing nodes from the same boundary. 
SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; tryCandidate(Cand, TryCand, ZoneArg); @@ -3118,18 +3098,17 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { // Find already scheduled copies with a single physreg dependence and move // them just above the scheduled instruction. - for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end(); - I != E; ++I) { - if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg())) + for (SDep &Dep : Deps) { + if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg())) continue; - SUnit *DepSU = I->getSUnit(); + SUnit *DepSU = Dep.getSUnit(); if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) continue; MachineInstr *Copy = DepSU->getInstr(); if (!Copy->isCopy()) continue; DEBUG(dbgs() << " Rescheduling physreg copy "; - I->getSUnit()->dump(DAG)); + Dep.getSUnit()->dump(DAG)); DAG->moveInstruction(Copy, InsertPos); } } @@ -3204,10 +3183,9 @@ void PostGenericScheduler::registerRoots() { Rem.CriticalPath = DAG->ExitSU.getDepth(); // Some roots may not feed into ExitSU. Check all of them in case. - for (SmallVectorImpl<SUnit*>::const_iterator - I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) { - if ((*I)->getDepth() > Rem.CriticalPath) - Rem.CriticalPath = (*I)->getDepth(); + for (const SUnit *SU : BotRoots) { + if (SU->getDepth() > Rem.CriticalPath) + Rem.CriticalPath = SU->getDepth(); } DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n'); if (DumpCriticalPathLength) { @@ -3260,9 +3238,9 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand, void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { ReadyQueue &Q = Top.Available; - for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + for (SUnit *SU : Q) { SchedCandidate TryCand(Cand.Policy); - TryCand.SU = *I; + TryCand.SU = SU; TryCand.AtTop = true; TryCand.initResourceDelta(DAG, SchedModel); tryCandidate(Cand, TryCand); diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp new file mode 100644 index 0000000000000..45ea0e4c39ab4 --- /dev/null +++ b/lib/CodeGen/MacroFusion.cpp @@ -0,0 +1,150 @@ +//===- MacroFusion.cpp - Macro Fusion ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file contains the implementation of the DAG scheduling mutation +/// to pair instructions back to back. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MacroFusion.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetInstrInfo.h" + +#define DEBUG_TYPE "misched" + +STATISTIC(NumFused, "Number of instr pairs fused"); + +using namespace llvm; + +static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden, + cl::desc("Enable scheduling for macro fusion."), cl::init(true)); + +namespace { + +static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU, + SUnit &SecondSU) { + // Create a single weak edge between the adjacent instrs. The only effect is + // to cause bottom-up scheduling to heavily prioritize the clustered instrs. + DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)); + + // Adjust the latency between the anchor instr and its + // predecessors. 
+ for (SDep &IDep : SecondSU.Preds)
+ if (IDep.getSUnit() == &FirstSU)
+ IDep.setLatency(0);
+
+ // Adjust the latency between the dependent instr and its
+ // successors.
+ for (SDep &IDep : FirstSU.Succs)
+ if (IDep.getSUnit() == &SecondSU)
+ IDep.setLatency(0);
+
+ DEBUG(dbgs() << DAG.MF.getName() << "(): Macro fuse ";
+ FirstSU.print(dbgs(), &DAG); dbgs() << " - ";
+ SecondSU.print(dbgs(), &DAG); dbgs() << " / ";
+ dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " <<
+ DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n'; );
+
+ if (&SecondSU != &DAG.ExitSU)
+ // Make instructions dependent on FirstSU also dependent on SecondSU to
+ // prevent them from being scheduled between FirstSU and SecondSU.
+ for (const SDep &SI : FirstSU.Succs) {
+ if (SI.getSUnit() == &SecondSU)
+ continue;
+ DEBUG(dbgs() << " Copy Succ ";
+ SI.getSUnit()->print(dbgs(), &DAG); dbgs() << '\n';);
+ DAG.addEdge(SI.getSUnit(), SDep(&SecondSU, SDep::Artificial));
+ }
+
+ ++NumFused;
+}
+
+
+/// \brief Post-process the DAG to create cluster edges between instrs that may
+/// be fused by the processor into a single operation.
+class MacroFusion : public ScheduleDAGMutation {
+ ShouldSchedulePredTy shouldScheduleAdjacent;
+ bool FuseBlock;
+ bool scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU);
+
+public:
+ MacroFusion(ShouldSchedulePredTy shouldScheduleAdjacent, bool FuseBlock)
+ : shouldScheduleAdjacent(shouldScheduleAdjacent), FuseBlock(FuseBlock) {}
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
+};
+
+void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+
+ if (FuseBlock)
+ // For each of the SUnits in the scheduling block, try to fuse the instr in
+ // it with one in its predecessors.
+ for (SUnit &ISU : DAG->SUnits)
+ scheduleAdjacentImpl(*DAG, ISU);
+
+ if (DAG->ExitSU.getInstr())
+ // Try to fuse the instr in the ExitSU with one in its predecessors.
+ scheduleAdjacentImpl(*DAG, DAG->ExitSU);
+}
+
+/// \brief Implement the fusion of instr pairs in the scheduling DAG,
+/// anchored at the instr in AnchorSU.
+bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) {
+ const MachineInstr &AnchorMI = *AnchorSU.getInstr();
+ const TargetInstrInfo &TII = *DAG.TII;
+ const TargetSubtargetInfo &ST = DAG.MF.getSubtarget();
+
+ // Check if the anchor instr may be fused.
+ if (!shouldScheduleAdjacent(TII, ST, nullptr, AnchorMI))
+ return false;
+
+ // Explore fusion candidates among the dependencies of the anchor instr.
+ for (SDep &Dep : AnchorSU.Preds) {
+ // Ignore dependencies that don't enforce ordering.
+ if (Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output ||
+ Dep.isWeak())
+ continue;
+
+ SUnit &DepSU = *Dep.getSUnit();
+ if (DepSU.isBoundaryNode())
+ continue;
+
+ const MachineInstr *DepMI = DepSU.getInstr();
+ if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
+ continue;
+
+ fuseInstructionPair(DAG, DepSU, AnchorSU);
+ return true;
+ }
+
+ return false;
+}
+
+} // end anonymous namespace
+
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation>
+createMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent) {
+ if (EnableMacroFusion)
+ return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, true);
+ return nullptr;
+}
+
+std::unique_ptr<ScheduleDAGMutation>
+createBranchMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent) {
+ if (EnableMacroFusion)
+ return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, false);
+ return nullptr;
+}
+
+} // end namespace llvm
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 1aed58c36e17d..05e641d9489d9 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -35,6 +35,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
#include <cassert>
#include <iterator>
#include <limits>
@@ -260,6 +261,14 @@ void RegScavenger::backward() {
const MachineInstr &MI = *MBBI;
LiveUnits.stepBackward(MI);
+ // Expire scavenge spill frameindex uses.
+ for (ScavengedInfo &I : Scavenged) {
+ if (I.Restore == &MI) {
+ I.Reg = 0;
+ I.Restore = nullptr;
+ }
+ }
+
if (MBBI == MBB->begin()) {
MBBI = MachineBasicBlock::iterator(nullptr);
Tracking = false;
@@ -356,6 +365,80 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
return Survivor;
}
+/// Given the bitvector \p Available of free register units at position
+/// \p From, search backwards to find a register that is part of \p
+/// Candidates and not used/clobbered until the point \p To. If there are
+/// multiple candidates, continue searching and pick the one that is not used/
+/// clobbered for the longest time.
+/// Returns the register and the earliest position we know it to be free, or
+/// the position MBB.end() if no register is available.
+static std::pair<MCPhysReg, MachineBasicBlock::iterator>
+findSurvivorBackwards(const MachineRegisterInfo &MRI,
+ MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
+ const LiveRegUnits &LiveOut, ArrayRef<MCPhysReg> AllocationOrder) {
+ bool FoundTo = false;
+ MCPhysReg Survivor = 0;
+ MachineBasicBlock::iterator Pos;
+ MachineBasicBlock &MBB = *From->getParent();
+ unsigned InstrLimit = 25;
+ unsigned InstrCountDown = InstrLimit;
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ LiveRegUnits Used(TRI);
+
+ for (MachineBasicBlock::iterator I = From;; --I) {
+ const MachineInstr &MI = *I;
+
+ Used.accumulateBackward(MI);
+
+ if (I == To) {
+ // See if one of the registers in RC wasn't used so far.
+ for (MCPhysReg Reg : AllocationOrder) {
+ if (!MRI.isReserved(Reg) && Used.available(Reg) &&
+ LiveOut.available(Reg))
+ return std::make_pair(Reg, MBB.end());
+ }
+ // Otherwise we will continue up to InstrLimit instructions to find
+ // the register which is not defined/used for the longest time.
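The two factories at the end of the new MacroFusion.cpp are the target-facing entry points: a backend supplies a pairing predicate and registers the returned mutation with its scheduler. A hedged sketch of that wiring; only the predicate signature and the factory come from this file, while the predicate body and the registration site are hypothetical and vary by target:

    #include "llvm/CodeGen/MacroFusion.h"
    #include "llvm/Target/TargetInstrInfo.h"

    // Hypothetical target predicate. FirstMI is nullptr when the anchor
    // instruction is pre-screened on its own (see scheduleAdjacentImpl above).
    static bool myTargetShouldScheduleAdjacent(const TargetInstrInfo &TII,
                                               const TargetSubtargetInfo &STI,
                                               const MachineInstr *FirstMI,
                                               const MachineInstr &SecondMI) {
      if (!FirstMI)
        return true; // accept the anchor tentatively; decide per concrete pair
      return false;  // target-specific pairing rules would go here
    }

    // Registration, typically while the target constructs its ScheduleDAGMI:
    //   DAG->addMutation(createMacroFusionDAGMutation(
    //       myTargetShouldScheduleAdjacent));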
+ FoundTo = true;
+ Pos = To;
+ }
+ if (FoundTo) {
+ if (Survivor == 0 || !Used.available(Survivor)) {
+ MCPhysReg AvailableReg = 0;
+ for (MCPhysReg Reg : AllocationOrder) {
+ if (!MRI.isReserved(Reg) && Used.available(Reg)) {
+ AvailableReg = Reg;
+ break;
+ }
+ }
+ if (AvailableReg == 0)
+ break;
+ Survivor = AvailableReg;
+ }
+ if (--InstrCountDown == 0)
+ break;
+
+ // Keep searching when we find a vreg since the spilled register will
+ // be useful for this other vreg as well later.
+ bool FoundVReg = false;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ FoundVReg = true;
+ break;
+ }
+ }
+ if (FoundVReg) {
+ InstrCountDown = InstrLimit;
+ Pos = I;
+ }
+ if (I == MBB.begin())
+ break;
+ }
+ }
+
+ return std::make_pair(Survivor, Pos);
+}
+
static unsigned getFrameIndexOperandNum(MachineInstr &MI) {
unsigned i = 0;
while (!MI.getOperand(i).isFI()) {
@@ -365,44 +448,16 @@ static unsigned getFrameIndexOperandNum(MachineInstr &MI) {
return i;
}
-unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
- MachineBasicBlock::iterator I,
- int SPAdj) {
- MachineInstr &MI = *I;
- const MachineFunction &MF = *MI.getParent()->getParent();
- // Consider all allocatable registers in the register class initially
- BitVector Candidates = TRI->getAllocatableSet(MF, RC);
-
- // Exclude all the registers being used by the instruction.
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) &&
- !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
- Candidates.reset(*AI);
- }
-
- // Try to find a register that's unused if there is one, as then we won't
- // have to spill.
- BitVector Available = getRegsAvailable(RC);
- Available &= Candidates;
- if (Available.any())
- Candidates = Available;
-
- // Find the register whose use is furthest away.
- MachineBasicBlock::iterator UseMI;
- unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
-
- // If we found an unused register there is no reason to spill it.
- if (!isRegUsed(SReg)) {
- DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
- return SReg;
- }
-
+RegScavenger::ScavengedInfo &
+RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj,
+ MachineBasicBlock::iterator Before,
+ MachineBasicBlock::iterator &UseMI) {
// Find an available scavenging slot with size and alignment matching
// the requirements of the class RC.
+ const MachineFunction &MF = *Before->getParent()->getParent();
const MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned NeedSize = TRI->getSpillSize(*RC);
- unsigned NeedAlign = TRI->getSpillAlignment(*RC);
+ unsigned NeedSize = TRI->getSpillSize(RC);
+ unsigned NeedAlign = TRI->getSpillAlignment(RC);
unsigned SI = Scavenged.size(), Diff = std::numeric_limits<unsigned>::max();
int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd();
@@ -437,39 +492,72 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
}
// Avoid infinite regress
- Scavenged[SI].Reg = SReg;
+ Scavenged[SI].Reg = Reg;
// If the target knows how to save/restore the register, let it do so;
// otherwise, use the emergency stack spill slot.
- if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
- // Spill the scavenged register before I.
+ if (!TRI->saveScavengerRegister(*MBB, Before, UseMI, &RC, Reg)) {
+ // Spill the scavenged register before \p Before.
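The search in findSurvivorBackwards above is worth restating: register units touched by instructions are accumulated while walking backwards, and once the scan passes To it keeps going (up to 25 instructions) so that, among the candidates, the one that stays untouched longest wins. A toy model of just the register choice, ignoring the spill-point bookkeeping (plain unsigneds stand in for register units):

    #include <set>
    #include <vector>

    using Instr = std::vector<unsigned>; // registers read or written

    // RevInstrs is the instruction sequence already reversed; Candidates is
    // the allocation order. Returns 0 when every candidate gets clobbered.
    static unsigned pickSurvivor(const std::vector<Instr> &RevInstrs,
                                 const std::vector<unsigned> &Candidates) {
      std::set<unsigned> Used;
      unsigned Survivor = 0;
      for (const Instr &I : RevInstrs) {
        Used.insert(I.begin(), I.end());
        if (Survivor == 0 || Used.count(Survivor)) {
          Survivor = 0; // current choice clobbered; pick a fresh candidate
          for (unsigned C : Candidates)
            if (!Used.count(C)) { Survivor = C; break; }
          if (Survivor == 0)
            break; // all candidates used; the caller must spill here
        }
      }
      return Survivor;
    }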
int FI = Scavenged[SI].FrameIndex; if (FI < FIB || FI >= FIE) { std::string Msg = std::string("Error while trying to spill ") + - TRI->getName(SReg) + " from class " + TRI->getRegClassName(RC) + + TRI->getName(Reg) + " from class " + TRI->getRegClassName(&RC) + ": Cannot scavenge register without an emergency spill slot!"; report_fatal_error(Msg.c_str()); } - TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex, - RC, TRI); - MachineBasicBlock::iterator II = std::prev(I); + TII->storeRegToStackSlot(*MBB, Before, Reg, true, Scavenged[SI].FrameIndex, + &RC, TRI); + MachineBasicBlock::iterator II = std::prev(Before); unsigned FIOperandNum = getFrameIndexOperandNum(*II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); // Restore the scavenged register before its use (or first terminator). - TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex, - RC, TRI); + TII->loadRegFromStackSlot(*MBB, UseMI, Reg, Scavenged[SI].FrameIndex, + &RC, TRI); II = std::prev(UseMI); FIOperandNum = getFrameIndexOperandNum(*II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); } + return Scavenged[SI]; +} - Scavenged[SI].Restore = &*std::prev(UseMI); +unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, + MachineBasicBlock::iterator I, + int SPAdj) { + MachineInstr &MI = *I; + const MachineFunction &MF = *MI.getParent()->getParent(); + // Consider all allocatable registers in the register class initially + BitVector Candidates = TRI->getAllocatableSet(MF, RC); - // Doing this here leads to infinite regress. - // Scavenged[SI].Reg = SReg; + // Exclude all the registers being used by the instruction. + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) && + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) + Candidates.reset(*AI); + } + + // Try to find a register that's unused if there is one, as then we won't + // have to spill. + BitVector Available = getRegsAvailable(RC); + Available &= Candidates; + if (Available.any()) + Candidates = Available; + + // Find the register whose use is furthest away. + MachineBasicBlock::iterator UseMI; + unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); + + // If we found an unused register there is no reason to spill it. + if (!isRegUsed(SReg)) { + DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n"); + return SReg; + } + + ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI); + Scavenged.Restore = &*std::prev(UseMI); DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) << "\n"); @@ -477,85 +565,195 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, return SReg; } -void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) { - // FIXME: Iterating over the instruction stream is unnecessary. We can simply - // iterate over the vreg use list, which at this point only contains machine - // operands for which eliminateFrameIndex need a new scratch reg. +unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, + MachineBasicBlock::iterator To, + bool RestoreAfter, int SPAdj) { + const MachineBasicBlock &MBB = *To->getParent(); + const MachineFunction &MF = *MBB.getParent(); - // Run through the instructions and find any virtual registers. 
- MachineRegisterInfo &MRI = MF.getRegInfo();
- for (MachineBasicBlock &MBB : MF) {
- RS.enterBasicBlock(MBB);
-
- int SPAdj = 0;
-
- // The instruction stream may change in the loop, so check MBB.end()
- // directly.
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
- // We might end up here again with a NULL iterator if we scavenged a
- // register for which we inserted spill code for definition by what was
- // originally the first instruction in MBB.
- if (I == MachineBasicBlock::iterator(nullptr))
- I = MBB.begin();
-
- const MachineInstr &MI = *I;
- MachineBasicBlock::iterator J = std::next(I);
- MachineBasicBlock::iterator P =
- I == MBB.begin() ? MachineBasicBlock::iterator(nullptr)
- : std::prev(I);
-
- // RS should process this instruction before we might scavenge at this
- // location. This is because we might be replacing a virtual register
- // defined by this instruction, and if so, registers killed by this
- // instruction are available, and defined registers are not.
- RS.forward(I);
+ // Find the register whose use is furthest away.
+ MachineBasicBlock::iterator UseMI;
+ ArrayRef<MCPhysReg> AllocationOrder = RC.getRawAllocationOrder(MF);
+ std::pair<MCPhysReg, MachineBasicBlock::iterator> P =
+ findSurvivorBackwards(*MRI, MBBI, To, LiveUnits, AllocationOrder);
+ MCPhysReg Reg = P.first;
+ MachineBasicBlock::iterator SpillBefore = P.second;
+ assert(Reg != 0 && "No register left to scavenge!");
+ // Found an available register?
+ if (SpillBefore != MBB.end()) {
+ MachineBasicBlock::iterator ReloadAfter =
+ RestoreAfter ? std::next(MBBI) : MBBI;
+ MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter);
+ DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
+ ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);
+ Scavenged.Restore = &*std::prev(SpillBefore);
+ LiveUnits.removeReg(Reg);
+ DEBUG(dbgs() << "Scavenged register with spill: " << PrintReg(Reg, TRI)
+ << " until " << *SpillBefore);
+ } else {
+ DEBUG(dbgs() << "Scavenged free register: " << PrintReg(Reg, TRI) << '\n');
+ }
+ return Reg;
+}
- for (const MachineOperand &MO : MI.operands()) {
+/// Allocate a register for the virtual register \p VReg. The last use of
+/// \p VReg is around the current position of the register scavenger \p RS.
+/// \p ReserveAfter controls whether the scavenged register needs to be reserved
+/// after the current instruction; otherwise it will only be reserved before the
+/// current instruction.
+static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS,
+ unsigned VReg, bool ReserveAfter) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+#ifndef NDEBUG
+ // Verify that all definitions and uses are in the same basic block.
+ const MachineBasicBlock *CommonMBB = nullptr;
+ // Real definition for the reg; re-definitions are not considered.
+ const MachineInstr *RealDef = nullptr;
+ for (MachineOperand &MO : MRI.reg_nodbg_operands(VReg)) {
+ MachineBasicBlock *MBB = MO.getParent()->getParent();
+ if (CommonMBB == nullptr)
+ CommonMBB = MBB;
+ assert(MBB == CommonMBB && "All defs+uses must be in the same basic block");
+ if (MO.isDef()) {
+ const MachineInstr &MI = *MO.getParent();
+ if (!MI.readsRegister(VReg, &TRI)) {
+ assert((!RealDef || RealDef == &MI) &&
+ "Can have at most one definition which is not a redefinition");
+ RealDef = &MI;
+ }
+ }
+ }
+ assert(RealDef != nullptr && "Must have at least 1 Def");
+#endif
+
+ // We should only have one definition of the register.
However, to accommodate
+ // the requirements of two-address code, we also allow definitions in
+ // subsequent instructions provided they also read the register. That way
+ // we get a single contiguous lifetime.
+ //
+ // Definitions in MRI.def_begin() are unordered; search for the first.
+ MachineRegisterInfo::def_iterator FirstDef =
+ std::find_if(MRI.def_begin(VReg), MRI.def_end(),
+ [VReg, &TRI](const MachineOperand &MO) {
+ return !MO.getParent()->readsRegister(VReg, &TRI);
+ });
+ assert(FirstDef != MRI.def_end() &&
+ "Must have one definition that does not redefine vreg");
+ MachineInstr &DefMI = *FirstDef->getParent();
+
+ // The register scavenger will report a free register, inserting an emergency
+ // spill/reload if necessary.
+ int SPAdj = 0;
+ const TargetRegisterClass &RC = *MRI.getRegClass(VReg);
+ unsigned SReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(),
+ ReserveAfter, SPAdj);
+ MRI.replaceRegWith(VReg, SReg);
+ ++NumScavengedRegs;
+ return SReg;
+}
+
+/// Allocate (scavenge) vregs inside a single basic block.
+/// Returns true if the target spill callback created new vregs and a 2nd pass
+/// is necessary.
+static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI,
+ RegScavenger &RS,
+ MachineBasicBlock &MBB) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ RS.enterBasicBlockEnd(MBB);
+
+ unsigned InitialNumVirtRegs = MRI.getNumVirtRegs();
+ bool NextInstructionReadsVReg = false;
+ for (MachineBasicBlock::iterator I = MBB.end(); I != MBB.begin(); ) {
+ --I;
+ // Move RegScavenger to the position between *I and *std::next(I).
+ RS.backward(I);
+
+ // Look for unassigned vregs in the uses of *std::next(I).
+ if (NextInstructionReadsVReg) {
+ MachineBasicBlock::iterator N = std::next(I);
+ const MachineInstr &NMI = *N;
+ for (const MachineOperand &MO : NMI.operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ // We only care about virtual registers and ignore virtual registers
+ // created by the target callbacks in the process (those will be handled
+ // in a scavenging round).
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+ TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs)
+ continue;
+ if (!MO.readsReg())
continue;
- // When we first encounter a new virtual register, it
- // must be a definition.
- assert(MO.isDef() && "frame index virtual missing def!");
- // Scavenge a new scratch register
- const TargetRegisterClass *RC = MRI.getRegClass(Reg);
- unsigned ScratchReg = RS.scavengeRegister(RC, J, SPAdj);
+ unsigned SReg = scavengeVReg(MRI, RS, Reg, true);
+ N->addRegisterKilled(SReg, &TRI, false);
+ RS.setRegUsed(SReg);
+ }
+ }
- ++NumScavengedRegs;
+ // Look for unassigned vregs in the defs of *I.
+ NextInstructionReadsVReg = false;
+ const MachineInstr &MI = *I;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ // Only vregs, no newly created vregs (see above).
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+ TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs)
+ continue;
+ // We have to look at all operands anyway so we can precalculate here
+ // whether there is a reading operand. This allows us to skip the use
+ // step in the next iteration if there was none.
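The control flow of scavengeFrameVirtualRegsInBlock above is easy to lose in the diff: the walk is backwards, defs of the current instruction are assigned with ReserveAfter=false, and uses are only handled one step later, when the scavenger already sits in front of the instruction that reads them. The skeleton, with the operand loops collapsed into hypothetical helpers:

    // Illustrative skeleton only; assignUsesOf/assignDefsOf/readsAnyVReg are
    // hypothetical stand-ins for the operand loops in the hunk above.
    static void assignUsesOf(MachineInstr &MI);      // reserve across the instr
    static void assignDefsOf(MachineInstr &MI);      // reserve only before it
    static bool readsAnyVReg(const MachineInstr &MI);

    static void scavengeBlockBackwards(MachineBasicBlock &MBB, RegScavenger &RS) {
      bool NextInstructionReadsVReg = false;
      for (MachineBasicBlock::iterator I = MBB.end(); I != MBB.begin();) {
        --I;
        RS.backward(I); // scavenger state is now between *I and *std::next(I)
        if (NextInstructionReadsVReg)
          assignUsesOf(*std::next(I));
        NextInstructionReadsVReg = readsAnyVReg(*I);
        assignDefsOf(*I);
      }
    }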
+ assert(!MO.isInternalRead() && "Cannot assign inside bundles");
+ assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses");
+ if (MO.readsReg()) {
+ NextInstructionReadsVReg = true;
+ }
+ if (MO.isDef()) {
+ unsigned SReg = scavengeVReg(MRI, RS, Reg, false);
+ I->addRegisterDead(SReg, &TRI, false);
+ }
+ }
+ }
+#ifndef NDEBUG
+ for (const MachineOperand &MO : MBB.front().operands()) {
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ assert(!MO.isInternalRead() && "Cannot assign inside bundles");
+ assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses");
+ assert(!MO.readsReg() && "Vreg use in first instruction not allowed");
+ }
+#endif
-
- // Replace this reference to the virtual register with the
- // scratch register.
- assert(ScratchReg && "Missing scratch register!");
- MRI.replaceRegWith(Reg, ScratchReg);
+ return MRI.getNumVirtRegs() != InitialNumVirtRegs;
+}
- // Because this instruction was processed by the RS before this
- // register was allocated, make sure that the RS now records the
- // register as being used.
- RS.setRegUsed(ScratchReg);
- }
+void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) {
+ // FIXME: Iterating over the instruction stream is unnecessary. We can simply
+ // iterate over the vreg use list, which at this point only contains machine
+ // operands for which eliminateFrameIndex needs a new scratch reg.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ // Shortcut.
+ if (MRI.getNumVirtRegs() == 0) {
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+ return;
+ }
+
+ // Run through the instructions and find any virtual registers.
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.empty())
+ continue;
- // If the scavenger needed to use one of its spill slots, the
- // spill code will have been inserted in between I and J. This is a
- // problem because we need the spill code before I: Move I to just
- // prior to J.
- if (I != std::prev(J)) {
- MBB.splice(J, &MBB, I);
-
- // Before we move I, we need to prepare the RS to visit I again.
- // Specifically, RS will assert if it sees uses of registers that
- // it believes are undefined. Because we have already processed
- // register kills in I, when it visits I again, it will believe that
- // those registers are undefined. To avoid this situation, unprocess
- // the instruction I.
- assert(RS.getCurrentPosition() == I &&
- "The register scavenger has an unexpected position");
- I = P;
- RS.unprocess(P);
- } else
- ++I;
+ bool Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB);
+ if (Again) {
+ DEBUG(dbgs() << "Warning: Required two scavenging passes for block "
+ << MBB.getName() << '\n');
+ Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB);
+ // The target required a 2nd run (because it created new vregs while
+ // spilling). Refuse to do another pass to keep compile time in check.
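The retry policy at the tail of this function (concluded just below) exists because a target's spill callback may itself create new virtual registers; one repeat round is allowed, and a third requirement is treated as a bug rather than looped on. In isolation, with hypothetical helpers:

    // scavengeOnce() returns true when it created new work; fatal() aborts.
    // Both are hypothetical stand-ins for the calls in the hunk above.
    bool Again = scavengeOnce();
    if (Again) {
      Again = scavengeOnce();
      if (Again)
        fatal("scavenging did not converge after two rounds");
    }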
+ if (Again) + report_fatal_error("Incomplete scavenging after 2nd pass"); } } diff --git a/lib/CodeGen/RegisterUsageInfo.cpp b/lib/CodeGen/RegisterUsageInfo.cpp index d7a3ac0808230..30757f070cadb 100644 --- a/lib/CodeGen/RegisterUsageInfo.cpp +++ b/lib/CodeGen/RegisterUsageInfo.cpp @@ -1,4 +1,4 @@ -//===- RegisterUsageInfo.cpp - Register Usage Informartion Storage --------===// +//===- RegisterUsageInfo.cpp - Register Usage Information Storage ---------===// // // The LLVM Compiler Infrastructure // @@ -38,7 +38,7 @@ static cl::opt<bool> DumpRegUsage( cl::desc("print register usage details collected for analysis.")); INITIALIZE_PASS(PhysicalRegisterUsageInfo, "reg-usage-info", - "Register Usage Informartion Stroage", false, true) + "Register Usage Information Storage", false, true) char PhysicalRegisterUsageInfo::ID = 0; diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index a668ddb7389f1..ae9c5adb03979 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -17,6 +17,7 @@ add_llvm_library(LLVMSelectionDAG ScheduleDAGVLIW.cpp SelectionDAGBuilder.cpp SelectionDAG.cpp + SelectionDAGAddressAnalysis.cpp SelectionDAGDumper.cpp SelectionDAGISel.cpp SelectionDAGPrinter.cpp diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2d4422d94a172..d02dcb6f4439b 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -469,7 +470,8 @@ namespace { /// \return True if a merged store was created. bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, - bool IsConstantSrc, bool UseVector); + bool IsConstantSrc, bool UseVector, + bool UseTrunc); /// This is a helper function for MergeConsecutiveStores. /// Stores that may be merged are placed in StoreNodes. @@ -2549,14 +2551,14 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 - if (N1IsConst && ConstValue1 == 0) + if (N1IsConst && ConstValue1.isNullValue()) return N1; // We require a splat of the entire scalar bit width for non-contiguous // bit patterns. bool IsFullSplat = ConstValue1.getBitWidth() == VT.getScalarSizeInBits(); // fold (mul x, 1) -> x - if (N1IsConst && ConstValue1 == 1 && IsFullSplat) + if (N1IsConst && ConstValue1.isOneValue() && IsFullSplat) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -3685,7 +3687,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1))) - if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) + if (N1C->getAPIntValue().isSubsetOf(ORI->getAPIntValue())) return N1; // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { @@ -4694,110 +4696,6 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { } namespace { -/// Helper struct to parse and store a memory address as base + index + offset. 
-/// We ignore sign extensions when it is safe to do so. -/// The following two expressions are not equivalent. To differentiate we need -/// to store whether there was a sign extension involved in the index -/// computation. -/// (load (i64 add (i64 copyfromreg %c) -/// (i64 signextend (add (i8 load %index) -/// (i8 1)))) -/// vs -/// -/// (load (i64 add (i64 copyfromreg %c) -/// (i64 signextend (i32 add (i32 signextend (i8 load %index)) -/// (i32 1))))) -struct BaseIndexOffset { - SDValue Base; - SDValue Index; - int64_t Offset; - bool IsIndexSignExt; - - BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} - - BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, - bool IsIndexSignExt) : - Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {} - - bool equalBaseIndex(const BaseIndexOffset &Other) { - return Other.Base == Base && Other.Index == Index && - Other.IsIndexSignExt == IsIndexSignExt; - } - - /// Parses tree in Ptr for base, index, offset addresses. - static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG, - int64_t PartialOffset = 0) { - bool IsIndexSignExt = false; - - // Split up a folded GlobalAddress+Offset into its component parts. - if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr)) - if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) { - return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(), - SDLoc(GA), - GA->getValueType(0), - /*Offset=*/PartialOffset, - /*isTargetGA=*/false, - GA->getTargetFlags()), - SDValue(), - GA->getOffset(), - IsIndexSignExt); - } - - // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD - // instruction, then it could be just the BASE or everything else we don't - // know how to handle. Just use Ptr as BASE and give up. - if (Ptr->getOpcode() != ISD::ADD) - return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); - - // We know that we have at least an ADD instruction. Try to pattern match - // the simple case of BASE + OFFSET. - if (isa<ConstantSDNode>(Ptr->getOperand(1))) { - int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); - return match(Ptr->getOperand(0), DAG, Offset + PartialOffset); - } - - // Inside a loop the current BASE pointer is calculated using an ADD and a - // MUL instruction. In this case Ptr is the actual BASE pointer. - // (i64 add (i64 %array_ptr) - // (i64 mul (i64 %induction_var) - // (i64 %element_size))) - if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) - return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); - - // Look at Base + Index + Offset cases. - SDValue Base = Ptr->getOperand(0); - SDValue IndexOffset = Ptr->getOperand(1); - - // Skip signextends. - if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) { - IndexOffset = IndexOffset->getOperand(0); - IsIndexSignExt = true; - } - - // Either the case of Base + Index (no offset) or something else. - if (IndexOffset->getOpcode() != ISD::ADD) - return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt); - - // Now we have the case of Base + Index + offset. - SDValue Index = IndexOffset->getOperand(0); - SDValue Offset = IndexOffset->getOperand(1); - - if (!isa<ConstantSDNode>(Offset)) - return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); - - // Ignore signextends. 
- if (Index->getOpcode() == ISD::SIGN_EXTEND) { - Index = Index->getOperand(0); - IsIndexSignExt = true; - } else IsIndexSignExt = false; - - int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue(); - return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt); - } -}; -} // namespace - -namespace { /// Represents known origin of an individual byte in load combine pattern. The /// value of the byte is either constant zero or comes from memory. struct ByteProvider { @@ -5017,14 +4915,15 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return SDValue(); // Loads must share the same base address - BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG); + BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr()); + int64_t ByteOffsetFromBase = 0; if (!Base) Base = Ptr; - else if (!Base->equalBaseIndex(Ptr)) + else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase)) return SDValue(); // Calculate the offset of the current byte from the base address - int64_t ByteOffsetFromBase = Ptr.Offset + MemoryByteOffset(*P); + ByteOffsetFromBase += MemoryByteOffset(*P); ByteOffsets[i] = ByteOffsetFromBase; // Remember the first byte load @@ -12378,8 +12277,8 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, } bool DAGCombiner::MergeStoresOfConstantsOrVecElts( - SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, - unsigned NumStores, bool IsConstantSrc, bool UseVector) { + SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, + bool IsConstantSrc, bool UseVector, bool UseTrunc) { // Make sure we have something to merge. if (NumStores < 2) return false; @@ -12464,7 +12363,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // make sure we use trunc store if it's necessary to be legal. SDValue NewStore; - if (TLI.isTypeLegal(StoredVal.getValueType())) { + if (UseVector || !UseTrunc) { NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), FirstInChain->getAlignment()); @@ -12495,15 +12394,15 @@ void DAGCombiner::getStoreMergeCandidates( StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); EVT MemVT = St->getMemoryVT(); // We must have a base and an offset. - if (!BasePtr.Base.getNode()) + if (!BasePtr.getBase().getNode()) return; // Do not handle stores to undef base pointers. - if (BasePtr.Base.isUndef()) + if (BasePtr.getBase().isUndef()) return; bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) || @@ -12515,10 +12414,11 @@ void DAGCombiner::getStoreMergeCandidates( BaseIndexOffset LBasePtr; // Match on loadbaseptr if relevant. 
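These DAGCombiner hunks all track the same refactor: the file-local BaseIndexOffset helper (deleted above) moved behind the new SelectionDAGAddressAnalysis.cpp added to the CMake list earlier in this diff, and equalBaseIndex gained a DAG parameter plus an optional out-parameter for the byte offset between two addresses. The resulting usage pattern, assembled from the hunks (St1/St2 are illustrative StoreSDNode pointers, not code from this commit):

    // Signatures as used in this diff; surrounding control flow is
    // illustrative only.
    BaseIndexOffset Ptr1 = BaseIndexOffset::match(St1->getBasePtr());
    BaseIndexOffset Ptr2 = BaseIndexOffset::match(St2->getBasePtr());
    if (!Ptr1.getBase().getNode() || Ptr1.getBase().isUndef())
      return; // no usable base pointer
    int64_t Offset = 0;
    if (Ptr1.equalBaseIndex(Ptr2, DAG, Offset)) {
      // Same base and index; Offset now holds the byte distance between the
      // two addresses, resolved by the shared address analysis.
    }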
if (IsLoadSrc)
- LBasePtr = BaseIndexOffset::match(
- cast<LoadSDNode>(St->getValue())->getBasePtr(), DAG);
+ LBasePtr =
+ BaseIndexOffset::match(cast<LoadSDNode>(St->getValue())->getBasePtr());
- auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr) -> bool {
+ auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
+ int64_t &Offset) -> bool {
if (Other->isVolatile() || Other->isIndexed())
return false;
// We can merge constant floats to equivalent integers
@@ -12529,8 +12429,8 @@ void DAGCombiner::getStoreMergeCandidates(
if (IsLoadSrc) {
// The Load's Base Ptr must also match
if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) {
- auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
- if (!(LBasePtr.equalBaseIndex(LPtr)))
+ auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr());
+ if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
return false;
} else
return false;
@@ -12543,8 +12443,8 @@ void DAGCombiner::getStoreMergeCandidates(
if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR))
return false;
- Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
- return (Ptr.equalBaseIndex(BasePtr));
+ Ptr = BaseIndexOffset::match(Other->getBasePtr());
+ return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
};
// We are looking for a root node which is an ancestor to all mergeable
// stores. We search up through a load, to our root and then down
@@ -12572,16 +12472,18 @@ void DAGCombiner::getStoreMergeCandidates(
if (I2.getOperandNo() == 0)
if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
BaseIndexOffset Ptr;
- if (CandidateMatch(OtherST, Ptr))
- StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
+ int64_t PtrDiff;
+ if (CandidateMatch(OtherST, Ptr, PtrDiff))
+ StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
}
} else
for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
if (I.getOperandNo() == 0)
if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
BaseIndexOffset Ptr;
- if (CandidateMatch(OtherST, Ptr))
- StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
+ int64_t PtrDiff;
+ if (CandidateMatch(OtherST, Ptr, PtrDiff))
+ StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
}
}
@@ -12721,8 +12623,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
- unsigned LastLegalType = 0;
- unsigned LastLegalVectorType = 0;
+ unsigned LastLegalType = 1;
+ unsigned LastLegalVectorType = 1;
+ bool LastIntegerTrunc = false;
bool NonZero = false;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
@@ -12747,6 +12650,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.allowsMemoryAccess(Context, DL, StoreTy,
FirstStoreAS, FirstStoreAlign, &IsFast) &&
IsFast) {
+ LastIntegerTrunc = false;
LastLegalType = i + 1;
// Or check whether a truncstore is legal.
} else if (TLI.getTypeAction(Context, StoreTy) == @@ -12758,6 +12662,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { + LastIntegerTrunc = true; LastLegalType = i + 1; } } @@ -12787,8 +12692,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; - bool Merged = MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, - true, UseVector); + bool Merged = MergeStoresOfConstantsOrVecElts( + StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc); if (!Merged) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); continue; @@ -12836,7 +12741,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { } bool Merged = MergeStoresOfConstantsOrVecElts( - StoreNodes, MemVT, NumStoresToMerge, false, true); + StoreNodes, MemVT, NumStoresToMerge, false, true, false); if (!Merged) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge); @@ -12881,11 +12786,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { if (Ld->getMemoryVT() != MemVT) break; - BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); // If this is not the first ptr that we check. - if (LdBasePtr.Base.getNode()) { + int64_t LdOffset = 0; + if (LdBasePtr.getBase().getNode()) { // The base ptr must be the same. - if (!LdPtr.equalBaseIndex(LdBasePtr)) + if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset)) break; } else { // Check that all other base pointers are the same as this one. @@ -12893,7 +12799,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { } // We found a potential memory operand to merge. - LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset)); + LoadNodes.push_back(MemOpLink(Ld, LdOffset)); } if (LoadNodes.size() < 2) { @@ -12919,10 +12825,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // Scan the memory operations on the chain and find the first // non-consecutive load memory address. These variables hold the index in // the store node array. - unsigned LastConsecutiveLoad = 0; + unsigned LastConsecutiveLoad = 1; // This variable refers to the size and not index in the array. - unsigned LastLegalVectorType = 0; - unsigned LastLegalIntegerType = 0; + unsigned LastLegalVectorType = 1; + unsigned LastLegalIntegerType = 1; + bool DoIntegerTruncate = false; StartAddress = LoadNodes[0].OffsetFromBase; SDValue FirstChain = FirstLoad->getChain(); for (unsigned i = 1; i < LoadNodes.size(); ++i) { @@ -12958,11 +12865,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { IsFastSt && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, FirstLoadAlign, &IsFastLd) && - IsFastLd) + IsFastLd) { LastLegalIntegerType = i + 1; - // Or check whether a truncstore and extload is legal. - else if (TLI.getTypeAction(Context, StoreTy) == - TargetLowering::TypePromoteInteger) { + DoIntegerTruncate = false; + // Or check whether a truncstore and extload is legal. 
+ } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) && @@ -12976,8 +12884,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { IsFastSt && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, FirstLoadAlign, &IsFastLd) && - IsFastLd) + IsFastLd) { LastLegalIntegerType = i + 1; + DoIntegerTruncate = true; + } } } @@ -13012,17 +12922,31 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // The merged loads are required to have the same incoming chain, so // using the first's chain is acceptable. - SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), - FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), FirstLoadAlign); SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); - AddToWorklist(NewStoreChain.getNode()); - SDValue NewStore = DAG.getStore( - NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), FirstStoreAlign); + SDValue NewLoad, NewStore; + if (UseVectorTy || !DoIntegerTruncate) { + NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), + FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), FirstLoadAlign); + NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), FirstStoreAlign); + } else { // This must be the truncstore/extload case + EVT ExtendedTy = + TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); + NewLoad = + DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(), + FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), + JointMemOpVT, FirstLoadAlign); + NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), JointMemOpVT, + FirstInChain->getAlignment(), + FirstInChain->getMemOperand()->getFlags()); + } // Transfer chain users from old loads to the new load. for (unsigned i = 0; i < NumElem; ++i) { @@ -13285,7 +13209,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Only perform this optimization before the types are legal, because we // don't want to perform this optimization on every DAGCombine invocation. - if (!LegalTypes) { + if ((TLI.mergeStoresAfterLegalization()) ? Level == AfterLegalizeDAG + : !LegalTypes) { for (;;) { // There can be multiple store sequences on the same chain. // Keep trying to merge store sequences until we are unable to do so @@ -14035,6 +13960,11 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, // when we start sorting the vectors by type. return SDValue(); } + } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() && + InVT1.getSizeInBits() == VT.getSizeInBits()) { + SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2)); + ConcatOps[0] = VecIn2; + VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); } else { // TODO: Support cases where the length mismatch isn't exactly by a // factor of 2. @@ -16610,11 +16540,11 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3; // Check for BaseIndexOffset matching. 
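// (Once both pointers decompose over the same (Base, Index), Op1's bytes
// occupy [PtrDiff, PtrDiff + NumBytes1) relative to Op0's [0, NumBytes0);
// the accesses are disjoint exactly when one interval ends before the
// other begins, which is what the rewritten check tests.)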
- BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG); - BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG); - if (BasePtr0.equalBaseIndex(BasePtr1)) - return !((BasePtr0.Offset + NumBytes0 <= BasePtr1.Offset) || - (BasePtr1.Offset + NumBytes1 <= BasePtr0.Offset)); + BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr()); + BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr()); + int64_t PtrDiff; + if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) + return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0)); // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis // modified to use BaseIndexOffset. @@ -16821,14 +16751,14 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); // We must have a base and an offset. - if (!BasePtr.Base.getNode()) + if (!BasePtr.getBase().getNode()) return false; // Do not handle stores to undef base pointers. - if (BasePtr.Base.isUndef()) + if (BasePtr.getBase().isUndef()) return false; SmallVector<StoreSDNode *, 8> ChainedStores; @@ -16847,10 +16777,10 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { break; // Find the base pointer and offset for this memory node. - BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); // Check that the base pointer is the same as the original one. - if (!Ptr.equalBaseIndex(BasePtr)) + if (!BasePtr.equalBaseIndex(Ptr, DAG)) break; // Walk up the chain to find the next store node, ignoring any diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 15e87b7af18dc..873b2bd48f1e0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3530,17 +3530,24 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { LC = RTLIB::MUL_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); - // The high part is obtained by SRA'ing all but one of the bits of low - // part. - unsigned LoSize = VT.getSizeInBits(); - SDValue HiLHS = - DAG.getNode(ISD::SRA, dl, VT, LHS, - DAG.getConstant(LoSize - 1, dl, - TLI.getPointerTy(DAG.getDataLayout()))); - SDValue HiRHS = - DAG.getNode(ISD::SRA, dl, VT, RHS, - DAG.getConstant(LoSize - 1, dl, - TLI.getPointerTy(DAG.getDataLayout()))); + SDValue HiLHS; + SDValue HiRHS; + if (isSigned) { + // The high part is obtained by SRA'ing all but one of the bits of low + // part. + unsigned LoSize = VT.getSizeInBits(); + HiLHS = + DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + HiRHS = + DAG.getNode(ISD::SRA, dl, VT, RHS, + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + } else { + HiLHS = DAG.getConstant(0, dl, VT); + HiRHS = DAG.getConstant(0, dl, VT); + } // Here we're passing the 2 arguments explicitly as 4 arguments that are // pre-lowered to the correct types. 
This all depends upon WideVT not diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index a3ba52a148ee4..75fec7bd1d485 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -615,9 +615,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, LHS, RHS, N->getOperand(2)); - assert(NVT.bitsLE(SVT) && "Integer type overpromoted?"); // Convert to the expected type. - return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC); + return DAG.getSExtOrTrunc(SetCC, dl, NVT); } SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 593efc5121f90..70b1fa77a0991 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1861,28 +1861,68 @@ static int checkSpecialNodes(const SUnit *left, const SUnit *right) { /// Smaller number is the higher priority. static unsigned CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { - unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; - if (SethiUllmanNumber != 0) - return SethiUllmanNumber; - - unsigned Extra = 0; - for (const SDep &Pred : SU->Preds) { - if (Pred.isCtrl()) continue; // ignore chain preds - SUnit *PredSU = Pred.getSUnit(); - unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); - if (PredSethiUllman > SethiUllmanNumber) { - SethiUllmanNumber = PredSethiUllman; - Extra = 0; - } else if (PredSethiUllman == SethiUllmanNumber) - ++Extra; - } + if (SUNumbers[SU->NodeNum] != 0) + return SUNumbers[SU->NodeNum]; + + // Use WorkList to avoid stack overflow on excessively large IRs. + struct WorkState { + WorkState(const SUnit *SU) : SU(SU) {} + const SUnit *SU; + unsigned PredsProcessed = 0; + }; - SethiUllmanNumber += Extra; + SmallVector<WorkState, 16> WorkList; + WorkList.push_back(SU); + while (!WorkList.empty()) { + auto &Temp = WorkList.back(); + auto *TempSU = Temp.SU; + bool AllPredsKnown = true; + // Try to find a non-evaluated pred and push it into the processing stack. + for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) { + auto &Pred = TempSU->Preds[P]; + if (Pred.isCtrl()) continue; // ignore chain preds + SUnit *PredSU = Pred.getSUnit(); + if (SUNumbers[PredSU->NodeNum] == 0) { +#ifndef NDEBUG + // In debug mode, check that we don't have such element in the stack. + for (auto It : WorkList) + assert(It.SU != PredSU && "Trying to push an element twice?"); +#endif + // Next time start processing this one starting from the next pred. + Temp.PredsProcessed = P + 1; + WorkList.push_back(PredSU); + AllPredsKnown = false; + break; + } + } - if (SethiUllmanNumber == 0) - SethiUllmanNumber = 1; + if (!AllPredsKnown) + continue; - return SethiUllmanNumber; + // Once all preds are known, we can calculate the answer for this one. 
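+    // (That answer is the maximum Sethi-Ullman number over all preds, plus
+    // one for every additional pred that ties that maximum.)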
+ unsigned SethiUllmanNumber = 0; + unsigned Extra = 0; + for (const SDep &Pred : TempSU->Preds) { + if (Pred.isCtrl()) continue; // ignore chain preds + SUnit *PredSU = Pred.getSUnit(); + unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum]; + assert(PredSethiUllman > 0 && "We should have evaluated this pred!"); + if (PredSethiUllman > SethiUllmanNumber) { + SethiUllmanNumber = PredSethiUllman; + Extra = 0; + } else if (PredSethiUllman == SethiUllmanNumber) + ++Extra; + } + + SethiUllmanNumber += Extra; + if (SethiUllmanNumber == 0) + SethiUllmanNumber = 1; + SUNumbers[TempSU->NodeNum] = SethiUllmanNumber; + WorkList.pop_back(); + } + + assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!"); + return SUNumbers[SU->NodeNum]; } /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 7abdc76cb004f..98553152117d1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4897,6 +4897,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // TODO: In the AlwaysInline case, if the size is big then generate a loop // rather than maybe a humongous number of loads and stores. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + LLVMContext &C = *DAG.getContext(); std::vector<EVT> MemOps; bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); @@ -4923,15 +4925,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, return SDValue(); if (DstAlignCanChange) { - Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); + Type *Ty = MemOps[0].getTypeForEVT(C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); // Don't promote to an alignment that would require dynamic stack // realignment. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) while (NewAlign > Align && - DAG.getDataLayout().exceedsNaturalStackAlignment(NewAlign)) + DL.exceedsNaturalStackAlignment(NewAlign)) NewAlign /= 2; if (NewAlign > Align) { @@ -4991,12 +4993,19 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // thing to do is generate a LoadExt/StoreTrunc pair. These simplify // to Load/Store if NVT==VT. // FIXME does the case above also need this? - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + EVT NVT = TLI.getTypeToTransformTo(C, VT); assert(NVT.bitsGE(VT)); + + bool isDereferenceable = + SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); + MachineMemOperand::Flags SrcMMOFlags = MMOFlags; + if (isDereferenceable) + SrcMMOFlags |= MachineMemOperand::MODereferenceable; + Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), SrcPtrInfo.getWithOffset(SrcOff), VT, - MinAlign(SrcAlign, SrcOff), MMOFlags); + MinAlign(SrcAlign, SrcOff), SrcMMOFlags); OutChains.push_back(Value.getValue(1)); Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), @@ -5024,6 +5033,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // Expand memmove to a series of load and store ops if the size operand falls // below a certain threshold. 
const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + LLVMContext &C = *DAG.getContext(); std::vector<EVT> MemOps; bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); @@ -5046,8 +5057,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, return SDValue(); if (DstAlignCanChange) { - Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); + Type *Ty = MemOps[0].getTypeForEVT(C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) @@ -5068,9 +5079,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value; + bool isDereferenceable = + SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); + MachineMemOperand::Flags SrcMMOFlags = MMOFlags; + if (isDereferenceable) + SrcMMOFlags |= MachineMemOperand::MODereferenceable; + Value = DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), - SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, MMOFlags); + SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp new file mode 100644 index 0000000000000..d2e0dbbf88ecd --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -0,0 +1,95 @@ +//===-- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp ------- DAG Address +//Analysis ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// + +#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" + +namespace llvm { + +bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, + const SelectionDAG &DAG, int64_t &Off) { + // Obvious equivalent + Off = Other.Offset - Offset; + if (Other.Base == Base && Other.Index == Index && + Other.IsIndexSignExt == IsIndexSignExt) + return true; + + // Match GlobalAddresses + if (Index == Other.Index) + if (GlobalAddressSDNode *A = dyn_cast<GlobalAddressSDNode>(Base)) + if (GlobalAddressSDNode *B = dyn_cast<GlobalAddressSDNode>(Other.Base)) + if (A->getGlobal() == B->getGlobal()) { + Off += B->getOffset() - A->getOffset(); + return true; + } + + // TODO: we should be able to add FrameIndex analysis improvements here. + + return false; +} + +/// Parses tree in Ptr for base, index, offset addresses. +BaseIndexOffset BaseIndexOffset::match(SDValue Ptr) { + // (((B + I*M) + c)) + c ... 
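+  // i.e. a run of constant adds wrapped around an optional (base + index)
+  // pair: constant offsets fold into Offset, and sign extensions of the
+  // index are stripped and recorded in IsIndexSignExt.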
+ SDValue Base = Ptr; + SDValue Index = SDValue(); + int64_t Offset = 0; + bool IsIndexSignExt = false; + + // Consume constant adds + while (Base->getOpcode() == ISD::ADD && + isa<ConstantSDNode>(Base->getOperand(1))) { + int64_t POffset = cast<ConstantSDNode>(Base->getOperand(1))->getSExtValue(); + Offset += POffset; + Base = Base->getOperand(0); + } + + if (Base->getOpcode() == ISD::ADD) { + // TODO: The following code appears to be needless as it just + // bails on some Ptrs early, reducing the cases where we + // find equivalence. We should be able to remove this. + // Inside a loop the current BASE pointer is calculated using an ADD and a + // MUL instruction. In this case Base is the actual BASE pointer. + // (i64 add (i64 %array_ptr) + // (i64 mul (i64 %induction_var) + // (i64 %element_size))) + if (Base->getOperand(1)->getOpcode() == ISD::MUL) + return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt); + + // Look at Base + Index + Offset cases. + Index = Base->getOperand(1); + SDValue PotentialBase = Base->getOperand(0); + + // Skip signextends. + if (Index->getOpcode() == ISD::SIGN_EXTEND) { + Index = Index->getOperand(0); + IsIndexSignExt = true; + } + + // Check if Index Offset pattern + if (Index->getOpcode() != ISD::ADD || + !isa<ConstantSDNode>(Index->getOperand(1))) + return BaseIndexOffset(PotentialBase, Index, Offset, IsIndexSignExt); + + Offset += cast<ConstantSDNode>(Index->getOperand(1))->getSExtValue(); + Index = Index->getOperand(0); + if (Index->getOpcode() == ISD::SIGN_EXTEND) { + Index = Index->getOperand(0); + IsIndexSignExt = true; + } else + IsIndexSignExt = false; + Base = PotentialBase; + } + return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt); +} +} // end namespace llvm diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index dcccd17bb98ea..f711ca71f79fe 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -337,12 +337,13 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { /// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that /// may trap on it. In this case we have to split the edge so that the path /// through the predecessor block that doesn't go to the phi block doesn't -/// execute the possibly trapping instruction. If available, we pass a -/// dominator tree to be updated when we split critical edges. This is because -/// SelectionDAGISel preserves the DominatorTree. +/// execute the possibly trapping instruction. If available, we pass domtree +/// and loop info to be updated when we split critical edges. This is because +/// SelectionDAGISel preserves these analyses. /// This is required for correctness, so it must be done at -O0. /// -static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT) { +static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT, + LoopInfo *LI) { // Loop for blocks with phi nodes. for (BasicBlock &BB : Fn) { PHINode *PN = dyn_cast<PHINode>(BB.begin()); @@ -368,7 +369,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT) { // Okay, we have to split this edge. 
SplitCriticalEdge( Pred->getTerminator(), GetSuccessorNumber(Pred, &BB), - CriticalEdgeSplittingOptions(DT).setMergeIdenticalEdges()); + CriticalEdgeSplittingOptions(DT, LI).setMergeIdenticalEdges()); goto ReprocessBlock; } } @@ -406,10 +407,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { ORE = make_unique<OptimizationRemarkEmitter>(&Fn); auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); + LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); - SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT); + SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI); CurDAG->init(*MF, *ORE); FuncInfo->set(Fn, *MF, CurDAG); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index cfda0fffd031a..8652df7bbd706 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -365,10 +365,10 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, // If this is a 'not' op, don't touch it because that's a canonical form. const APInt &C = Op1C->getAPIntValue(); - if (Opcode == ISD::XOR && (C | ~Demanded).isAllOnesValue()) + if (Opcode == ISD::XOR && Demanded.isSubsetOf(C)) return false; - if (C.intersects(~Demanded)) { + if (!C.isSubsetOf(Demanded)) { EVT VT = Op.getValueType(); SDValue NewC = DAG.getConstant(Demanded & C, DL, VT); SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC); @@ -919,7 +919,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. - if (NewMask == 1) + if (NewMask.isOneValue()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1))); @@ -1349,7 +1349,7 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const { case UndefinedBooleanContent: return CVal[0]; case ZeroOrOneBooleanContent: - return CVal == 1; + return CVal.isOneValue(); case ZeroOrNegativeOneBooleanContent: return CVal.isAllOnesValue(); } @@ -1506,7 +1506,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an // equality comparison, then we're just comparing whether X itself is // zero. 
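// (The shift amount is log2 of the bit width, so for i32 the srl yields 1
// exactly when ctlz returned 32, i.e. when every bit of X is clear.)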
- if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && + if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) && N0.getOperand(0).getOpcode() == ISD::CTLZ && N0.getOperand(1).getOpcode() == ISD::Constant) { const APInt &ShAmt @@ -1666,7 +1666,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, for (unsigned width = origWidth / 2; width>=8; width /= 2) { APInt newMask = APInt::getLowBitsSet(maskWidth, width); for (unsigned offset=0; offset<origWidth/width; offset++) { - if ((newMask & Mask) == Mask) { + if (Mask.isSubsetOf(newMask)) { if (DAG.getDataLayout().isLittleEndian()) bestOffset = (uint64_t)offset * (width/8); else @@ -1785,12 +1785,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ExtSrcTyBits), dl, ExtDstTy), Cond); - } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && + } else if ((N1C->isNullValue() || N1C->isOne()) && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { - bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1); + bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne()); if (TrueWhenTrue) return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); // Invert the condition. @@ -1807,7 +1807,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOperand(0).getOpcode() == ISD::XOR && N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && isa<ConstantSDNode>(N0.getOperand(1)) && - cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) { + cast<ConstantSDNode>(N0.getOperand(1))->isOne()) { // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We // can only do this if the top bits are known zero. unsigned BitWidth = N0.getValueSizeInBits(); @@ -1830,7 +1830,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, Val, N1, Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } - } else if (N1C->getAPIntValue() == 1 && + } else if (N1C->isOne() && (VT == MVT::i1 || getBooleanContents(N0->getValueType(0)) == ZeroOrOneBooleanContent)) { @@ -1848,7 +1848,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } if (Op0.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op0.getOperand(1)) && - cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) { + cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) { // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. if (Op0.getValueType().bitsGT(VT)) Op0 = DAG.getNode(ISD::AND, dl, VT, @@ -2482,7 +2482,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // gcc prints these as sign extended. Sign extend value to 64 bits // now; without this it would get ZExt'd later in // ScheduleDAGSDNodes::EmitNode, which is very generic. 
-      Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
+      Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
                                           SDLoc(C), MVT::i64));
     }
     return;
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index c8537ad2f3130..eeb00a784b0d9 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===//
+//==- TargetRegisterInfo.cpp - Target Register Information Implementation --==//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -11,17 +11,27 @@
 //
 //===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Printable.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <utility>
 #define DEBUG_TYPE "target-reg-info"
@@ -38,7 +48,7 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
     CoveringLanes(SRICoveringLanes) {
 }
-TargetRegisterInfo::~TargetRegisterInfo() {}
+TargetRegisterInfo::~TargetRegisterInfo() = default;
 void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet,
                                        unsigned Reg) const {
@@ -126,7 +136,7 @@ Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
   });
 }
-} // End of llvm namespace
+} // end namespace llvm
 /// getAllocatableClass - Return the maximal subclass of the given register
 /// class that is allocatable, or NULL.
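The TargetLowering.cpp hunks above swap hand-rolled mask arithmetic for APInt's set-style predicates. The rewrites rely on two identities: (C | ~Demanded) is all-ones exactly when every demanded bit is already set in C, i.e. Demanded.isSubsetOf(C); and C.intersects(~Demanded) holds exactly when C has a bit outside Demanded, i.e. !C.isSubsetOf(Demanded). A minimal standalone sketch (not part of the patch; it only assumes LLVM's APInt header) checking both equivalences:

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  using llvm::APInt;
  // Demanded = 0b00001111, C = 0b00001011: C stays inside the demanded
  // bits, but does not cover all of them.
  APInt Demanded(8, 0x0F), C(8, 0x0B);
  // Old spelling on the left, new spelling on the right.
  assert((C | ~Demanded).isAllOnesValue() == Demanded.isSubsetOf(C));
  assert(C.intersects(~Demanded) == !C.isSubsetOf(Demanded));
  return 0;
}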
diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp index 82e85bab14747..f6d5bc80ddffb 100644 --- a/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/lib/CodeGen/TargetSubtargetInfo.cpp @@ -1,4 +1,4 @@ -//===-- TargetSubtargetInfo.cpp - General Target Information ---------------==// +//===- TargetSubtargetInfo.cpp - General Target Information ----------------==// // // The LLVM Compiler Infrastructure // @@ -11,15 +11,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" +#include <string> + using namespace llvm; -//--------------------------------------------------------------------------- -// TargetSubtargetInfo Class -// TargetSubtargetInfo::TargetSubtargetInfo( const Triple &TT, StringRef CPU, StringRef FS, ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD, @@ -29,7 +31,7 @@ TargetSubtargetInfo::TargetSubtargetInfo( : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) { } -TargetSubtargetInfo::~TargetSubtargetInfo() {} +TargetSubtargetInfo::~TargetSubtargetInfo() = default; bool TargetSubtargetInfo::enableAtomicExpand() const { return true; diff --git a/lib/DebugInfo/CodeView/CMakeLists.txt b/lib/DebugInfo/CodeView/CMakeLists.txt index f916695a84392..b94bb0c80c793 100644 --- a/lib/DebugInfo/CodeView/CMakeLists.txt +++ b/lib/DebugInfo/CodeView/CMakeLists.txt @@ -24,11 +24,10 @@ add_llvm_library(LLVMDebugInfoCodeView SymbolRecordMapping.cpp SymbolDumper.cpp SymbolSerializer.cpp - TypeDatabase.cpp - TypeDatabaseVisitor.cpp TypeDumpVisitor.cpp TypeIndex.cpp TypeIndexDiscovery.cpp + TypeName.cpp TypeRecordMapping.cpp TypeSerializer.cpp TypeStreamMerger.cpp diff --git a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp index f0debd9e97023..22f166a2335d6 100644 --- a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp +++ b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp @@ -12,8 +12,6 @@ #include "llvm/ADT/TinyPtrVector.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" #include "llvm/DebugInfo/CodeView/TypeCollection.h" -#include "llvm/DebugInfo/CodeView/TypeDatabase.h" -#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeRecordMapping.h" #include "llvm/DebugInfo/CodeView/TypeServerHandler.h" diff --git a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp index 334c5e002bbca..d69eca018e0c1 100644 --- a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp +++ b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp @@ -53,12 +53,16 @@ DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder( std::shared_ptr<DebugSubsection> Subsection, CodeViewContainer Container) : Subsection(std::move(Subsection)), Container(Container) {} +DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder( + const DebugSubsectionRecord &Contents, CodeViewContainer Container) + : Contents(Contents), Container(Container) {} + uint32_t DebugSubsectionRecordBuilder::calculateSerializedLength() { - // The length of the entire subsection is always padded to 4 bytes, regardless - // of the container kind. 
- uint32_t Size = sizeof(DebugSubsectionHeader) + - alignTo(Subsection->calculateSerializedSize(), 4); - return Size; + uint32_t DataSize = Subsection ? Subsection->calculateSerializedSize() + : Contents.getRecordData().getLength(); + // The length of the entire subsection is always padded to 4 bytes, + // regardless of the container kind. + return sizeof(DebugSubsectionHeader) + alignTo(DataSize, 4); } Error DebugSubsectionRecordBuilder::commit(BinaryStreamWriter &Writer) const { @@ -66,16 +70,22 @@ Error DebugSubsectionRecordBuilder::commit(BinaryStreamWriter &Writer) const { "Debug Subsection not properly aligned"); DebugSubsectionHeader Header; - Header.Kind = uint32_t(Subsection->kind()); + Header.Kind = uint32_t(Subsection ? Subsection->kind() : Contents.kind()); // The value written into the Header's Length field is only padded to the // container's alignment - Header.Length = - alignTo(Subsection->calculateSerializedSize(), alignOf(Container)); + uint32_t DataSize = Subsection ? Subsection->calculateSerializedSize() + : Contents.getRecordData().getLength(); + Header.Length = alignTo(DataSize, alignOf(Container)); if (auto EC = Writer.writeObject(Header)) return EC; - if (auto EC = Subsection->commit(Writer)) - return EC; + if (Subsection) { + if (auto EC = Subsection->commit(Writer)) + return EC; + } else { + if (auto EC = Writer.writeStreamRef(Contents.getRecordData())) + return EC; + } if (auto EC = Writer.padToAlignment(4)) return EC; diff --git a/lib/DebugInfo/CodeView/EnumTables.cpp b/lib/DebugInfo/CodeView/EnumTables.cpp index 01d8ccf2d31e8..ec00af28395e5 100644 --- a/lib/DebugInfo/CodeView/EnumTables.cpp +++ b/lib/DebugInfo/CodeView/EnumTables.cpp @@ -82,6 +82,13 @@ static const EnumEntry<uint16_t> RegisterNames[] = { CV_ENUM_CLASS_ENT(RegisterId, R15), }; +static const EnumEntry<uint32_t> PublicSymFlagNames[] = { + CV_ENUM_CLASS_ENT(PublicSymFlags, Code), + CV_ENUM_CLASS_ENT(PublicSymFlags, Function), + CV_ENUM_CLASS_ENT(PublicSymFlags, Managed), + CV_ENUM_CLASS_ENT(PublicSymFlags, MSIL), +}; + static const EnumEntry<uint8_t> ProcSymFlagNames[] = { CV_ENUM_CLASS_ENT(ProcSymFlags, HasFP), CV_ENUM_CLASS_ENT(ProcSymFlags, HasIRET), @@ -338,6 +345,9 @@ ArrayRef<EnumEntry<uint16_t>> getRegisterNames() { return makeArrayRef(RegisterNames); } +ArrayRef<EnumEntry<uint32_t>> getPublicSymFlagNames() { + return makeArrayRef(PublicSymFlagNames); +} ArrayRef<EnumEntry<uint8_t>> getProcSymFlagNames() { return makeArrayRef(ProcSymFlagNames); } diff --git a/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp index 39eb4099ce9e6..20f7e72c3af39 100644 --- a/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp +++ b/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp @@ -11,7 +11,7 @@ #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" -#include "llvm/DebugInfo/CodeView/TypeDatabase.h" +#include "llvm/DebugInfo/CodeView/TypeName.h" #include "llvm/DebugInfo/CodeView/TypeServerHandler.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" @@ -31,15 +31,13 @@ LazyRandomTypeCollection::LazyRandomTypeCollection(uint32_t RecordCountHint) LazyRandomTypeCollection::LazyRandomTypeCollection( const CVTypeArray &Types, uint32_t RecordCountHint, PartialOffsetArray PartialOffsets) - : Database(RecordCountHint), Types(Types), DatabaseVisitor(Database), - PartialOffsets(PartialOffsets) { - KnownOffsets.resize(Database.capacity()); + : NameStorage(Allocator), Types(Types), 
PartialOffsets(PartialOffsets) { + Records.resize(RecordCountHint); } LazyRandomTypeCollection::LazyRandomTypeCollection(ArrayRef<uint8_t> Data, uint32_t RecordCountHint) : LazyRandomTypeCollection(RecordCountHint) { - reset(Data); } LazyRandomTypeCollection::LazyRandomTypeCollection(StringRef Data, @@ -52,50 +50,88 @@ LazyRandomTypeCollection::LazyRandomTypeCollection(const CVTypeArray &Types, uint32_t NumRecords) : LazyRandomTypeCollection(Types, NumRecords, PartialOffsetArray()) {} -void LazyRandomTypeCollection::reset(StringRef Data) { - reset(makeArrayRef(Data.bytes_begin(), Data.bytes_end())); -} - -void LazyRandomTypeCollection::reset(ArrayRef<uint8_t> Data) { +void LazyRandomTypeCollection::reset(StringRef Data, uint32_t RecordCountHint) { + Count = 0; PartialOffsets = PartialOffsetArray(); BinaryStreamReader Reader(Data, support::little); error(Reader.readArray(Types, Reader.getLength())); - KnownOffsets.resize(Database.capacity()); + // Clear and then resize, to make sure existing data gets destroyed. + Records.clear(); + Records.resize(RecordCountHint); +} + +void LazyRandomTypeCollection::reset(ArrayRef<uint8_t> Data, + uint32_t RecordCountHint) { + reset(toStringRef(Data), RecordCountHint); +} + +uint32_t LazyRandomTypeCollection::getOffsetOfType(TypeIndex Index) { + error(ensureTypeExists(Index)); + assert(contains(Index)); + + return Records[Index.toArrayIndex()].Offset; } CVType LazyRandomTypeCollection::getType(TypeIndex Index) { error(ensureTypeExists(Index)); - return Database.getTypeRecord(Index); + assert(contains(Index)); + + return Records[Index.toArrayIndex()].Type; } StringRef LazyRandomTypeCollection::getTypeName(TypeIndex Index) { - if (!Index.isSimple()) { - // Try to make sure the type exists. Even if it doesn't though, it may be - // because we're dumping a symbol stream with no corresponding type stream - // present, in which case we still want to be able to print <unknown UDT> - // for the type names. - consumeError(ensureTypeExists(Index)); + if (Index.isNoneType() || Index.isSimple()) + return TypeIndex::simpleTypeName(Index); + + // Try to make sure the type exists. Even if it doesn't though, it may be + // because we're dumping a symbol stream with no corresponding type stream + // present, in which case we still want to be able to print <unknown UDT> + // for the type names. 
+ if (auto EC = ensureTypeExists(Index)) { + consumeError(std::move(EC)); + return "<unknown UDT>"; } - return Database.getTypeName(Index); + uint32_t I = Index.toArrayIndex(); + ensureCapacityFor(Index); + if (Records[I].Name.data() == nullptr) { + StringRef Result = NameStorage.save(computeTypeName(*this, Index)); + Records[I].Name = Result; + } + return Records[I].Name; } bool LazyRandomTypeCollection::contains(TypeIndex Index) { - return Database.contains(Index); + if (Records.size() <= Index.toArrayIndex()) + return false; + if (!Records[Index.toArrayIndex()].Type.valid()) + return false; + return true; } -uint32_t LazyRandomTypeCollection::size() { return Database.size(); } +uint32_t LazyRandomTypeCollection::size() { return Count; } -uint32_t LazyRandomTypeCollection::capacity() { return Database.capacity(); } +uint32_t LazyRandomTypeCollection::capacity() { return Records.size(); } Error LazyRandomTypeCollection::ensureTypeExists(TypeIndex TI) { - if (!Database.contains(TI)) { - if (auto EC = visitRangeForType(TI)) - return EC; - } - return Error::success(); + if (contains(TI)) + return Error::success(); + + return visitRangeForType(TI); +} + +void LazyRandomTypeCollection::ensureCapacityFor(TypeIndex Index) { + uint32_t MinSize = Index.toArrayIndex() + 1; + + if (MinSize <= capacity()) + return; + + uint32_t NewCapacity = MinSize * 3 / 2; + + assert(NewCapacity > capacity()); + Records.resize(NewCapacity); } Error LazyRandomTypeCollection::visitRangeForType(TypeIndex TI) { @@ -111,7 +147,7 @@ Error LazyRandomTypeCollection::visitRangeForType(TypeIndex TI) { auto Prev = std::prev(Next); TypeIndex TIB = Prev->Type; - if (Database.contains(TIB)) { + if (contains(TIB)) { // They've asked us to fetch a type index, but the entry we found in the // partial offsets array has already been visited. Since we visit an entire // block every time, that means this record should have been previously @@ -122,13 +158,12 @@ Error LazyRandomTypeCollection::visitRangeForType(TypeIndex TI) { TypeIndex TIE; if (Next == PartialOffsets.end()) { - TIE = TypeIndex::fromArrayIndex(Database.capacity()); + TIE = TypeIndex::fromArrayIndex(capacity()); } else { TIE = Next->Type; } - if (auto EC = visitRange(TIB, Prev->Offset, TIE)) - return EC; + visitRange(TIB, Prev->Offset, TIE); return Error::success(); } @@ -157,34 +192,31 @@ Error LazyRandomTypeCollection::fullScanForType(TypeIndex TI) { assert(PartialOffsets.empty()); TypeIndex CurrentTI = TypeIndex::fromArrayIndex(0); - uint32_t Offset = 0; auto Begin = Types.begin(); - if (!Database.empty()) { + if (Count > 0) { // In the case of type streams which we don't know the number of records of, // it's possible to search for a type index triggering a full scan, but then // later additional records are added since we didn't know how many there // would be until we did a full visitation, then you try to access the new // type triggering another full scan. To avoid this, we assume that if the - // database has some records, this must be what's going on. So we ask the - // database for the largest type index less than the one we're searching for - // and only do the forward scan from there. - auto Prev = Database.largestTypeIndexLessThan(TI); - assert(Prev.hasValue() && "Empty database with valid types?"); - Offset = KnownOffsets[Prev->toArrayIndex()]; - CurrentTI = *Prev; - ++CurrentTI; + // database has some records, this must be what's going on. 
We can also
+    // assume that this index must be larger than the largest type index we've
+    // visited, so we start from there and scan forward.
+    uint32_t Offset = Records[LargestTypeIndex.toArrayIndex()].Offset;
+    CurrentTI = LargestTypeIndex + 1;
     Begin = Types.at(Offset);
     ++Begin;
-    Offset = Begin.offset();
   }
   auto End = Types.end();
   while (Begin != End) {
-    if (auto EC = visitOneRecord(CurrentTI, Offset, *Begin))
-      return EC;
-
-    Offset += Begin.getRecordLength();
+    ensureCapacityFor(CurrentTI);
+    LargestTypeIndex = std::max(LargestTypeIndex, CurrentTI);
+    auto Idx = CurrentTI.toArrayIndex();
+    Records[Idx].Type = *Begin;
+    Records[Idx].Offset = Begin.offset();
+    ++Count;
     ++Begin;
     ++CurrentTI;
   }
@@ -194,36 +226,19 @@ Error LazyRandomTypeCollection::fullScanForType(TypeIndex TI) {
   return Error::success();
 }
-Error LazyRandomTypeCollection::visitRange(TypeIndex Begin,
-                                           uint32_t BeginOffset,
-                                           TypeIndex End) {
-
+void LazyRandomTypeCollection::visitRange(TypeIndex Begin, uint32_t BeginOffset,
+                                          TypeIndex End) {
   auto RI = Types.at(BeginOffset);
   assert(RI != Types.end());
+  ensureCapacityFor(End);
   while (Begin != End) {
-    if (auto EC = visitOneRecord(Begin, BeginOffset, *RI))
-      return EC;
-
-    BeginOffset += RI.getRecordLength();
+    LargestTypeIndex = std::max(LargestTypeIndex, Begin);
+    auto Idx = Begin.toArrayIndex();
+    Records[Idx].Type = *RI;
+    Records[Idx].Offset = RI.offset();
+    ++Count;
     ++Begin;
     ++RI;
   }
-
-  return Error::success();
-}
-
-Error LazyRandomTypeCollection::visitOneRecord(TypeIndex TI, uint32_t Offset,
-                                               CVType &Record) {
-  assert(!Database.contains(TI));
-  if (auto EC = codeview::visitTypeRecord(Record, TI, DatabaseVisitor))
-    return EC;
-  // Keep the KnownOffsets array the same size as the Database's capacity. Since
-  // we don't always know how many records are in the type stream, we need to be
-  // prepared for the database growing and receiving a type index that can't fit
-  // in our current buffer.
- if (KnownOffsets.size() < Database.capacity()) - KnownOffsets.resize(Database.capacity()); - KnownOffsets[TI.toArrayIndex()] = Offset; - return Error::success(); } diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp index 36abafc079edb..b9fa9b6a6ad7e 100644 --- a/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -524,7 +524,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, PublicSym32 &Public) { DictScope S(W, "PublicSym"); - printTypeIndex("Type", Public.Index); + W.printFlags("Flags", uint32_t(Public.Flags), getPublicSymFlagNames()); W.printNumber("Seg", Public.Segment); W.printNumber("Off", Public.Offset); W.printString("Name", Public.Name); diff --git a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp index d731dc1b0a372..923837a45d9fc 100644 --- a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp +++ b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp @@ -361,7 +361,7 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, PublicSym32 &Public) { - error(IO.mapInteger(Public.Index)); + error(IO.mapEnum(Public.Flags)); error(IO.mapInteger(Public.Offset)); error(IO.mapInteger(Public.Segment)); error(IO.mapStringZ(Public.Name)); diff --git a/lib/DebugInfo/CodeView/TypeDatabase.cpp b/lib/DebugInfo/CodeView/TypeDatabase.cpp deleted file mode 100644 index 08f848b36a9d5..0000000000000 --- a/lib/DebugInfo/CodeView/TypeDatabase.cpp +++ /dev/null @@ -1,146 +0,0 @@ -//===- TypeDatabase.cpp --------------------------------------- *- C++ --*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/CodeView/TypeDatabase.h" - -using namespace llvm; -using namespace llvm::codeview; - -TypeDatabase::TypeDatabase(uint32_t Capacity) : TypeNameStorage(Allocator) { - CVUDTNames.resize(Capacity); - TypeRecords.resize(Capacity); - ValidRecords.resize(Capacity); -} - -TypeIndex TypeDatabase::appendType(StringRef Name, const CVType &Data) { - LargestTypeIndex = getAppendIndex(); - if (LargestTypeIndex.toArrayIndex() >= capacity()) - grow(); - recordType(Name, LargestTypeIndex, Data); - return LargestTypeIndex; -} - -void TypeDatabase::recordType(StringRef Name, TypeIndex Index, - const CVType &Data) { - LargestTypeIndex = empty() ? Index : std::max(Index, LargestTypeIndex); - - if (LargestTypeIndex.toArrayIndex() >= capacity()) - grow(Index); - - uint32_t AI = Index.toArrayIndex(); - - assert(!contains(Index)); - assert(AI < capacity()); - - CVUDTNames[AI] = Name; - TypeRecords[AI] = Data; - ValidRecords.set(AI); - ++Count; -} - -/// Saves the name in a StringSet and creates a stable StringRef. 
-StringRef TypeDatabase::saveTypeName(StringRef TypeName) { - return TypeNameStorage.save(TypeName); -} - -StringRef TypeDatabase::getTypeName(TypeIndex Index) const { - if (Index.isNoneType() || Index.isSimple()) - return TypeIndex::simpleTypeName(Index); - - if (contains(Index)) - return CVUDTNames[Index.toArrayIndex()]; - - return "<unknown UDT>"; -} - -const CVType &TypeDatabase::getTypeRecord(TypeIndex Index) const { - assert(contains(Index)); - return TypeRecords[Index.toArrayIndex()]; -} - -CVType &TypeDatabase::getTypeRecord(TypeIndex Index) { - assert(contains(Index)); - return TypeRecords[Index.toArrayIndex()]; -} - -bool TypeDatabase::contains(TypeIndex Index) const { - uint32_t AI = Index.toArrayIndex(); - if (AI >= capacity()) - return false; - - return ValidRecords.test(AI); -} - -uint32_t TypeDatabase::size() const { return Count; } - -uint32_t TypeDatabase::capacity() const { return TypeRecords.size(); } - -CVType TypeDatabase::getType(TypeIndex Index) { return getTypeRecord(Index); } - -StringRef TypeDatabase::getTypeName(TypeIndex Index) { - return static_cast<const TypeDatabase *>(this)->getTypeName(Index); -} - -bool TypeDatabase::contains(TypeIndex Index) { - return static_cast<const TypeDatabase *>(this)->contains(Index); -} - -uint32_t TypeDatabase::size() { - return static_cast<const TypeDatabase *>(this)->size(); -} - -uint32_t TypeDatabase::capacity() { - return static_cast<const TypeDatabase *>(this)->capacity(); -} - -void TypeDatabase::grow() { grow(LargestTypeIndex + 1); } - -void TypeDatabase::grow(TypeIndex NewIndex) { - uint32_t NewSize = NewIndex.toArrayIndex() + 1; - - if (NewSize <= capacity()) - return; - - uint32_t NewCapacity = NewSize * 3 / 2; - - TypeRecords.resize(NewCapacity); - CVUDTNames.resize(NewCapacity); - ValidRecords.resize(NewCapacity); -} - -bool TypeDatabase::empty() const { return size() == 0; } - -Optional<TypeIndex> TypeDatabase::largestTypeIndexLessThan(TypeIndex TI) const { - uint32_t AI = TI.toArrayIndex(); - int N = ValidRecords.find_prev(AI); - if (N == -1) - return None; - return TypeIndex::fromArrayIndex(N); -} - -TypeIndex TypeDatabase::getAppendIndex() const { - if (empty()) - return TypeIndex::fromArrayIndex(0); - - return LargestTypeIndex + 1; -} - -Optional<TypeIndex> TypeDatabase::getFirst() { - int N = ValidRecords.find_first(); - if (N == -1) - return None; - return TypeIndex::fromArrayIndex(N); -} - -Optional<TypeIndex> TypeDatabase::getNext(TypeIndex Prev) { - int N = ValidRecords.find_next(Prev.toArrayIndex()); - if (N == -1) - return None; - return TypeIndex::fromArrayIndex(N); -} diff --git a/lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp b/lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp deleted file mode 100644 index 8d97f8b1cb401..0000000000000 --- a/lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp +++ /dev/null @@ -1,330 +0,0 @@ -//===- TypeDatabaseVisitor.cpp -------------------------------- *- C++ --*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h" - -#include "llvm/ADT/SmallString.h" - -using namespace llvm; - -using namespace llvm::codeview; - -Error TypeDatabaseVisitor::visitTypeBegin(CVType &Record) { - assert(!IsInFieldList); - // Reset Name to the empty string. If the visitor sets it, we know it. 
- Name = ""; - - if (Record.Type == LF_FIELDLIST) { - // Record that we're in a field list so that members do not get assigned - // type indices. - IsInFieldList = true; - } - return Error::success(); -} - -Error TypeDatabaseVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) { - if (auto EC = visitTypeBegin(Record)) - return EC; - - CurrentTypeIndex = Index; - return Error::success(); -} - -StringRef TypeDatabaseVisitor::getTypeName(TypeIndex Index) const { - return TypeDB->getTypeName(Index); -} - -StringRef TypeDatabaseVisitor::saveTypeName(StringRef Name) { - return TypeDB->saveTypeName(Name); -} - -Error TypeDatabaseVisitor::visitTypeEnd(CVType &CVR) { - if (CVR.Type == LF_FIELDLIST) { - assert(IsInFieldList); - IsInFieldList = false; - } - assert(!IsInFieldList); - - // Record every type that is not a field list member, even if Name is empty. - // CVUDTNames is indexed by type index, and must have one entry for every - // type. Field list members are not recorded, and are only referenced by - // their containing field list record. - if (CurrentTypeIndex) - TypeDB->recordType(Name, *CurrentTypeIndex, CVR); - else - TypeDB->appendType(Name, CVR); - - CurrentTypeIndex.reset(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitMemberBegin(CVMemberRecord &Record) { - assert(IsInFieldList); - // Reset Name to the empty string. If the visitor sets it, we know it. - Name = ""; - return Error::success(); -} - -Error TypeDatabaseVisitor::visitMemberEnd(CVMemberRecord &Record) { - assert(IsInFieldList); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, - FieldListRecord &FieldList) { - Name = "<field list>"; - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVRecord<TypeLeafKind> &CVR, - StringIdRecord &String) { - // Put this in the database so it gets printed with LF_UDT_SRC_LINE. 
- Name = String.getString(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, ArgListRecord &Args) { - auto Indices = Args.getIndices(); - uint32_t Size = Indices.size(); - SmallString<256> TypeName("("); - for (uint32_t I = 0; I < Size; ++I) { - StringRef ArgTypeName = getTypeName(Indices[I]); - TypeName.append(ArgTypeName); - if (I + 1 != Size) - TypeName.append(", "); - } - TypeName.push_back(')'); - Name = saveTypeName(TypeName); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, - StringListRecord &Strings) { - auto Indices = Strings.getIndices(); - uint32_t Size = Indices.size(); - SmallString<256> TypeName("\""); - for (uint32_t I = 0; I < Size; ++I) { - StringRef ArgTypeName = getTypeName(Indices[I]); - TypeName.append(ArgTypeName); - if (I + 1 != Size) - TypeName.append("\" \""); - } - TypeName.push_back('\"'); - Name = saveTypeName(TypeName); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, ClassRecord &Class) { - Name = Class.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, UnionRecord &Union) { - Name = Union.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, EnumRecord &Enum) { - Name = Enum.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, ArrayRecord &AT) { - Name = AT.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, VFTableRecord &VFT) { - Name = VFT.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, - MemberFuncIdRecord &Id) { - Name = Id.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, - ProcedureRecord &Proc) { - StringRef ReturnTypeName = getTypeName(Proc.getReturnType()); - StringRef ArgListTypeName = getTypeName(Proc.getArgumentList()); - SmallString<256> TypeName(ReturnTypeName); - TypeName.push_back(' '); - TypeName.append(ArgListTypeName); - Name = saveTypeName(TypeName); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, - MemberFunctionRecord &MF) { - StringRef ReturnTypeName = getTypeName(MF.getReturnType()); - StringRef ClassTypeName = getTypeName(MF.getClassType()); - StringRef ArgListTypeName = getTypeName(MF.getArgumentList()); - SmallString<256> TypeName(ReturnTypeName); - TypeName.push_back(' '); - TypeName.append(ClassTypeName); - TypeName.append("::"); - TypeName.append(ArgListTypeName); - Name = saveTypeName(TypeName); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, FuncIdRecord &Func) { - Name = Func.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, - TypeServer2Record &TS) { - Name = TS.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, PointerRecord &Ptr) { - - if (Ptr.isPointerToMember()) { - const MemberPointerInfo &MI = Ptr.getMemberInfo(); - - StringRef PointeeName = getTypeName(Ptr.getReferentType()); - StringRef ClassName = getTypeName(MI.getContainingType()); - SmallString<256> TypeName(PointeeName); - TypeName.push_back(' '); - TypeName.append(ClassName); - TypeName.append("::*"); - Name = saveTypeName(TypeName); - } else { - SmallString<256> TypeName; - if (Ptr.isConst()) - TypeName.append("const "); - if (Ptr.isVolatile()) - 
TypeName.append("volatile "); - if (Ptr.isUnaligned()) - TypeName.append("__unaligned "); - - TypeName.append(getTypeName(Ptr.getReferentType())); - - if (Ptr.getMode() == PointerMode::LValueReference) - TypeName.append("&"); - else if (Ptr.getMode() == PointerMode::RValueReference) - TypeName.append("&&"); - else if (Ptr.getMode() == PointerMode::Pointer) - TypeName.append("*"); - - if (!TypeName.empty()) - Name = saveTypeName(TypeName); - } - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, ModifierRecord &Mod) { - uint16_t Mods = static_cast<uint16_t>(Mod.getModifiers()); - - StringRef ModifiedName = getTypeName(Mod.getModifiedType()); - SmallString<256> TypeName; - if (Mods & uint16_t(ModifierOptions::Const)) - TypeName.append("const "); - if (Mods & uint16_t(ModifierOptions::Volatile)) - TypeName.append("volatile "); - if (Mods & uint16_t(ModifierOptions::Unaligned)) - TypeName.append("__unaligned "); - TypeName.append(ModifiedName); - Name = saveTypeName(TypeName); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, - VFTableShapeRecord &Shape) { - Name = - saveTypeName("<vftable " + utostr(Shape.getEntryCount()) + " methods>"); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - NestedTypeRecord &Nested) { - Name = Nested.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - OneMethodRecord &Method) { - Name = Method.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - OverloadedMethodRecord &Method) { - Name = Method.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - DataMemberRecord &Field) { - Name = Field.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - StaticDataMemberRecord &Field) { - Name = Field.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - EnumeratorRecord &Enum) { - Name = Enum.getName(); - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - BaseClassRecord &Base) { - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - VirtualBaseClassRecord &VBase) { - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - ListContinuationRecord &Cont) { - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord( - CVType &CVR, UdtModSourceLineRecord &ModSourceLine) { - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, - UdtSourceLineRecord &SourceLine) { - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, BitFieldRecord &BF) { - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord( - CVType &CVR, MethodOverloadListRecord &Overloads) { - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, BuildInfoRecord &BI) { - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, LabelRecord &R) { - return Error::success(); -} - -Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR, - VFPtrRecord &VFP) { - return Error::success(); -} diff --git a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp index 
04b0384d81902..5899667050151 100644
--- a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
+++ b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
@@ -13,8 +13,6 @@
 #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
 #include "llvm/DebugInfo/CodeView/Formatters.h"
 #include "llvm/DebugInfo/CodeView/TypeCollection.h"
-#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
-#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h"
 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
 #include "llvm/Support/BinaryByteStream.h"
diff --git a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
index 8704cea607867..1226d5be3f3c4 100644
--- a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
+++ b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
@@ -357,6 +357,82 @@ static void discoverTypeIndices(ArrayRef<uint8_t> Content, TypeLeafKind Kind,
   }
 }
 
+static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind,
+                                SmallVectorImpl<TiReference> &Refs) {
+  uint32_t Count;
+  // FIXME: In the future it would be nice if we could avoid hardcoding these
+  // values. One idea is to define some structures representing these types
+  // that would allow the use of offsetof().
+  switch (Kind) {
+  case SymbolKind::S_GPROC32:
+  case SymbolKind::S_LPROC32:
+  case SymbolKind::S_GPROC32_ID:
+  case SymbolKind::S_LPROC32_ID:
+  case SymbolKind::S_LPROC32_DPC:
+  case SymbolKind::S_LPROC32_DPC_ID:
+    Refs.push_back({TiRefKind::IndexRef, 24, 1}); // LF_FUNC_ID
+    break;
+  case SymbolKind::S_UDT:
+    Refs.push_back({TiRefKind::TypeRef, 0, 1}); // UDT
+    break;
+  case SymbolKind::S_GDATA32:
+  case SymbolKind::S_LDATA32:
+    Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
+    break;
+  case SymbolKind::S_BUILDINFO:
+    Refs.push_back({TiRefKind::IndexRef, 0, 1}); // Compile flags
+    break;
+  case SymbolKind::S_LOCAL:
+    Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
+    break;
+  case SymbolKind::S_CONSTANT:
+    Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
+    break;
+  case SymbolKind::S_REGREL32:
+    Refs.push_back({TiRefKind::TypeRef, 4, 1}); // Type
+    break;
+  case SymbolKind::S_CALLSITEINFO:
+    Refs.push_back({TiRefKind::TypeRef, 8, 1}); // Call signature
+    break;
+  case SymbolKind::S_CALLERS:
+  case SymbolKind::S_CALLEES:
+    // The record is a count followed by an array of type indices.
+    Count = *reinterpret_cast<const ulittle32_t *>(Content.data());
+    Refs.push_back({TiRefKind::IndexRef, 4, Count}); // Callees
+    break;
+  case SymbolKind::S_INLINESITE:
+    Refs.push_back({TiRefKind::IndexRef, 8, 1}); // ID of inlinee
+    break;
+
+  // Defranges don't have types, just registers and code offsets.
+  case SymbolKind::S_DEFRANGE_REGISTER:
+  case SymbolKind::S_DEFRANGE_REGISTER_REL:
+  case SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL:
+  case SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE:
+  case SymbolKind::S_DEFRANGE_SUBFIELD_REGISTER:
+  case SymbolKind::S_DEFRANGE_SUBFIELD:
+    break;
+
+  // No type references.
+  case SymbolKind::S_LABEL32:
+  case SymbolKind::S_OBJNAME:
+  case SymbolKind::S_COMPILE:
+  case SymbolKind::S_COMPILE2:
+  case SymbolKind::S_COMPILE3:
+  case SymbolKind::S_BLOCK32:
+  case SymbolKind::S_FRAMEPROC:
+    break;
+  // Scope ending symbols.
+  case SymbolKind::S_END:
+  case SymbolKind::S_INLINESITE_END:
+  case SymbolKind::S_PROC_ID_END:
+    break;
+  default:
+    return false; // Unknown symbol.
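A minimal consumer sketch (illustrative only, not part of this patch; it assumes the {Kind, Offset, Count} layout of TiReference used above and 4-byte little-endian type indices):

  for (const TiReference &Ref : Refs)
    for (uint32_t I = 0; I != Ref.Count; ++I) {
      uint32_t ByteOffset = Ref.Offset + I * sizeof(TypeIndex);
      // A TypeRef slot names a type record, an IndexRef slot an id record;
      // the 32-bit index itself sits at Sym.content().data() + ByteOffset.
    }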
+  }
+  return true;
+}
+
 void llvm::codeview::discoverTypeIndices(const CVType &Type,
                                          SmallVectorImpl<TiReference> &Refs) {
   ::discoverTypeIndices(Type.content(), Type.kind(), Refs);
@@ -369,3 +445,9 @@ void llvm::codeview::discoverTypeIndices(ArrayRef<uint8_t> RecordData,
   TypeLeafKind K = static_cast<TypeLeafKind>(uint16_t(P->RecordKind));
   ::discoverTypeIndices(RecordData.drop_front(sizeof(RecordPrefix)), K, Refs);
 }
+
+bool llvm::codeview::discoverTypeIndices(const CVSymbol &Sym,
+                                         SmallVectorImpl<TiReference> &Refs) {
+  SymbolKind K = Sym.kind();
+  return ::discoverTypeIndices(Sym.content(), K, Refs);
+}
diff --git a/lib/DebugInfo/CodeView/TypeName.cpp b/lib/DebugInfo/CodeView/TypeName.cpp
new file mode 100644
index 0000000000000..2eb8b81862f3c
--- /dev/null
+++ b/lib/DebugInfo/CodeView/TypeName.cpp
@@ -0,0 +1,243 @@
+//===- TypeName.cpp ---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/TypeName.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/Support/FormatVariadic.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+namespace {
+class TypeNameComputer : public TypeVisitorCallbacks {
+  /// The type collection. Used to calculate names of nested types.
+  TypeCollection &Types;
+  TypeIndex CurrentTypeIndex = TypeIndex::None();
+
+  /// Name of the current type. Only valid before visitTypeEnd.
+  SmallString<256> Name;
+
+public:
+  explicit TypeNameComputer(TypeCollection &Types) : Types(Types) {}
+
+  StringRef name() const { return Name; }
+
+  /// Paired begin/end actions for all types. Receives all record data,
+  /// including the fixed-length record prefix.
+  Error visitTypeBegin(CVType &Record) override;
+  Error visitTypeBegin(CVType &Record, TypeIndex Index) override;
+  Error visitTypeEnd(CVType &Record) override;
+
+#define TYPE_RECORD(EnumName, EnumVal, Name)                                   \
+  Error visitKnownRecord(CVType &CVR, Name##Record &Record) override;
+#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#define MEMBER_RECORD(EnumName, EnumVal, Name)
+#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
+};
+} // namespace
+
+Error TypeNameComputer::visitTypeBegin(CVType &Record) {
+  llvm_unreachable("Must call visitTypeBegin with a TypeIndex!");
+  return Error::success();
+}
+
+Error TypeNameComputer::visitTypeBegin(CVType &Record, TypeIndex Index) {
+  // Reset Name to the empty string. If the visitor sets it, we know it.
+ Name = ""; + CurrentTypeIndex = Index; + return Error::success(); +} + +Error TypeNameComputer::visitTypeEnd(CVType &CVR) { return Error::success(); } + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, + FieldListRecord &FieldList) { + Name = "<field list>"; + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVRecord<TypeLeafKind> &CVR, + StringIdRecord &String) { + Name = String.getString(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, ArgListRecord &Args) { + auto Indices = Args.getIndices(); + uint32_t Size = Indices.size(); + Name = "("; + for (uint32_t I = 0; I < Size; ++I) { + assert(Indices[I] < CurrentTypeIndex); + + Name.append(Types.getTypeName(Indices[I])); + if (I + 1 != Size) + Name.append(", "); + } + Name.push_back(')'); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, + StringListRecord &Strings) { + auto Indices = Strings.getIndices(); + uint32_t Size = Indices.size(); + Name = "\""; + for (uint32_t I = 0; I < Size; ++I) { + Name.append(Types.getTypeName(Indices[I])); + if (I + 1 != Size) + Name.append("\" \""); + } + Name.push_back('\"'); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, ClassRecord &Class) { + Name = Class.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, UnionRecord &Union) { + Name = Union.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, EnumRecord &Enum) { + Name = Enum.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, ArrayRecord &AT) { + Name = AT.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, VFTableRecord &VFT) { + Name = VFT.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, MemberFuncIdRecord &Id) { + Name = Id.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, ProcedureRecord &Proc) { + StringRef Ret = Types.getTypeName(Proc.getReturnType()); + StringRef Params = Types.getTypeName(Proc.getArgumentList()); + Name = formatv("{0} {1}", Ret, Params).sstr<256>(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, + MemberFunctionRecord &MF) { + StringRef Ret = Types.getTypeName(MF.getReturnType()); + StringRef Class = Types.getTypeName(MF.getClassType()); + StringRef Params = Types.getTypeName(MF.getArgumentList()); + Name = formatv("{0} {1}::{2}", Ret, Class, Params).sstr<256>(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, FuncIdRecord &Func) { + Name = Func.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, TypeServer2Record &TS) { + Name = TS.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, PointerRecord &Ptr) { + + if (Ptr.isPointerToMember()) { + const MemberPointerInfo &MI = Ptr.getMemberInfo(); + + StringRef Pointee = Types.getTypeName(Ptr.getReferentType()); + StringRef Class = Types.getTypeName(MI.getContainingType()); + Name = formatv("{0} {1}::*", Pointee, Class); + } else { + if (Ptr.isConst()) + Name.append("const "); + if (Ptr.isVolatile()) + Name.append("volatile "); + if (Ptr.isUnaligned()) + Name.append("__unaligned "); + + Name.append(Types.getTypeName(Ptr.getReferentType())); + + if (Ptr.getMode() == PointerMode::LValueReference) 
+      Name.append("&");
+    else if (Ptr.getMode() == PointerMode::RValueReference)
+      Name.append("&&");
+    else if (Ptr.getMode() == PointerMode::Pointer)
+      Name.append("*");
+  }
+  return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, ModifierRecord &Mod) {
+  uint16_t Mods = static_cast<uint16_t>(Mod.getModifiers());
+
+  if (Mods & uint16_t(ModifierOptions::Const))
+    Name.append("const ");
+  if (Mods & uint16_t(ModifierOptions::Volatile))
+    Name.append("volatile ");
+  if (Mods & uint16_t(ModifierOptions::Unaligned))
+    Name.append("__unaligned ");
+  Name.append(Types.getTypeName(Mod.getModifiedType()));
+  return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR,
+                                         VFTableShapeRecord &Shape) {
+  Name = formatv("<vftable {0} methods>", Shape.getEntryCount());
+  return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(
+    CVType &CVR, UdtModSourceLineRecord &ModSourceLine) {
+  return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR,
+                                         UdtSourceLineRecord &SourceLine) {
+  return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, BitFieldRecord &BF) {
+  return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR,
+                                         MethodOverloadListRecord &Overloads) {
+  return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, BuildInfoRecord &BI) {
+  return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, LabelRecord &R) {
+  return Error::success();
+}
+
+std::string llvm::codeview::computeTypeName(TypeCollection &Types,
+                                            TypeIndex Index) {
+  TypeNameComputer Computer(Types);
+  CVType Record = Types.getType(Index);
+  if (auto EC = visitTypeRecord(Record, Index, Computer)) {
+    consumeError(std::move(EC));
+    return "<unknown UDT>";
+  }
+  return Computer.name();
+}
diff --git a/lib/DebugInfo/CodeView/TypeTableCollection.cpp b/lib/DebugInfo/CodeView/TypeTableCollection.cpp
index 8d974d522f283..4eca5aeaa0ae3 100644
--- a/lib/DebugInfo/CodeView/TypeTableCollection.cpp
+++ b/lib/DebugInfo/CodeView/TypeTableCollection.cpp
@@ -10,7 +10,7 @@
 #include "llvm/DebugInfo/CodeView/TypeTableCollection.h"
 
 #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
-#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h"
+#include "llvm/DebugInfo/CodeView/TypeName.h"
 #include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
 #include "llvm/Support/BinaryByteStream.h"
 #include "llvm/Support/BinaryStreamReader.h"
@@ -18,14 +18,10 @@
 using namespace llvm;
 using namespace llvm::codeview;
 
-static void error(Error &&EC) {
-  assert(!static_cast<bool>(EC));
-  if (EC)
-    consumeError(std::move(EC));
-}
-
 TypeTableCollection::TypeTableCollection(ArrayRef<ArrayRef<uint8_t>> Records)
-    : Records(Records), Database(Records.size()) {}
+    : NameStorage(Allocator), Records(Records) {
+  Names.resize(Records.size());
+}
 
 Optional<TypeIndex> TypeTableCollection::getFirst() {
   if (empty())
@@ -34,50 +30,38 @@ Optional<TypeIndex> TypeTableCollection::getFirst() {
 }
 
 Optional<TypeIndex> TypeTableCollection::getNext(TypeIndex Prev) {
+  assert(contains(Prev));
   ++Prev;
-  assert(Prev.toArrayIndex() <= size());
   if (Prev.toArrayIndex() == size())
     return None;
   return Prev;
 }
 
-void TypeTableCollection::ensureTypeExists(TypeIndex Index) {
-  assert(hasCapacityFor(Index));
-
-  if (Database.contains(Index))
-    return;
-
-  BinaryByteStream Bytes(Records[Index.toArrayIndex()], support::little);
-
-  CVType Type;
-  uint32_t Len;
-
VarStreamArrayExtractor<CVType> Extract; - error(Extract(Bytes, Len, Type)); - - TypeDatabaseVisitor DBV(Database); - error(codeview::visitTypeRecord(Type, Index, DBV)); - assert(Database.contains(Index)); -} - CVType TypeTableCollection::getType(TypeIndex Index) { - ensureTypeExists(Index); - return Database.getTypeRecord(Index); + assert(Index.toArrayIndex() < Records.size()); + ArrayRef<uint8_t> Bytes = Records[Index.toArrayIndex()]; + const RecordPrefix *Prefix = + reinterpret_cast<const RecordPrefix *>(Bytes.data()); + TypeLeafKind Kind = static_cast<TypeLeafKind>(uint16_t(Prefix->RecordKind)); + return CVType(Kind, Bytes); } StringRef TypeTableCollection::getTypeName(TypeIndex Index) { - if (!Index.isSimple()) - ensureTypeExists(Index); - return Database.getTypeName(Index); + if (Index.isNoneType() || Index.isSimple()) + return TypeIndex::simpleTypeName(Index); + + uint32_t I = Index.toArrayIndex(); + if (Names[I].data() == nullptr) { + StringRef Result = NameStorage.save(computeTypeName(*this, Index)); + Names[I] = Result; + } + return Names[I]; } bool TypeTableCollection::contains(TypeIndex Index) { - return Database.contains(Index); + return Index.toArrayIndex() <= size(); } uint32_t TypeTableCollection::size() { return Records.size(); } uint32_t TypeTableCollection::capacity() { return Records.size(); } - -bool TypeTableCollection::hasCapacityFor(TypeIndex Index) const { - return Index.toArrayIndex() < Records.size(); -} diff --git a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp index 57eac91f8c192..bb475a669efb2 100644 --- a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp +++ b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp @@ -65,46 +65,52 @@ DWARFAbbreviationDeclaration::extract(DataExtractor Data, if (A && F) { Optional<int64_t> V; bool IsImplicitConst = (F == DW_FORM_implicit_const); - if (IsImplicitConst) + if (IsImplicitConst) { V = Data.getSLEB128(OffsetPtr); - else if (auto Size = DWARFFormValue::getFixedByteSize(F)) - V = *Size; - AttributeSpecs.push_back(AttributeSpec(A, F, V)); - if (IsImplicitConst) + AttributeSpecs.push_back(AttributeSpec(A, F, V)); continue; + } // If this abbrevation still has a fixed byte size, then update the // FixedAttributeSize as needed. - if (FixedAttributeSize) { - if (V) - FixedAttributeSize->NumBytes += *V; - else { - switch (F) { - case DW_FORM_addr: - ++FixedAttributeSize->NumAddrs; - break; - - case DW_FORM_ref_addr: - ++FixedAttributeSize->NumRefAddrs; - break; - - case DW_FORM_strp: - case DW_FORM_GNU_ref_alt: - case DW_FORM_GNU_strp_alt: - case DW_FORM_line_strp: - case DW_FORM_sec_offset: - case DW_FORM_strp_sup: - ++FixedAttributeSize->NumDwarfOffsets; - break; - - default: - // Indicate we no longer have a fixed byte size for this - // abbreviation by clearing the FixedAttributeSize optional value - // so it doesn't have a value. - FixedAttributeSize.reset(); - break; - } + switch (F) { + case DW_FORM_addr: + if (FixedAttributeSize) + ++FixedAttributeSize->NumAddrs; + break; + + case DW_FORM_ref_addr: + if (FixedAttributeSize) + ++FixedAttributeSize->NumRefAddrs; + break; + + case DW_FORM_strp: + case DW_FORM_GNU_ref_alt: + case DW_FORM_GNU_strp_alt: + case DW_FORM_line_strp: + case DW_FORM_sec_offset: + case DW_FORM_strp_sup: + if (FixedAttributeSize) + ++FixedAttributeSize->NumDwarfOffsets; + break; + + default: + // The form has a byte size that doesn't depend on Params. + // If it's a fixed size, keep track of it. 
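// Worked example (not from the patch): the param-dependent forms
// (DW_FORM_addr, DW_FORM_ref_addr, the offset-sized forms) were handled
// above, so a default-constructed DWARFFormParams is safe at this step.
// DW_FORM_data1/2/4/8 report 1/2/4/8 bytes, while LEB128-encoded forms
// such as DW_FORM_udata report None and fall through to the reset below.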
+ if (auto Size = + DWARFFormValue::getFixedByteSize(F, DWARFFormParams())) { + V = *Size; + if (FixedAttributeSize) + FixedAttributeSize->NumBytes += *V; + break; } + // Indicate we no longer have a fixed byte size for this + // abbreviation by clearing the FixedAttributeSize optional value + // so it doesn't have a value. + FixedAttributeSize.reset(); + break; } + // Record this attribute and its fixed size if it has one. + AttributeSpecs.push_back(AttributeSpec(A, F, V)); } else if (A == 0 && F == 0) { // We successfully reached the end of this abbreviation declaration // since both attribute and form are zero. @@ -186,7 +192,8 @@ Optional<DWARFFormValue> DWARFAbbreviationDeclaration::getAttributeValue( if (auto FixedSize = Spec.getByteSize(U)) Offset += *FixedSize; else - DWARFFormValue::skipValue(Spec.Form, DebugInfoData, &Offset, &U); + DWARFFormValue::skipValue(Spec.Form, DebugInfoData, &Offset, + U.getFormParams()); ++AttrIndex; } return None; @@ -211,7 +218,8 @@ Optional<int64_t> DWARFAbbreviationDeclaration::AttributeSpec::getByteSize( if (ByteSizeOrValue) return ByteSizeOrValue; Optional<int64_t> S; - auto FixedByteSize = DWARFFormValue::getFixedByteSize(Form, &U); + auto FixedByteSize = + DWARFFormValue::getFixedByteSize(Form, U.getFormParams()); if (FixedByteSize) S = *FixedByteSize; return S; diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index 9bafcde57f0ae..3814794617503 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" @@ -36,7 +37,6 @@ #include "llvm/Object/RelocVisitor.h" #include "llvm/Support/Casting.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" #include "llvm/Support/Format.h" #include "llvm/Support/MemoryBuffer.h" @@ -44,8 +44,8 @@ #include <algorithm> #include <cstdint> #include <map> -#include <set> #include <string> +#include <tuple> #include <utility> #include <vector> @@ -55,9 +55,9 @@ using namespace object; #define DEBUG_TYPE "dwarf" -typedef DWARFDebugLine::LineTable DWARFLineTable; -typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; -typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind; +using DWARFLineTable = DWARFDebugLine::LineTable; +using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind; +using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind; uint64_t llvm::getRelocatedValue(const DataExtractor &Data, uint32_t Size, uint32_t *Off, const RelocAddrMap *Relocs, @@ -201,8 +201,7 @@ static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName, } } -void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts){ - +void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts) { DIDumpType DumpType = DumpOpts.DumpType; bool DumpEH = DumpOpts.DumpEH; bool SummarizeTypes = DumpOpts.SummarizeTypes; @@ -1068,7 +1067,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, errs() << "error: failed to compute relocation: " << Name << "\n"; continue; } - llvm::RelocAddrEntry Rel = {SymInfoOrErr->SectionIndex, Val}; + RelocAddrEntry Rel = {SymInfoOrErr->SectionIndex, Val}; Map->insert({Reloc.getOffset(), Rel}); } } diff --git 
a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index cf9fec2b3254c..475cf25b781b4 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" - #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" @@ -71,7 +70,7 @@ protected: /// An entry may contain CFI instructions. An instruction consists of an /// opcode and an optional sequence of operands. - typedef std::vector<uint64_t> Operands; + using Operands = std::vector<uint64_t>; struct Instruction { Instruction(uint8_t Opcode) : Opcode(Opcode) @@ -518,14 +517,13 @@ static uint64_t readPointer(const DataExtractor &Data, uint32_t &Offset, // noreturn attribute usage in lambdas. Once the support for those // compilers are phased out, we can remove this and return back to // a ReportError lambda: [StartOffset](const char *ErrorMsg). -#define ReportError(ErrorMsg) ReportErrorImpl(StartOffset,ErrorMsg) -static void LLVM_ATTRIBUTE_NORETURN -ReportErrorImpl(uint32_t StartOffset, const char *ErrorMsg) { - std::string Str; - raw_string_ostream OS(Str); - OS << format(ErrorMsg, StartOffset); - OS.flush(); - report_fatal_error(Str); +static void LLVM_ATTRIBUTE_NORETURN ReportError(uint32_t StartOffset, + const char *ErrorMsg) { + std::string Str; + raw_string_ostream OS(Str); + OS << format(ErrorMsg, StartOffset); + OS.flush(); + report_fatal_error(Str); } void DWARFDebugFrame::parse(DataExtractor Data) { @@ -590,13 +588,15 @@ void DWARFDebugFrame::parse(DataExtractor Data) { for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) { switch (AugmentationString[i]) { default: - ReportError("Unknown augmentation character in entry at %lx"); + ReportError(StartOffset, + "Unknown augmentation character in entry at %lx"); case 'L': LSDAPointerEncoding = Data.getU8(&Offset); break; case 'P': { if (Personality) - ReportError("Duplicate personality in entry at %lx"); + ReportError(StartOffset, + "Duplicate personality in entry at %lx"); PersonalityEncoding = Data.getU8(&Offset); Personality = readPointer(Data, Offset, *PersonalityEncoding); break; @@ -606,7 +606,8 @@ void DWARFDebugFrame::parse(DataExtractor Data) { break; case 'z': if (i) - ReportError("'z' must be the first character at %lx"); + ReportError(StartOffset, + "'z' must be the first character at %lx"); // Parse the augmentation length first. We only parse it if // the string contains a 'z'. AugmentationLength = Data.getULEB128(&Offset); @@ -618,7 +619,7 @@ void DWARFDebugFrame::parse(DataExtractor Data) { if (AugmentationLength.hasValue()) { if (Offset != EndAugmentationOffset) - ReportError("Parsing augmentation data at %lx failed"); + ReportError(StartOffset, "Parsing augmentation data at %lx failed"); AugmentationData = Data.getData().slice(StartAugmentationOffset, EndAugmentationOffset); @@ -645,7 +646,8 @@ void DWARFDebugFrame::parse(DataExtractor Data) { if (IsEH) { // The address size is encoded in the CIE we reference. 
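// (Each FDE inherits its pointer encodings from its parent CIE: the 'L'
// and 'P' augmentation letters parsed above record the LSDA and
// personality encodings. Without a CIE, the InitialLocation read below
// cannot be decoded at all.)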
      if (!Cie)
-        ReportError("Parsing FDE data at %lx failed due to missing CIE");
+        ReportError(StartOffset,
+                    "Parsing FDE data at %lx failed due to missing CIE");
 
       InitialLocation = readPointer(Data, Offset,
                                     Cie->getFDEPointerEncoding());
@@ -665,7 +667,7 @@ void DWARFDebugFrame::parse(DataExtractor Data) {
             readPointer(Data, Offset, Cie->getLSDAPointerEncoding());
 
         if (Offset != EndAugmentationOffset)
-          ReportError("Parsing augmentation data at %lx failed");
+          ReportError(StartOffset, "Parsing augmentation data at %lx failed");
       }
     } else {
       InitialLocation = Data.getAddress(&Offset);
@@ -680,7 +682,7 @@ void DWARFDebugFrame::parse(DataExtractor Data) {
     Entries.back()->parseInstructions(Data, &Offset, EndStructureOffset);
 
     if (Offset != EndStructureOffset)
-      ReportError("Parsing entry instructions at %lx failed");
+      ReportError(StartOffset, "Parsing entry instructions at %lx failed");
   }
 }
diff --git a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index dbcc64fc0832f..1551974b822ac 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -59,7 +59,7 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint32_t *OffsetPtr,
         // Attribute byte size if fixed, just add the size to the offset.
         *OffsetPtr += *FixedSize;
       } else if (!DWARFFormValue::skipValue(AttrSpec.Form, DebugInfoData,
-                                            OffsetPtr, &U)) {
+                                            OffsetPtr, U.getFormParams())) {
         // We failed to skip this attribute's value, restore the original offset
         // and return the failure status.
         *OffsetPtr = Offset;
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index cda3e75fbc3e7..ad5647f3e03d8 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -9,6 +9,8 @@
 #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
@@ -26,23 +28,27 @@
 using namespace llvm;
 using namespace dwarf;
 
-typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
+using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind;
+
 namespace {
+
 struct ContentDescriptor {
   dwarf::LineNumberEntryFormat Type;
   dwarf::Form Form;
 };
-typedef SmallVector<ContentDescriptor, 4> ContentDescriptors;
+
+using ContentDescriptors = SmallVector<ContentDescriptor, 4>;
+
 } // end anonymous namespace
 
 DWARFDebugLine::Prologue::Prologue() { clear(); }
 
 void DWARFDebugLine::Prologue::clear() {
-  TotalLength = Version = PrologueLength = 0;
-  AddressSize = SegSelectorSize = 0;
+  TotalLength = PrologueLength = 0;
+  SegSelectorSize = 0;
   MinInstLength = MaxOpsPerInst = DefaultIsStmt = LineBase = LineRange = 0;
   OpcodeBase = 0;
-  IsDWARF64 = false;
+  FormParams = DWARFFormParams({0, 0, DWARF32});
   StandardOpcodeLengths.clear();
   IncludeDirectories.clear();
   FileNames.clear();
@@ -51,12 +57,13 @@ void DWARFDebugLine::Prologue::clear() {
 void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const {
   OS << "Line table prologue:\n"
      << format(" total_length: 0x%8.8" PRIx64 "\n", TotalLength)
-     << format(" version: %u\n", Version)
-     << format(Version >= 5 ? " address_size: %u\n" : "", AddressSize)
-     << format(Version >= 5 ?
" seg_select_size: %u\n" : "", SegSelectorSize) - << format(" prologue_length: 0x%8.8" PRIx64 "\n", PrologueLength) + << format(" version: %u\n", getVersion()); + if (getVersion() >= 5) + OS << format(" address_size: %u\n", getAddressSize()) + << format(" seg_select_size: %u\n", SegSelectorSize); + OS << format(" prologue_length: 0x%8.8" PRIx64 "\n", PrologueLength) << format(" min_inst_length: %u\n", MinInstLength) - << format(Version >= 4 ? "max_ops_per_inst: %u\n" : "", MaxOpsPerInst) + << format(getVersion() >= 4 ? "max_ops_per_inst: %u\n" : "", MaxOpsPerInst) << format(" default_is_stmt: %u\n", DefaultIsStmt) << format(" line_base: %i\n", LineBase) << format(" line_range: %u\n", LineRange) @@ -137,6 +144,7 @@ parseV5EntryFormat(DataExtractor DebugLineData, uint32_t *OffsetPtr, static bool parseV5DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr, uint64_t EndPrologueOffset, + const DWARFFormParams &FormParams, std::vector<StringRef> &IncludeDirectories, std::vector<DWARFDebugLine::FileNameEntry> &FileNames) { // Get the directory entry description. @@ -159,7 +167,7 @@ parseV5DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr, IncludeDirectories.push_back(Value.getAsCString().getValue()); break; default: - if (!Value.skipValue(DebugLineData, OffsetPtr, nullptr)) + if (!Value.skipValue(DebugLineData, OffsetPtr, FormParams)) return false; } } @@ -211,24 +219,26 @@ bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData, clear(); TotalLength = DebugLineData.getU32(OffsetPtr); if (TotalLength == UINT32_MAX) { - IsDWARF64 = true; + FormParams.Format = dwarf::DWARF64; TotalLength = DebugLineData.getU64(OffsetPtr); - } else if (TotalLength > 0xffffff00) { + } else if (TotalLength >= 0xffffff00) { return false; } - Version = DebugLineData.getU16(OffsetPtr); - if (Version < 2) + FormParams.Version = DebugLineData.getU16(OffsetPtr); + if (getVersion() < 2) return false; - if (Version >= 5) { - AddressSize = DebugLineData.getU8(OffsetPtr); + if (getVersion() >= 5) { + FormParams.AddrSize = DebugLineData.getU8(OffsetPtr); + assert(getAddressSize() == DebugLineData.getAddressSize() && + "Line table header and data extractor disagree"); SegSelectorSize = DebugLineData.getU8(OffsetPtr); } PrologueLength = DebugLineData.getUnsigned(OffsetPtr, sizeofPrologueLength()); const uint64_t EndPrologueOffset = PrologueLength + *OffsetPtr; MinInstLength = DebugLineData.getU8(OffsetPtr); - if (Version >= 4) + if (getVersion() >= 4) MaxOpsPerInst = DebugLineData.getU8(OffsetPtr); DefaultIsStmt = DebugLineData.getU8(OffsetPtr); LineBase = DebugLineData.getU8(OffsetPtr); @@ -241,9 +251,9 @@ bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData, StandardOpcodeLengths.push_back(OpLen); } - if (Version >= 5) { + if (getVersion() >= 5) { if (!parseV5DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset, - IncludeDirectories, FileNames)) { + getFormParams(), IncludeDirectories, FileNames)) { fprintf(stderr, "warning: parsing line table prologue at 0x%8.8" PRIx64 " found an invalid directory or file table description at" @@ -333,7 +343,7 @@ void DWARFDebugLine::LineTable::clear() { } DWARFDebugLine::ParsingState::ParsingState(struct LineTable *LT) - : LineTable(LT), RowNumber(0) { + : LineTable(LT) { resetRowAndSequence(); } diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp index ed1f5f46dcfb8..861114bde1f2b 100644 --- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -59,48 +59,13 @@ static 
const DWARFFormValue::FormClass DWARF4FormClasses[] = { DWARFFormValue::FC_Flag, // 0x19 DW_FORM_flag_present }; -namespace { - -/// A helper class that can be used in DWARFFormValue.cpp functions that need -/// to know the byte size of DW_FORM values that vary in size depending on the -/// DWARF version, address byte size, or DWARF32 or DWARF64. -class FormSizeHelper { - uint16_t Version; - uint8_t AddrSize; - llvm::dwarf::DwarfFormat Format; - -public: - FormSizeHelper(uint16_t V, uint8_t A, llvm::dwarf::DwarfFormat F) - : Version(V), AddrSize(A), Format(F) {} - - uint8_t getAddressByteSize() const { return AddrSize; } - - uint8_t getRefAddrByteSize() const { - if (Version == 2) - return AddrSize; - return getDwarfOffsetByteSize(); - } - - uint8_t getDwarfOffsetByteSize() const { - switch (Format) { - case dwarf::DwarfFormat::DWARF32: - return 4; - case dwarf::DwarfFormat::DWARF64: - return 8; - } - llvm_unreachable("Invalid Format value"); - } -}; - -} // end anonymous namespace - -template <class T> -static Optional<uint8_t> getFixedByteSize(dwarf::Form Form, const T *U) { +Optional<uint8_t> +DWARFFormValue::getFixedByteSize(dwarf::Form Form, + const DWARFFormParams Params) { switch (Form) { case DW_FORM_addr: - if (U) - return U->getAddressByteSize(); - return None; + assert(Params.Version && Params.AddrSize && "Invalid Params for form"); + return Params.AddrSize; case DW_FORM_block: // ULEB128 length L followed by L bytes. case DW_FORM_block1: // 1 byte length L followed by L bytes. @@ -121,9 +86,8 @@ static Optional<uint8_t> getFixedByteSize(dwarf::Form Form, const T *U) { return None; case DW_FORM_ref_addr: - if (U) - return U->getRefAddrByteSize(); - return None; + assert(Params.Version && Params.AddrSize && "Invalid Params for form"); + return Params.getRefAddrByteSize(); case DW_FORM_flag: case DW_FORM_data1: @@ -138,6 +102,9 @@ static Optional<uint8_t> getFixedByteSize(dwarf::Form Form, const T *U) { case DW_FORM_addrx2: return 2; + case DW_FORM_strx3: + return 3; + case DW_FORM_data4: case DW_FORM_ref4: case DW_FORM_ref_sup4: @@ -151,9 +118,8 @@ static Optional<uint8_t> getFixedByteSize(dwarf::Form Form, const T *U) { case DW_FORM_line_strp: case DW_FORM_sec_offset: case DW_FORM_strp_sup: - if (U) - return U->getDwarfOffsetByteSize(); - return None; + assert(Params.Version && Params.AddrSize && "Invalid Params for form"); + return Params.getDwarfOffsetByteSize(); case DW_FORM_data8: case DW_FORM_ref8: @@ -178,9 +144,9 @@ static Optional<uint8_t> getFixedByteSize(dwarf::Form Form, const T *U) { return None; } -template <class T> -static bool skipFormValue(dwarf::Form Form, const DataExtractor &DebugInfoData, - uint32_t *OffsetPtr, const T *U) { +bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData, + uint32_t *OffsetPtr, + const DWARFFormParams Params) { bool Indirect = false; do { switch (Form) { @@ -240,7 +206,8 @@ static bool skipFormValue(dwarf::Form Form, const DataExtractor &DebugInfoData, case DW_FORM_line_strp: case DW_FORM_GNU_ref_alt: case DW_FORM_GNU_strp_alt: - if (Optional<uint8_t> FixedSize = ::getFixedByteSize(Form, U)) { + if (Optional<uint8_t> FixedSize = + DWARFFormValue::getFixedByteSize(Form, Params)) { *OffsetPtr += *FixedSize; return true; } @@ -274,19 +241,6 @@ static bool skipFormValue(dwarf::Form Form, const DataExtractor &DebugInfoData, return true; } -Optional<uint8_t> DWARFFormValue::getFixedByteSize(dwarf::Form Form, - const DWARFUnit *U) { - return ::getFixedByteSize(Form, U); -} - -Optional<uint8_t> 
-DWARFFormValue::getFixedByteSize(dwarf::Form Form, uint16_t Version, - uint8_t AddrSize, - llvm::dwarf::DwarfFormat Format) { - FormSizeHelper FSH(Version, AddrSize, Format); - return ::getFixedByteSize(Form, &FSH); -} - bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const { // First, check DWARF4 form classes. if (Form < makeArrayRef(DWARF4FormClasses).size() && @@ -302,6 +256,10 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const { case DW_FORM_GNU_str_index: case DW_FORM_GNU_strp_alt: case DW_FORM_strx: + case DW_FORM_strx1: + case DW_FORM_strx2: + case DW_FORM_strx3: + case DW_FORM_strx4: return (FC == FC_String); case DW_FORM_implicit_const: return (FC == FC_Constant); @@ -368,6 +326,9 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data, case DW_FORM_addrx2: Value.uval = Data.getU16(OffsetPtr); break; + case DW_FORM_strx3: + Value.uval = Data.getU24(OffsetPtr); + break; case DW_FORM_data4: case DW_FORM_ref4: case DW_FORM_ref_sup4: @@ -438,24 +399,6 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data, return true; } -bool DWARFFormValue::skipValue(DataExtractor DebugInfoData, uint32_t *OffsetPtr, - const DWARFUnit *U) const { - return DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, U); -} - -bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData, - uint32_t *OffsetPtr, const DWARFUnit *U) { - return skipFormValue(Form, DebugInfoData, OffsetPtr, U); -} - -bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData, - uint32_t *OffsetPtr, uint16_t Version, - uint8_t AddrSize, - llvm::dwarf::DwarfFormat Format) { - FormSizeHelper FSH(Version, AddrSize, Format); - return skipFormValue(Form, DebugInfoData, OffsetPtr, &FSH); -} - void DWARFFormValue::dump(raw_ostream &OS) const { uint64_t UValue = Value.uval; bool CURelativeOffset = false; @@ -545,6 +488,10 @@ void DWARFFormValue::dump(raw_ostream &OS) const { dumpString(OS); break; case DW_FORM_strx: + case DW_FORM_strx1: + case DW_FORM_strx2: + case DW_FORM_strx3: + case DW_FORM_strx4: case DW_FORM_GNU_str_index: OS << format(" indexed (%8.8x) string = ", (uint32_t)UValue); dumpString(OS); @@ -623,7 +570,9 @@ Optional<const char *> DWARFFormValue::getAsCString() const { if (Form == DW_FORM_GNU_strp_alt || U == nullptr) return None; uint32_t Offset = Value.uval; - if (Form == DW_FORM_GNU_str_index || Form == DW_FORM_strx) { + if (Form == DW_FORM_GNU_str_index || Form == DW_FORM_strx || + Form == DW_FORM_strx1 || Form == DW_FORM_strx2 || Form == DW_FORM_strx3 || + Form == DW_FORM_strx4) { uint64_t StrOffset; if (!U->getStringOffsetSectionItem(Offset, StrOffset)) return None; diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp index 09e6a292e5fe1..fd9c7c2b1d46c 100644 --- a/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -1,4 +1,4 @@ -//===-- DWARFUnit.cpp -----------------------------------------------------===// +//===- DWARFUnit.cpp ------------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFUnit.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" @@ -17,8 +16,6 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include 
"llvm/DebugInfo/DWARF/DWARFFormValue.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Support/Casting.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Path.h" #include <algorithm> @@ -26,6 +23,7 @@ #include <cstddef> #include <cstdint> #include <cstdio> +#include <utility> #include <vector> using namespace llvm; @@ -55,8 +53,8 @@ DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section, const DWARFUnitIndex::Entry *IndexEntry) : Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS), LineSection(LS), StringSection(SS), StringOffsetSection(SOS), - StringOffsetSectionBase(0), AddrOffsetSection(AOS), isLittleEndian(LE), - isDWO(IsDWO), UnitSection(UnitSection), IndexEntry(IndexEntry) { + AddrOffsetSection(AOS), isLittleEndian(LE), isDWO(IsDWO), + UnitSection(UnitSection), IndexEntry(IndexEntry) { clear(); } @@ -64,11 +62,13 @@ DWARFUnit::~DWARFUnit() = default; bool DWARFUnit::getAddrOffsetSectionItem(uint32_t Index, uint64_t &Result) const { - uint32_t Offset = AddrOffsetSectionBase + Index * AddrSize; - if (AddrOffsetSection->Data.size() < Offset + AddrSize) + uint32_t Offset = AddrOffsetSectionBase + Index * getAddressByteSize(); + if (AddrOffsetSection->Data.size() < Offset + getAddressByteSize()) return false; - DataExtractor DA(AddrOffsetSection->Data, isLittleEndian, AddrSize); - Result = getRelocatedValue(DA, AddrSize, &Offset, &AddrOffsetSection->Relocs); + DataExtractor DA(AddrOffsetSection->Data, isLittleEndian, + getAddressByteSize()); + Result = getRelocatedValue(DA, getAddressByteSize(), &Offset, + &AddrOffsetSection->Relocs); return true; } @@ -94,15 +94,17 @@ uint64_t DWARFUnit::getStringOffsetSectionRelocation(uint32_t Index) const { bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) { Length = debug_info.getU32(offset_ptr); - Version = debug_info.getU16(offset_ptr); + // FIXME: Support DWARF64. + FormParams.Format = DWARF32; + FormParams.Version = debug_info.getU16(offset_ptr); uint64_t AbbrOffset; - if (Version >= 5) { + if (FormParams.Version >= 5) { UnitType = debug_info.getU8(offset_ptr); - AddrSize = debug_info.getU8(offset_ptr); + FormParams.AddrSize = debug_info.getU8(offset_ptr); AbbrOffset = debug_info.getU32(offset_ptr); } else { AbbrOffset = debug_info.getU32(offset_ptr); - AddrSize = debug_info.getU8(offset_ptr); + FormParams.AddrSize = debug_info.getU8(offset_ptr); } if (IndexEntry) { if (AbbrOffset) @@ -117,14 +119,14 @@ bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) { } bool LengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1); - bool VersionOK = DWARFContext::isSupportedVersion(Version); - bool AddrSizeOK = AddrSize == 4 || AddrSize == 8; + bool VersionOK = DWARFContext::isSupportedVersion(getVersion()); + bool AddrSizeOK = getAddressByteSize() == 4 || getAddressByteSize() == 8; if (!LengthOK || !VersionOK || !AddrSizeOK) return false; // Keep track of the highest DWARF version we encounter across all units. - Context.setMaxVersionIfGreater(Version); + Context.setMaxVersionIfGreater(getVersion()); Abbrevs = Abbrev->getAbbreviationDeclarationSet(AbbrOffset); return Abbrevs != nullptr; @@ -150,7 +152,8 @@ bool DWARFUnit::extractRangeList(uint32_t RangeListOffset, DWARFDebugRangeList &RangeList) const { // Require that compile unit is extracted. 
assert(!DieArray.empty()); - DataExtractor RangesData(RangeSection->Data, isLittleEndian, AddrSize); + DataExtractor RangesData(RangeSection->Data, isLittleEndian, + getAddressByteSize()); uint32_t ActualRangeListOffset = RangeSectionBase + RangeListOffset; return RangeList.extract(RangesData, &ActualRangeListOffset, RangeSection->Relocs); @@ -159,9 +162,8 @@ bool DWARFUnit::extractRangeList(uint32_t RangeListOffset, void DWARFUnit::clear() { Offset = 0; Length = 0; - Version = 0; Abbrevs = nullptr; - AddrSize = 0; + FormParams = DWARFFormParams({0, 0, DWARF32}); BaseAddr = 0; RangeSectionBase = 0; AddrOffsetSectionBase = 0; diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp index a6240fb60143c..41907e5705637 100644 --- a/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -279,7 +279,6 @@ bool DWARFVerifier::handleDebugLine() { bool DWARFVerifier::handleAppleNames() { NumAppleNamesErrors = 0; - OS << "Verifying .apple_names...\n"; DataExtractor AppleNamesSection(DCtx.getAppleNamesSection().Data, DCtx.isLittleEndian(), 0); @@ -288,10 +287,11 @@ bool DWARFVerifier::handleAppleNames() { DCtx.getAppleNamesSection().Relocs); if (!AppleNames.extract()) { - OS << "error: cannot extract .apple_names accelerator table\n"; - return false; + return true; } + OS << "Verifying .apple_names...\n"; + // Verify that all buckets have a valid hash index or are empty uint32_t NumBuckets = AppleNames.getNumBuckets(); uint32_t NumHashes = AppleNames.getNumHashes(); diff --git a/lib/DebugInfo/MSF/MSFBuilder.cpp b/lib/DebugInfo/MSF/MSFBuilder.cpp index 5b1b5d8dc4d55..0f4f785abf55a 100644 --- a/lib/DebugInfo/MSF/MSFBuilder.cpp +++ b/lib/DebugInfo/MSF/MSFBuilder.cpp @@ -1,3 +1,4 @@ +//===- MSFBuilder.cpp -----------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,22 +7,30 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/ArrayRef.h" #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/MSF/MSFError.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <memory> +#include <utility> +#include <vector> using namespace llvm; using namespace llvm::msf; using namespace llvm::support; -namespace { -const uint32_t kSuperBlockBlock = 0; -const uint32_t kFreePageMap0Block = 1; -const uint32_t kFreePageMap1Block = 2; -const uint32_t kNumReservedPages = 3; +static const uint32_t kSuperBlockBlock = 0; +static const uint32_t kFreePageMap0Block = 1; +static const uint32_t kFreePageMap1Block = 2; +static const uint32_t kNumReservedPages = 3; -const uint32_t kDefaultFreePageMap = kFreePageMap0Block; -const uint32_t kDefaultBlockMapAddr = kNumReservedPages; -} +static const uint32_t kDefaultFreePageMap = kFreePageMap0Block; +static const uint32_t kDefaultBlockMapAddr = kNumReservedPages; MSFBuilder::MSFBuilder(uint32_t BlockSize, uint32_t MinBlockCount, bool CanGrow, BumpPtrAllocator &Allocator) @@ -263,7 +272,7 @@ Expected<MSFLayout> MSFBuilder::build() { // The stream sizes should be re-allocated as a stable pointer and the stream // map should have each of its entries allocated as a separate stable pointer. 
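// "Stable" here means allocated from the caller-supplied BumpPtrAllocator,
// so the ArrayRefs handed out in the returned MSFLayout remain valid after
// the builder itself is destroyed.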
- if (StreamData.size() > 0) { + if (!StreamData.empty()) { ulittle32_t *Sizes = Allocator.Allocate<ulittle32_t>(StreamData.size()); L.StreamSizes = ArrayRef<ulittle32_t>(Sizes, StreamData.size()); L.StreamMap.resize(StreamData.size()); diff --git a/lib/DebugInfo/MSF/MSFCommon.cpp b/lib/DebugInfo/MSF/MSFCommon.cpp index fdab7884646ec..1facf5efb4bbb 100644 --- a/lib/DebugInfo/MSF/MSFCommon.cpp +++ b/lib/DebugInfo/MSF/MSFCommon.cpp @@ -1,4 +1,4 @@ -//===- MSFCommon.cpp - Common types and functions for MSF files -*- C++ -*-===// +//===- MSFCommon.cpp - Common types and functions for MSF files -----------===// // // The LLVM Compiler Infrastructure // @@ -9,6 +9,10 @@ #include "llvm/DebugInfo/MSF/MSFCommon.h" #include "llvm/DebugInfo/MSF/MSFError.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include <cstdint> +#include <cstring> using namespace llvm; using namespace llvm::msf; diff --git a/lib/DebugInfo/MSF/MappedBlockStream.cpp b/lib/DebugInfo/MSF/MappedBlockStream.cpp index faf2442bc94bb..e45f4ae0ed940 100644 --- a/lib/DebugInfo/MSF/MappedBlockStream.cpp +++ b/lib/DebugInfo/MSF/MappedBlockStream.cpp @@ -8,23 +8,33 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/MSF/MappedBlockStream.h" - -#include "llvm/DebugInfo/MSF/IMSFFile.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/DebugInfo/MSF/MSFCommon.h" #include "llvm/DebugInfo/MSF/MSFStreamLayout.h" -#include "llvm/Support/BinaryStreamError.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MathExtras.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <utility> +#include <vector> using namespace llvm; using namespace llvm::msf; namespace { + template <typename Base> class MappedBlockStreamImpl : public Base { public: template <typename... Args> MappedBlockStreamImpl(Args &&... Params) : Base(std::forward<Args>(Params)...) 
{} }; -} + +} // end anonymous namespace static void initializeFpmStreamLayout(const MSFLayout &Layout, MSFStreamLayout &FpmLayout) { @@ -39,7 +49,8 @@ static void initializeFpmStreamLayout(const MSFLayout &Layout, FpmLayout.Length = msf::getFullFpmByteSize(Layout); } -typedef std::pair<uint32_t, uint32_t> Interval; +using Interval = std::pair<uint32_t, uint32_t>; + static Interval intersect(const Interval &I1, const Interval &I2) { return std::make_pair(std::max(I1.first, I2.first), std::min(I1.second, I2.second)); @@ -214,7 +225,7 @@ bool MappedBlockStream::tryReadContiguously(uint32_t Offset, uint32_t Size, uint32_t OffsetInBlock = Offset % BlockSize; uint32_t BytesFromFirstBlock = std::min(Size, BlockSize - OffsetInBlock); uint32_t NumAdditionalBlocks = - llvm::alignTo(Size - BytesFromFirstBlock, BlockSize) / BlockSize; + alignTo(Size - BytesFromFirstBlock, BlockSize) / BlockSize; uint32_t RequiredContiguousBlocks = NumAdditionalBlocks + 1; uint32_t E = StreamLayout.Blocks[BlockNum]; diff --git a/lib/DebugInfo/PDB/DIA/DIASession.cpp b/lib/DebugInfo/PDB/DIA/DIASession.cpp index ef47b92b4f2f3..ef9390cda3127 100644 --- a/lib/DebugInfo/PDB/DIA/DIASession.cpp +++ b/lib/DebugInfo/PDB/DIA/DIASession.cpp @@ -151,7 +151,7 @@ void DIASession::setLoadAddress(uint64_t Address) { Session->put_loadAddress(Address); } -std::unique_ptr<PDBSymbolExe> DIASession::getGlobalScope() const { +std::unique_ptr<PDBSymbolExe> DIASession::getGlobalScope() { CComPtr<IDiaSymbol> GlobalScope; if (S_OK != Session->get_globalScope(&GlobalScope)) return nullptr; diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp index 81a9d3eeec619..745dd742aadc3 100644 --- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -51,6 +51,7 @@ DbiModuleDescriptorBuilder::DbiModuleDescriptorBuilder(StringRef ModuleName, uint32_t ModIndex, msf::MSFBuilder &Msf) : MSF(Msf), ModuleName(ModuleName) { + ::memset(&Layout, 0, sizeof(Layout)); Layout.Mod = ModIndex; } @@ -102,6 +103,7 @@ template <typename T> struct Foo { template <typename T> Foo<T> makeFoo(T &&t) { return Foo<T>(std::move(t)); } void DbiModuleDescriptorBuilder::finalize() { + Layout.SC.ModuleIndex = Layout.Mod; Layout.FileNameOffs = 0; // TODO: Fix this Layout.Flags = 0; // TODO: Fix this Layout.C11Bytes = 0; @@ -182,3 +184,9 @@ void DbiModuleDescriptorBuilder::addDebugSubsection( C13Builders.push_back(llvm::make_unique<DebugSubsectionRecordBuilder>( std::move(Subsection), CodeViewContainer::Pdb)); } + +void DbiModuleDescriptorBuilder::addDebugSubsection( + const DebugSubsectionRecord &SubsectionContents) { + C13Builders.push_back(llvm::make_unique<DebugSubsectionRecordBuilder>( + SubsectionContents, CodeViewContainer::Pdb)); +} diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp index 24322d942facc..a1f0671dec3e6 100644 --- a/lib/DebugInfo/PDB/Native/DbiStream.cpp +++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp @@ -99,29 +99,27 @@ Error DbiStream::reload() { return make_error<RawError>(raw_error_code::corrupt_file, "DBI type server substream not aligned."); - BinaryStreamRef ModInfoSubstream; - BinaryStreamRef FileInfoSubstream; - if (auto EC = - Reader.readStreamRef(ModInfoSubstream, Header->ModiSubstreamSize)) + if (auto EC = Reader.readSubstream(ModiSubstream, Header->ModiSubstreamSize)) return EC; - if (auto EC = Reader.readStreamRef(SecContrSubstream, + if (auto EC = 
Reader.readSubstream(SecContrSubstream, Header->SecContrSubstreamSize)) return EC; - if (auto EC = Reader.readStreamRef(SecMapSubstream, Header->SectionMapSize)) + if (auto EC = Reader.readSubstream(SecMapSubstream, Header->SectionMapSize)) return EC; - if (auto EC = Reader.readStreamRef(FileInfoSubstream, Header->FileInfoSize)) + if (auto EC = Reader.readSubstream(FileInfoSubstream, Header->FileInfoSize)) return EC; if (auto EC = - Reader.readStreamRef(TypeServerMapSubstream, Header->TypeServerSize)) + Reader.readSubstream(TypeServerMapSubstream, Header->TypeServerSize)) return EC; - if (auto EC = Reader.readStreamRef(ECSubstream, Header->ECSubstreamSize)) + if (auto EC = Reader.readSubstream(ECSubstream, Header->ECSubstreamSize)) return EC; if (auto EC = Reader.readArray( DbgStreams, Header->OptionalDbgHdrSize / sizeof(ulittle16_t))) return EC; - if (auto EC = Modules.initialize(ModInfoSubstream, FileInfoSubstream)) + if (auto EC = Modules.initialize(ModiSubstream.StreamData, + FileInfoSubstream.StreamData)) return EC; if (auto EC = initializeSectionContributionData()) @@ -137,8 +135,8 @@ Error DbiStream::reload() { return make_error<RawError>(raw_error_code::corrupt_file, "Found unexpected bytes in DBI Stream."); - if (ECSubstream.getLength() > 0) { - BinaryStreamReader ECReader(ECSubstream); + if (!ECSubstream.empty()) { + BinaryStreamReader ECReader(ECSubstream.StreamData); if (auto EC = ECNames.reload(ECReader)) return EC; } @@ -228,10 +226,10 @@ void DbiStream::visitSectionContributions( } Error DbiStream::initializeSectionContributionData() { - if (SecContrSubstream.getLength() == 0) + if (SecContrSubstream.empty()) return Error::success(); - BinaryStreamReader SCReader(SecContrSubstream); + BinaryStreamReader SCReader(SecContrSubstream.StreamData); if (auto EC = SCReader.readEnum(SectionContribVersion)) return EC; @@ -302,11 +300,33 @@ Error DbiStream::initializeFpoRecords() { return Error::success(); } +BinarySubstreamRef DbiStream::getSectionContributionData() const { + return SecContrSubstream; +} + +BinarySubstreamRef DbiStream::getSecMapSubstreamData() const { + return SecMapSubstream; +} + +BinarySubstreamRef DbiStream::getModiSubstreamData() const { + return ModiSubstream; +} + +BinarySubstreamRef DbiStream::getFileInfoSubstreamData() const { + return FileInfoSubstream; +} + +BinarySubstreamRef DbiStream::getTypeServerMapSubstreamData() const { + return TypeServerMapSubstream; +} + +BinarySubstreamRef DbiStream::getECSubstreamData() const { return ECSubstream; } + Error DbiStream::initializeSectionMapData() { - if (SecMapSubstream.getLength() == 0) + if (SecMapSubstream.empty()) return Error::success(); - BinaryStreamReader SMReader(SecMapSubstream); + BinaryStreamReader SMReader(SecMapSubstream.StreamData); const SecMapHeader *Header; if (auto EC = SMReader.readObject(Header)) return EC; diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp index e7304b444f23f..aad247ea185f2 100644 --- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp @@ -90,10 +90,14 @@ Error DbiStreamBuilder::addModuleSourceFile(StringRef Module, StringRef File) { if (ModIter == ModiMap.end()) return make_error<RawError>(raw_error_code::no_entry, "The specified module was not found"); + return addModuleSourceFile(*ModIter->second, File); +} + +Error DbiStreamBuilder::addModuleSourceFile(DbiModuleDescriptorBuilder &Module, + StringRef File) { uint32_t Index = SourceFileNames.size(); 
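// Note that insert() leaves an existing entry untouched, so a file that is
// added twice keeps the index from its first insertion; the new Index is
// only consumed for names not seen before.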
SourceFileNames.insert(std::make_pair(File, Index)); - auto &ModEntry = *ModIter; - ModEntry.second->addSourceFile(File); + Module.addSourceFile(File); return Error::success(); } @@ -233,6 +237,7 @@ Error DbiStreamBuilder::finalize() { return EC; DbiStreamHeader *H = Allocator.Allocate<DbiStreamHeader>(); + ::memset(H, 0, sizeof(DbiStreamHeader)); H->VersionHeader = *VerHeader; H->VersionSignature = -1; H->Age = Age; diff --git a/lib/DebugInfo/PDB/Native/InfoStream.cpp b/lib/DebugInfo/PDB/Native/InfoStream.cpp index a3979d480bf45..21b66b3e7bcff 100644 --- a/lib/DebugInfo/PDB/Native/InfoStream.cpp +++ b/lib/DebugInfo/PDB/Native/InfoStream.cpp @@ -57,6 +57,10 @@ Error InfoStream::reload() { uint32_t NewOffset = Reader.getOffset(); NamedStreamMapByteSize = NewOffset - Offset; + Reader.setOffset(Offset); + if (auto EC = Reader.readSubstream(SubNamedStreams, NamedStreamMapByteSize)) + return EC; + bool Stop = false; while (!Stop && !Reader.empty()) { PdbRaw_FeatureSig Sig; @@ -129,3 +133,7 @@ ArrayRef<PdbRaw_FeatureSig> InfoStream::getFeatureSignatures() const { const NamedStreamMap &InfoStream::getNamedStreams() const { return NamedStreams; } + +BinarySubstreamRef InfoStream::getNamedStreamsBuffer() const { + return SubNamedStreams; +} diff --git a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp index 4186f2eb6ba01..83c56574a16e5 100644 --- a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp +++ b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp @@ -47,15 +47,19 @@ Error ModuleDebugStreamRef::reload() { if (auto EC = Reader.readInteger(Signature)) return EC; - if (auto EC = Reader.readArray(SymbolsSubstream, SymbolSize - 4)) + if (auto EC = Reader.readSubstream(SymbolsSubstream, SymbolSize - 4)) return EC; - - if (auto EC = Reader.readStreamRef(C11LinesSubstream, C11Size)) + if (auto EC = Reader.readSubstream(C11LinesSubstream, C11Size)) + return EC; + if (auto EC = Reader.readSubstream(C13LinesSubstream, C13Size)) return EC; - if (auto EC = Reader.readStreamRef(C13LinesSubstream, C13Size)) + + BinaryStreamReader SymbolReader(SymbolsSubstream.StreamData); + if (auto EC = + SymbolReader.readArray(SymbolArray, SymbolReader.bytesRemaining())) return EC; - BinaryStreamReader SubsectionsReader(C13LinesSubstream); + BinaryStreamReader SubsectionsReader(C13LinesSubstream.StreamData); if (auto EC = SubsectionsReader.readArray(Subsections, SubsectionsReader.bytesRemaining())) return EC; @@ -63,7 +67,7 @@ Error ModuleDebugStreamRef::reload() { uint32_t GlobalRefsSize; if (auto EC = Reader.readInteger(GlobalRefsSize)) return EC; - if (auto EC = Reader.readStreamRef(GlobalRefsSubstream, GlobalRefsSize)) + if (auto EC = Reader.readSubstream(GlobalRefsSubstream, GlobalRefsSize)) return EC; if (Reader.bytesRemaining() > 0) return make_error<RawError>(raw_error_code::corrupt_file, @@ -72,9 +76,25 @@ Error ModuleDebugStreamRef::reload() { return Error::success(); } +BinarySubstreamRef ModuleDebugStreamRef::getSymbolsSubstream() const { + return SymbolsSubstream; +} + +BinarySubstreamRef ModuleDebugStreamRef::getC11LinesSubstream() const { + return C11LinesSubstream; +} + +BinarySubstreamRef ModuleDebugStreamRef::getC13LinesSubstream() const { + return C13LinesSubstream; +} + +BinarySubstreamRef ModuleDebugStreamRef::getGlobalRefsSubstream() const { + return GlobalRefsSubstream; +} + iterator_range<codeview::CVSymbolArray::Iterator> ModuleDebugStreamRef::symbols(bool *HadError) const { - return make_range(SymbolsSubstream.begin(HadError), SymbolsSubstream.end()); + 
return make_range(SymbolArray.begin(HadError), SymbolArray.end()); } llvm::iterator_range<ModuleDebugStreamRef::DebugSubsectionIterator> @@ -83,7 +103,7 @@ ModuleDebugStreamRef::subsections() const { } bool ModuleDebugStreamRef::hasDebugSubsections() const { - return C13LinesSubstream.getLength() > 0; + return !C13LinesSubstream.empty(); } Error ModuleDebugStreamRef::commit() { return Error::success(); } diff --git a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp index c7ba32b82bc6b..4f90cd9cd8ac0 100644 --- a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp +++ b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp @@ -23,6 +23,14 @@ using namespace llvm; using namespace llvm::pdb; +// FIXME: This shouldn't be necessary, but if we insert the strings in any +// other order, cvdump cannot read the generated name map. This suggests that +// we may be using the wrong hash function. A closer inspection of the cvdump +// source code may reveal something, but for now this at least makes us work, +// even if only by accident. +static constexpr const char *OrderedStreamNames[] = {"/LinkInfo", "/names", + "/src/headerblock"}; + NamedStreamMap::NamedStreamMap() = default; Error NamedStreamMap::load(BinaryStreamReader &Stream) { @@ -73,9 +81,10 @@ Error NamedStreamMap::commit(BinaryStreamWriter &Writer) const { if (auto EC = Writer.writeInteger(FinalizedInfo->StringDataBytes)) return EC; - // Now all of the string data itself. - for (const auto &Item : Mapping) { - if (auto EC = Writer.writeCString(Item.getKey())) + for (const auto &Name : OrderedStreamNames) { + auto Item = Mapping.find(Name); + assert(Item != Mapping.end()); + if (auto EC = Writer.writeCString(Item->getKey())) return EC; } @@ -93,9 +102,12 @@ uint32_t NamedStreamMap::finalize() { // Build the finalized hash table. FinalizedHashTable.clear(); FinalizedInfo.emplace(); - for (const auto &Item : Mapping) { - FinalizedHashTable.set(FinalizedInfo->StringDataBytes, Item.getValue()); - FinalizedInfo->StringDataBytes += Item.getKeyLength() + 1; + + for (const auto &Name : OrderedStreamNames) { + auto Item = Mapping.find(Name); + assert(Item != Mapping.end()); + FinalizedHashTable.set(FinalizedInfo->StringDataBytes, Item->getValue()); + FinalizedInfo->StringDataBytes += Item->getKeyLength() + 1; } // Number of bytes of string data. 
diff --git a/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
index 77f832582f824..180c169ec209c 100644
--- a/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
@@ -9,17 +9,24 @@
 #include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
 
+#include "llvm/ADT/STLExtras.h"
+
 namespace llvm {
 namespace pdb {
 
 NativeCompilandSymbol::NativeCompilandSymbol(NativeSession &Session,
+                                             uint32_t SymbolId,
                                             DbiModuleDescriptor MI)
-    : NativeRawSymbol(Session), Module(MI) {}
+    : NativeRawSymbol(Session, SymbolId), Module(MI) {}
 
 PDB_SymType NativeCompilandSymbol::getSymTag() const {
   return PDB_SymType::Compiland;
 }
 
+std::unique_ptr<NativeRawSymbol> NativeCompilandSymbol::clone() const {
+  return llvm::make_unique<NativeCompilandSymbol>(Session, SymbolId, Module);
+}
+
 bool NativeCompilandSymbol::isEditAndContinueEnabled() const {
   return Module.hasECInfo();
 }
diff --git a/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp b/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
index 97319fd77d117..c23120041164a 100644
--- a/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
@@ -34,7 +34,7 @@ NativeEnumModules::getChildAtIndex(uint32_t Index) const {
    return nullptr;
   return std::unique_ptr<PDBSymbol>(new PDBSymbolCompiland(
      Session, std::unique_ptr<IPDBRawSymbol>(new NativeCompilandSymbol(
-                   Session, Modules.getModuleDescriptor(Index)))));
+                   Session, 0, Modules.getModuleDescriptor(Index)))));
 }
 
 std::unique_ptr<PDBSymbol> NativeEnumModules::getNext() {
diff --git a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
index bb52560be167a..6206155b9fb64 100644
--- a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
@@ -9,6 +9,7 @@
 #include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h"
 
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
 #include "llvm/DebugInfo/PDB/Native/NativeEnumModules.h"
@@ -17,8 +18,12 @@
 namespace llvm {
 namespace pdb {
 
-NativeExeSymbol::NativeExeSymbol(NativeSession &Session)
-    : NativeRawSymbol(Session), File(Session.getPDBFile()) {}
+NativeExeSymbol::NativeExeSymbol(NativeSession &Session, uint32_t SymbolId)
+    : NativeRawSymbol(Session, SymbolId), File(Session.getPDBFile()) {}
+
+std::unique_ptr<NativeRawSymbol> NativeExeSymbol::clone() const {
+  return llvm::make_unique<NativeExeSymbol>(Session, SymbolId);
+}
 
 std::unique_ptr<IPDBEnumSymbols>
 NativeExeSymbol::findChildren(PDB_SymType Type) const {
diff --git a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
index 70968d4330b07..ed6db63edbabf 100644
--- a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
@@ -22,8 +22,8 @@
 using namespace llvm;
 using namespace llvm::pdb;
 
-NativeRawSymbol::NativeRawSymbol(NativeSession &PDBSession)
-    : Session(PDBSession) {}
+NativeRawSymbol::NativeRawSymbol(NativeSession &PDBSession, uint32_t SymbolId)
+    : Session(PDBSession), SymbolId(SymbolId) {}
 
 void NativeRawSymbol::dump(raw_ostream &OS, int Indent) const {}
 
@@ -253,9 +253,7 @@ uint32_t NativeRawSymbol::getSubTypeId() const {
 
 std::string NativeRawSymbol::getSymbolsFileName() const { return ""; }
 
-uint32_t NativeRawSymbol::getSymIndexId() const {
-  return 0;
-}
+uint32_t NativeRawSymbol::getSymIndexId() const { return SymbolId; }
 
 uint32_t
 NativeRawSymbol::getTargetOffset() const {
   return 0;
diff --git a/lib/DebugInfo/PDB/Native/NativeSession.cpp b/lib/DebugInfo/PDB/Native/NativeSession.cpp
index 7e6843bceb7db..3ab381e76e628 100644
--- a/lib/DebugInfo/PDB/Native/NativeSession.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeSession.cpp
@@ -70,12 +70,11 @@ uint64_t NativeSession::getLoadAddress() const { return 0; }
 
 void NativeSession::setLoadAddress(uint64_t Address) {}
 
-std::unique_ptr<PDBSymbolExe> NativeSession::getGlobalScope() const {
-  auto RawSymbol =
-      llvm::make_unique<NativeExeSymbol>(const_cast<NativeSession &>(*this));
+std::unique_ptr<PDBSymbolExe> NativeSession::getGlobalScope() {
+  auto RawSymbol = llvm::make_unique<NativeExeSymbol>(*this, 0);
   auto PdbSymbol(PDBSymbol::create(*this, std::move(RawSymbol)));
   std::unique_ptr<PDBSymbolExe> ExeSymbol(
-       static_cast<PDBSymbolExe *>(PdbSymbol.release()));
+      static_cast<PDBSymbolExe *>(PdbSymbol.release()));
   return ExeSymbol;
 }
diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp
index a9597cdf4c4d3..4f6ebb0cb3428 100644
--- a/lib/DebugInfo/PDB/Native/PDBFile.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp
@@ -230,6 +230,14 @@ ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
   return ContainerLayout.DirectoryBlocks;
 }
 
+MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const {
+  MSFStreamLayout Result;
+  auto Blocks = getStreamBlockList(StreamIdx);
+  Result.Blocks.assign(Blocks.begin(), Blocks.end());
+  Result.Length = getStreamByteSize(StreamIdx);
+  return Result;
+}
+
 Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
   if (!Globals) {
    auto DbiS = getPDBDbiStream();
diff --git a/lib/DebugInfo/PDB/Native/TpiStream.cpp b/lib/DebugInfo/PDB/Native/TpiStream.cpp
index 67c803d3124ec..f917ef91f6396 100644
--- a/lib/DebugInfo/PDB/Native/TpiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStream.cpp
@@ -66,7 +66,13 @@ Error TpiStream::reload() {
                            "TPI Stream Invalid number of hash buckets.");
 
   // The actual type records themselves come from this stream
-  if (auto EC = Reader.readArray(TypeRecords, Header->TypeRecordBytes))
+  if (auto EC =
+          Reader.readSubstream(TypeRecordsSubstream, Header->TypeRecordBytes))
+    return EC;
+
+  BinaryStreamReader RecordReader(TypeRecordsSubstream.StreamData);
+  if (auto EC =
+          RecordReader.readArray(TypeRecords, TypeRecordsSubstream.size()))
    return EC;
 
   // Hash indices, hash values, etc come from the hash stream.
@@ -135,6 +141,10 @@ uint16_t TpiStream::getTypeHashStreamAuxIndex() const {
 uint32_t TpiStream::getNumHashBuckets() const { return Header->NumHashBuckets; }
 uint32_t TpiStream::getHashKeySize() const { return Header->HashKeySize; }
 
+BinarySubstreamRef TpiStream::getTypeRecordsSubstream() const {
+  return TypeRecordsSubstream;
+}
+
 FixedStreamArray<support::ulittle32_t> TpiStream::getHashValues() const {
   return HashValues;
 }
diff --git a/lib/ExecutionEngine/Orc/OrcCBindings.cpp b/lib/ExecutionEngine/Orc/OrcCBindings.cpp
index 8dcd49aaab5bb..5fe259f80b6fb 100644
--- a/lib/ExecutionEngine/Orc/OrcCBindings.cpp
+++ b/lib/ExecutionEngine/Orc/OrcCBindings.cpp
@@ -12,6 +12,24 @@
 
 using namespace llvm;
 
+LLVMSharedModuleRef LLVMOrcMakeSharedModule(LLVMModuleRef Mod) {
+  return wrap(new std::shared_ptr<Module>(unwrap(Mod)));
+}
+
+void LLVMOrcDisposeSharedModuleRef(LLVMSharedModuleRef SharedMod) {
+  delete unwrap(SharedMod);
+}
+
+LLVMSharedObjectBufferRef
+LLVMOrcMakeSharedObjectBuffer(LLVMMemoryBufferRef ObjBuffer) {
+  return wrap(new std::shared_ptr<MemoryBuffer>(unwrap(ObjBuffer)));
+}
+
+void
+LLVMOrcDisposeSharedObjectBufferRef(LLVMSharedObjectBufferRef SharedObjBuffer) {
+  delete unwrap(SharedObjBuffer);
+}
+
 LLVMOrcJITStackRef LLVMOrcCreateInstance(LLVMTargetMachineRef TM) {
   TargetMachine *TM2(unwrap(TM));
 
@@ -65,21 +83,23 @@ LLVMOrcErrorCode LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack,
 }
 
 LLVMOrcModuleHandle
-LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod,
+LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack,
+                            LLVMSharedModuleRef Mod,
                            LLVMOrcSymbolResolverFn SymbolResolver,
                            void *SymbolResolverCtx) {
   OrcCBindingsStack &J = *unwrap(JITStack);
-  Module *M(unwrap(Mod));
-  return J.addIRModuleEager(M, SymbolResolver, SymbolResolverCtx);
+  std::shared_ptr<Module> *M(unwrap(Mod));
+  return J.addIRModuleEager(*M, SymbolResolver, SymbolResolverCtx);
 }
 
 LLVMOrcModuleHandle
-LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod,
+LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack,
+                           LLVMSharedModuleRef Mod,
                           LLVMOrcSymbolResolverFn SymbolResolver,
                           void *SymbolResolverCtx) {
   OrcCBindingsStack &J = *unwrap(JITStack);
-  Module *M(unwrap(Mod));
-  return J.addIRModuleLazy(M, SymbolResolver, SymbolResolverCtx);
+  std::shared_ptr<Module> *M(unwrap(Mod));
+  return J.addIRModuleLazy(*M, SymbolResolver, SymbolResolverCtx);
 }
 
 void LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, LLVMOrcModuleHandle H) {
diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
index a79dd844bf4f7..931d0a9eb2ade 100644
--- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
+++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
@@ -1,4 +1,4 @@
-//===--- OrcCBindingsStack.h - Orc JIT stack for C bindings ---*- C++ -*---===//
+//===- OrcCBindingsStack.h - Orc JIT stack for C bindings -----*- C++ -*---===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,39 +11,63 @@
 #define LLVM_LIB_EXECUTIONENGINE_ORC_ORCCBINDINGSSTACK_H
 
 #include "llvm-c/OrcBindings.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm-c/TargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
 #include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"
 #include "llvm/ExecutionEngine/Orc/CompileUtils.h"
 #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
 #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
+#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
 #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
 
 namespace llvm {
 
 class OrcCBindingsStack;
 
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(std::shared_ptr<Module>,
+                                   LLVMSharedModuleRef)
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(std::shared_ptr<MemoryBuffer>,
+                                   LLVMSharedObjectBufferRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(OrcCBindingsStack, LLVMOrcJITStackRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(TargetMachine, LLVMTargetMachineRef)
 
 class OrcCBindingsStack {
 public:
-  typedef orc::JITCompileCallbackManager CompileCallbackMgr;
-  typedef orc::RTDyldObjectLinkingLayer<> ObjLayerT;
-  typedef orc::IRCompileLayer<ObjLayerT> CompileLayerT;
-  typedef orc::CompileOnDemandLayer<CompileLayerT, CompileCallbackMgr>
-      CODLayerT;
-  typedef std::function<std::unique_ptr<CompileCallbackMgr>()>
-      CallbackManagerBuilder;
+  using CompileCallbackMgr = orc::JITCompileCallbackManager;
+  using ObjLayerT = orc::RTDyldObjectLinkingLayer;
+  using CompileLayerT = orc::IRCompileLayer<ObjLayerT, orc::SimpleCompiler>;
+  using CODLayerT =
+      orc::CompileOnDemandLayer<CompileLayerT, CompileCallbackMgr>;
 
-  typedef CODLayerT::IndirectStubsManagerBuilderT IndirectStubsManagerBuilder;
+  using CallbackManagerBuilder =
+      std::function<std::unique_ptr<CompileCallbackMgr>()>;
+
+  using IndirectStubsManagerBuilder = CODLayerT::IndirectStubsManagerBuilderT;
 
 private:
   class GenericHandle {
   public:
-    virtual ~GenericHandle() {}
+    virtual ~GenericHandle() = default;
+
    virtual JITSymbol findSymbolIn(const std::string &Name,
                                   bool ExportedSymbolsOnly) = 0;
    virtual void removeModule() = 0;
@@ -51,7 +75,7 @@ private:
 
   template <typename LayerT> class GenericHandleImpl : public GenericHandle {
   public:
-    GenericHandleImpl(LayerT &Layer, typename LayerT::ModuleSetHandleT Handle)
+    GenericHandleImpl(LayerT &Layer, typename LayerT::ModuleHandleT Handle)
        : Layer(Layer), Handle(std::move(Handle)) {}
 
    JITSymbol findSymbolIn(const std::string &Name,
@@ -59,31 +83,28 @@ private:
      return Layer.findSymbolIn(Handle, Name, ExportedSymbolsOnly);
    }
 
-    void removeModule() override { return Layer.removeModuleSet(Handle); }
+    void removeModule() override { return Layer.removeModule(Handle); }
 
   private:
    LayerT &Layer;
-    typename LayerT::ModuleSetHandleT Handle;
+    typename LayerT::ModuleHandleT Handle;
   };
 
   template <typename LayerT>
   std::unique_ptr<GenericHandleImpl<LayerT>>
-  createGenericHandle(LayerT &Layer, typename LayerT::ModuleSetHandleT Handle) {
+  createGenericHandle(LayerT &Layer, typename LayerT::ModuleHandleT Handle) {
    return llvm::make_unique<GenericHandleImpl<LayerT>>(Layer,
                                                        std::move(Handle));
   }
 
 public:
-  // We need a 'ModuleSetHandleT' to conform to the layer concept.
-  typedef unsigned ModuleSetHandleT;
-
-  typedef unsigned ModuleHandleT;
+  using ModuleHandleT = unsigned;
 
   OrcCBindingsStack(TargetMachine &TM,
                    std::unique_ptr<CompileCallbackMgr> CCMgr,
                    IndirectStubsManagerBuilder IndirectStubsMgrBuilder)
      : DL(TM.createDataLayout()), IndirectStubsMgr(IndirectStubsMgrBuilder()),
-        CCMgr(std::move(CCMgr)), ObjectLayer(),
+        CCMgr(std::move(CCMgr)),
        CompileLayer(ObjectLayer, orc::SimpleCompiler(TM)),
        CODLayer(CompileLayer,
                 [](Function &F) { return std::set<Function *>({&F}); },
@@ -153,7 +174,7 @@ public:
          if (ExternalResolver)
            return JITSymbol(
                ExternalResolver(Name.c_str(), ExternalResolverCtx),
-                llvm::JITSymbolFlags::Exported);
+                JITSymbolFlags::Exported);
 
          return JITSymbol(nullptr);
        },
@@ -163,11 +184,10 @@ public:
   }
 
   template <typename LayerT>
-  ModuleHandleT addIRModule(LayerT &Layer, Module *M,
+  ModuleHandleT addIRModule(LayerT &Layer, std::shared_ptr<Module> M,
                            std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr,
                            LLVMOrcSymbolResolverFn ExternalResolver,
                            void *ExternalResolverCtx) {
-
    // Attach a data-layout if one isn't already present.
    if (M->getDataLayout().isDefault())
      M->setDataLayout(DL);
@@ -184,11 +204,8 @@ public:
    auto Resolver = createResolver(ExternalResolver, ExternalResolverCtx);
 
    // Add the module to the JIT.
-    std::vector<Module *> S;
-    S.push_back(std::move(M));
-
-    auto LH = Layer.addModuleSet(std::move(S), std::move(MemMgr),
-                                 std::move(Resolver));
+    auto LH = Layer.addModule(std::move(M), std::move(MemMgr),
+                              std::move(Resolver));
    ModuleHandleT H = createHandle(Layer, LH);
 
    // Run the static constructors, and save the static destructor runner for
@@ -201,7 +218,7 @@ public:
    return H;
   }
 
-  ModuleHandleT addIRModuleEager(Module *M,
+  ModuleHandleT addIRModuleEager(std::shared_ptr<Module> M,
                                 LLVMOrcSymbolResolverFn ExternalResolver,
                                 void *ExternalResolverCtx) {
    return addIRModule(CompileLayer, std::move(M),
@@ -209,7 +226,7 @@ public:
                       std::move(ExternalResolver), ExternalResolverCtx);
   }
 
-  ModuleHandleT addIRModuleLazy(Module *M,
+  ModuleHandleT addIRModuleLazy(std::shared_ptr<Module> M,
                                LLVMOrcSymbolResolverFn ExternalResolver,
                                void *ExternalResolverCtx) {
    return addIRModule(CODLayer, std::move(M),
@@ -238,8 +255,7 @@ public:
 
 private:
   template <typename LayerT>
-  unsigned createHandle(LayerT &Layer,
-                        typename LayerT::ModuleSetHandleT Handle) {
+  unsigned createHandle(LayerT &Layer, typename LayerT::ModuleHandleT Handle) {
    unsigned NewHandle;
    if (!FreeHandleIndexes.empty()) {
      NewHandle = FreeHandleIndexes.back();
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
index b7a68e041c128..f89f21adff417 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
@@ -124,5 +124,10 @@ OrcMCJITReplacement::runFunction(Function *F,
   llvm_unreachable("Full-featured argument passing not supported yet!");
 }
 
+void OrcMCJITReplacement::runStaticConstructorsDestructors(bool isDtors) {
+  for (auto &M : LocalModules)
+    ExecutionEngine::runStaticConstructorsDestructors(*M, isDtors);
+}
+
 } // End namespace orc.
 } // End namespace llvm.
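Note on the Orc C-bindings change above: modules handed to the JIT are now reference-counted via LLVMSharedModuleRef instead of being passed as raw LLVMModuleRef pointers. The sketch below is illustrative only, not part of the patch; it assumes a valid target machine and module already exist, the resolver body is a placeholder, and exact signatures should be checked against llvm-c/OrcBindings.h for the release in use.

// Sketch: intended call sequence for the shared-module C API added above.
// Assumes TM (LLVMTargetMachineRef) and Mod (LLVMModuleRef) already exist.
static uint64_t resolveSymbol(const char *Name, void *Ctx) {
  return 0; // placeholder: a real client resolves Name in the host process
}

void jitOneModule(LLVMTargetMachineRef TM, LLVMModuleRef Mod) {
  LLVMOrcJITStackRef JIT = LLVMOrcCreateInstance(TM);
  // Wrap the module so ownership is shared between the caller and the JIT.
  LLVMSharedModuleRef Shared = LLVMOrcMakeSharedModule(Mod);
  LLVMOrcModuleHandle H =
      LLVMOrcAddEagerlyCompiledIR(JIT, Shared, resolveSymbol, nullptr);
  LLVMOrcDisposeSharedModuleRef(Shared); // the JIT keeps its own reference
  // ... look up and run JIT'd symbols here ...
  LLVMOrcRemoveModule(JIT, H);
}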
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index 7dd6b17d33cb4..b20690c7caafc 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -1,4 +1,4 @@
-//===---- OrcMCJITReplacement.h - Orc based MCJIT replacement ---*- C++ -*-===//
+//===- OrcMCJITReplacement.h - Orc based MCJIT replacement ------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -24,9 +24,12 @@
 #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
 #include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
 #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
 #include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/ObjectFile.h"
@@ -45,6 +48,9 @@
 #include <vector>
 
 namespace llvm {
+
+class ObjectCache;
+
 namespace orc {
 
 class OrcMCJITReplacement : public ExecutionEngine {
@@ -151,7 +157,6 @@ class OrcMCJITReplacement : public ExecutionEngine {
   };
 
 private:
-
   static ExecutionEngine *
   createOrcMCJITReplacement(std::string *ErrorMsg,
                            std::shared_ptr<MCJITMemoryManager> MemMgr,
@@ -162,10 +167,6 @@ private:
   }
 
 public:
-  static void Register() {
-    OrcMCJITReplacementCtor = createOrcMCJITReplacement;
-  }
-
   OrcMCJITReplacement(
      std::shared_ptr<MCJITMemoryManager> MemMgr,
      std::shared_ptr<JITSymbolResolver> ClientResolver,
@@ -178,8 +179,11 @@ public:
        CompileLayer(ObjectLayer, SimpleCompiler(*this->TM)),
        LazyEmitLayer(CompileLayer) {}
 
-  void addModule(std::unique_ptr<Module> M) override {
+  static void Register() {
+    OrcMCJITReplacementCtor = createOrcMCJITReplacement;
+  }
 
+  void addModule(std::unique_ptr<Module> M) override {
    // If this module doesn't have a DataLayout attached then attach the
    // default.
    if (M->getDataLayout().isDefault()) {
@@ -187,29 +191,44 @@ public:
    } else {
      assert(M->getDataLayout() == getDataLayout() && "DataLayout Mismatch");
    }
-    Modules.push_back(std::move(M));
-    std::vector<Module *> Ms;
-    Ms.push_back(&*Modules.back());
-    LazyEmitLayer.addModuleSet(std::move(Ms), &MemMgr, &Resolver);
+    auto *MPtr = M.release();
+    ShouldDelete[MPtr] = true;
+    auto Deleter =
+        [this](Module *Mod) {
+          if (ShouldDelete[Mod])
+            delete Mod;
+        };
+    LocalModules.push_back(std::shared_ptr<Module>(MPtr, std::move(Deleter)));
+    LazyEmitLayer.addModule(LocalModules.back(), &MemMgr, &Resolver);
   }
 
   void addObjectFile(std::unique_ptr<object::ObjectFile> O) override {
-    std::vector<std::unique_ptr<object::ObjectFile>> Objs;
-    Objs.push_back(std::move(O));
-    ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver);
+    auto Obj =
+      std::make_shared<object::OwningBinary<object::ObjectFile>>(std::move(O),
+                                                                 nullptr);
+    ObjectLayer.addObject(std::move(Obj), &MemMgr, &Resolver);
   }
 
   void addObjectFile(object::OwningBinary<object::ObjectFile> O) override {
-    std::vector<std::unique_ptr<object::OwningBinary<object::ObjectFile>>> Objs;
-    Objs.push_back(
-      llvm::make_unique<object::OwningBinary<object::ObjectFile>>(
-        std::move(O)));
-    ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver);
+    auto Obj =
+      std::make_shared<object::OwningBinary<object::ObjectFile>>(std::move(O));
+    ObjectLayer.addObject(std::move(Obj), &MemMgr, &Resolver);
   }
 
   void addArchive(object::OwningBinary<object::Archive> A) override {
    Archives.push_back(std::move(A));
   }
+
+  bool removeModule(Module *M) override {
+    for (auto I = LocalModules.begin(), E = LocalModules.end(); I != E; ++I) {
+      if (I->get() == M) {
+        ShouldDelete[M] = false;
+        LocalModules.erase(I);
+        return true;
+      }
+    }
+    return false;
+  }
 
   uint64_t getSymbolAddress(StringRef Name) {
    return findSymbol(Name).getAddress();
@@ -256,13 +275,15 @@ public:
                    ArrayRef<GenericValue> ArgValues) override;
 
   void setObjectCache(ObjectCache *NewCache) override {
-    CompileLayer.setObjectCache(NewCache);
+    CompileLayer.getCompiler().setObjectCache(NewCache);
   }
 
   void setProcessAllSections(bool ProcessAllSections) override {
    ObjectLayer.setProcessAllSections(ProcessAllSections);
   }
 
+  void runStaticConstructorsDestructors(bool isDtors) override;
+
 private:
   JITSymbol findMangledSymbol(StringRef Name) {
    if (auto Sym = LazyEmitLayer.findSymbol(Name, false))
@@ -294,10 +315,12 @@ private:
      }
      std::unique_ptr<object::Binary> &ChildBin = ChildBinOrErr.get();
      if (ChildBin->isObject()) {
-        std::vector<std::unique_ptr<object::ObjectFile>> ObjSet;
-        ObjSet.push_back(std::unique_ptr<object::ObjectFile>(
-            static_cast<object::ObjectFile *>(ChildBin.release())));
-        ObjectLayer.addObjectSet(std::move(ObjSet), &MemMgr, &Resolver);
+        std::unique_ptr<object::ObjectFile> ChildObj(
+            static_cast<object::ObjectFile*>(ChildBinOrErr->release()));
+        auto Obj =
+            std::make_shared<object::OwningBinary<object::ObjectFile>>(
+                std::move(ChildObj), nullptr);
+        ObjectLayer.addObject(std::move(Obj), &MemMgr, &Resolver);
        if (auto Sym = ObjectLayer.findSymbol(Name, true))
          return Sym;
      }
@@ -308,34 +331,19 @@ private:
 
   class NotifyObjectLoadedT {
   public:
-    typedef std::vector<std::unique_ptr<RuntimeDyld::LoadedObjectInfo>>
-        LoadedObjInfoListT;
+    using LoadedObjInfoListT =
+        std::vector<std::unique_ptr<RuntimeDyld::LoadedObjectInfo>>;
 
    NotifyObjectLoadedT(OrcMCJITReplacement &M) : M(M) {}
 
-    template <typename ObjListT>
-    void operator()(RTDyldObjectLinkingLayerBase::ObjSetHandleT H,
-                    const ObjListT &Objects,
-                    const LoadedObjInfoListT &Infos) const {
+    void operator()(RTDyldObjectLinkingLayerBase::ObjHandleT H,
+                    const RTDyldObjectLinkingLayer::ObjectPtr &Obj,
+                    const LoadedObjectInfo &Info) const {
      M.UnfinalizedSections[H] = std::move(M.SectionsAllocatedSinceLastLoad);
      M.SectionsAllocatedSinceLastLoad = SectionAddrSet();
-      assert(Objects.size() == Infos.size() &&
-             "Incorrect number of Infos for Objects.");
-      for (unsigned I = 0; I < Objects.size(); ++I)
-        M.MemMgr.notifyObjectLoaded(&M, getObject(*Objects[I]));
+      M.MemMgr.notifyObjectLoaded(&M, *Obj->getBinary());
    }
 
   private:
-    static const object::ObjectFile& getObject(const object::ObjectFile &Obj) {
-      return Obj;
-    }
-
-    template <typename ObjT>
-    static const object::ObjectFile&
-    getObject(const object::OwningBinary<ObjT> &Obj) {
-      return *Obj.getBinary();
-    }
-
    OrcMCJITReplacement &M;
   };
 
@@ -343,7 +351,7 @@ private:
   public:
    NotifyFinalizedT(OrcMCJITReplacement &M) : M(M) {}
 
-    void operator()(RTDyldObjectLinkingLayerBase::ObjSetHandleT H) {
+    void operator()(RTDyldObjectLinkingLayerBase::ObjHandleT H) {
      M.UnfinalizedSections.erase(H);
    }
 
@@ -360,9 +368,9 @@ private:
    return MangledName;
   }
 
-  typedef RTDyldObjectLinkingLayer<NotifyObjectLoadedT> ObjectLayerT;
-  typedef IRCompileLayer<ObjectLayerT> CompileLayerT;
-  typedef LazyEmittingLayer<CompileLayerT> LazyEmitLayerT;
+  using ObjectLayerT = RTDyldObjectLinkingLayer;
+  using CompileLayerT = IRCompileLayer<ObjectLayerT, orc::SimpleCompiler>;
+  using LazyEmitLayerT = LazyEmittingLayer<CompileLayerT>;
 
   std::unique_ptr<TargetMachine> TM;
   MCJITReplacementMemMgr MemMgr;
@@ -380,21 +388,24 @@ private:
   // We need to store ObjLayerT::ObjSetHandles for each of the object sets
   // that have been emitted but not yet finalized so that we can forward the
   // mapSectionAddress calls appropriately.
-  typedef std::set<const void *> SectionAddrSet;
-  struct ObjSetHandleCompare {
-    bool operator()(ObjectLayerT::ObjSetHandleT H1,
-                    ObjectLayerT::ObjSetHandleT H2) const {
+  using SectionAddrSet = std::set<const void *>;
+  struct ObjHandleCompare {
+    bool operator()(ObjectLayerT::ObjHandleT H1,
+                    ObjectLayerT::ObjHandleT H2) const {
      return &*H1 < &*H2;
    }
   };
 
   SectionAddrSet SectionsAllocatedSinceLastLoad;
-  std::map<ObjectLayerT::ObjSetHandleT, SectionAddrSet, ObjSetHandleCompare>
+  std::map<ObjectLayerT::ObjHandleT, SectionAddrSet, ObjHandleCompare>
      UnfinalizedSections;
 
+  std::map<Module*, bool> ShouldDelete;
+  std::vector<std::shared_ptr<Module>> LocalModules;
   std::vector<object::OwningBinary<object::Archive>> Archives;
 };
 
 } // end namespace orc
+
 } // end namespace llvm
 
 #endif // LLVM_LIB_EXECUTIONENGINE_ORC_MCJITREPLACEMENT_H
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index 556e122ff82ff..c7f112887a306 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -1,5 +1,4 @@
-
-//===-- AsmWriter.cpp - Printing LLVM as an assembly file -----------------===//
+//===- AsmWriter.cpp - Printing LLVM as an assembly file ------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,63 +14,105 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/IR/Argument.h"
 #include "llvm/IR/AssemblyAnnotationWriter.h"
 #include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalIFunc.h"
+#include "llvm/IR/GlobalIndirectSymbol.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Type.h"
 #include "llvm/IR/TypeFinder.h"
+#include "llvm/IR/Use.h"
 #include "llvm/IR/UseListOrder.h"
-#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
+#include <cassert>
 #include <cctype>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
 using namespace llvm;
 
 // Make virtual table appear in this compilation unit.
-AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {}
+AssemblyAnnotationWriter::~AssemblyAnnotationWriter() = default;
 
 //===----------------------------------------------------------------------===//
 // Helper Functions
 //===----------------------------------------------------------------------===//
 
 namespace {
+
 struct OrderMap {
   DenseMap<const Value *, std::pair<unsigned, bool>> IDs;
 
   unsigned size() const { return IDs.size(); }
   std::pair<unsigned, bool> &operator[](const Value *V) { return IDs[V]; }
+
   std::pair<unsigned, bool> lookup(const Value *V) const {
    return IDs.lookup(V);
   }
+
   void index(const Value *V) {
    // Explicitly sequence get-size and insert-value operations to avoid UB.
    unsigned ID = IDs.size() + 1;
    IDs[V].first = ID;
   }
 };
-}
+
+} // end anonymous namespace
 
 static void orderValue(const Value *V, OrderMap &OM) {
   if (OM.lookup(V).first)
@@ -139,7 +180,7 @@ static void predictValueUseListOrderImpl(const Value *V, const Function *F,
                                         unsigned ID, const OrderMap &OM,
                                         UseListOrderStack &Stack) {
   // Predict use-list order for this one.
-  typedef std::pair<const Use *, unsigned> Entry;
+  using Entry = std::pair<const Use *, unsigned>;
   SmallVector<Entry, 64> List;
   for (const Use &U : V->uses())
    // Check if this user will be serialized.
@@ -421,13 +462,10 @@ static void PrintLLVMName(raw_ostream &OS, const Value *V) {
                isa<GlobalValue>(V) ? GlobalPrefix : LocalPrefix);
 }
 
-
 namespace {
+
 class TypePrinting {
-  TypePrinting(const TypePrinting &) = delete;
-  void operator=(const TypePrinting&) = delete;
 public:
-
   /// NamedTypes - The named types that are used by the current module.
   TypeFinder NamedTypes;
 
@@ -435,6 +473,8 @@ public:
   DenseMap<StructType*, unsigned> NumberedTypes;
 
   TypePrinting() = default;
+  TypePrinting(const TypePrinting &) = delete;
+  TypePrinting &operator=(const TypePrinting &) = delete;
 
   void incorporateTypes(const Module &M);
 
@@ -442,7 +482,8 @@ public:
 
   void printStructBody(StructType *Ty, raw_ostream &OS);
 };
-} // namespace
+
+} // end anonymous namespace
 
 void TypePrinting::incorporateTypes(const Module &M) {
   NamedTypes.run(M, false);
@@ -574,6 +615,7 @@ void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) {
 }
 
 namespace llvm {
+
 //===----------------------------------------------------------------------===//
 // SlotTracker Class: Enumerate slot numbers for unnamed values
 //===----------------------------------------------------------------------===//
@@ -582,32 +624,33 @@ namespace llvm {
 class SlotTracker {
 public:
   /// ValueMap - A mapping of Values to slot numbers.
-  typedef DenseMap<const Value*, unsigned> ValueMap;
+  using ValueMap = DenseMap<const Value *, unsigned>;
 
 private:
   /// TheModule - The module for which we are holding slot numbers.
   const Module* TheModule;
 
   /// TheFunction - The function for which we are holding slot numbers.
-  const Function* TheFunction;
-  bool FunctionProcessed;
+  const Function* TheFunction = nullptr;
+  bool FunctionProcessed = false;
   bool ShouldInitializeAllMetadata;
 
   /// mMap - The slot map for the module level data.
   ValueMap mMap;
-  unsigned mNext;
+  unsigned mNext = 0;
 
   /// fMap - The slot map for the function level data.
   ValueMap fMap;
-  unsigned fNext;
+  unsigned fNext = 0;
 
   /// mdnMap - Map for MDNodes.
   DenseMap<const MDNode*, unsigned> mdnMap;
-  unsigned mdnNext;
+  unsigned mdnNext = 0;
 
   /// asMap - The slot map for attribute sets.
   DenseMap<AttributeSet, unsigned> asMap;
-  unsigned asNext;
+  unsigned asNext = 0;
+
 public:
   /// Construct from a module.
   ///
@@ -616,6 +659,7 @@ public:
   /// within a function (even if no functions have been initialized).
   explicit SlotTracker(const Module *M,
                       bool ShouldInitializeAllMetadata = false);
+
   /// Construct from a function, starting out in incorp state.
   ///
   /// If \c ShouldInitializeAllMetadata, initializes all metadata in all
@@ -624,6 +668,9 @@ public:
   explicit SlotTracker(const Function *F,
                       bool ShouldInitializeAllMetadata = false);
 
+  SlotTracker(const SlotTracker &) = delete;
+  SlotTracker &operator=(const SlotTracker &) = delete;
+
   /// Return the slot number of the specified value in it's type
   /// plane.  If something is not in the SlotTracker, return -1.
   int getLocalSlot(const Value *V);
@@ -646,14 +693,16 @@ public:
   void purgeFunction();
 
   /// MDNode map iterators.
-  typedef DenseMap<const MDNode*, unsigned>::iterator mdn_iterator;
+  using mdn_iterator = DenseMap<const MDNode*, unsigned>::iterator;
+
   mdn_iterator mdn_begin() { return mdnMap.begin(); }
   mdn_iterator mdn_end() { return mdnMap.end(); }
   unsigned mdn_size() const { return mdnMap.size(); }
   bool mdn_empty() const { return mdnMap.empty(); }
 
   /// AttributeSet map iterators.
-  typedef DenseMap<AttributeSet, unsigned>::iterator as_iterator;
+  using as_iterator = DenseMap<AttributeSet, unsigned>::iterator;
+
   as_iterator as_begin()   { return asMap.begin(); }
   as_iterator as_end()     { return asMap.end(); }
   unsigned as_size() const { return asMap.size(); }
@@ -691,11 +740,9 @@ private:
 
   /// Add all of the metadata from an instruction.
   void processInstructionMetadata(const Instruction &I);
-
-  SlotTracker(const SlotTracker &) = delete;
-  void operator=(const SlotTracker &) = delete;
 };
-} // namespace llvm
+
+} // end namespace llvm
 
 ModuleSlotTracker::ModuleSlotTracker(SlotTracker &Machine, const Module *M,
                                     const Function *F)
@@ -706,7 +753,7 @@ ModuleSlotTracker::ModuleSlotTracker(const Module *M,
    : ShouldCreateStorage(M),
      ShouldInitializeAllMetadata(ShouldInitializeAllMetadata), M(M) {}
 
-ModuleSlotTracker::~ModuleSlotTracker() {}
+ModuleSlotTracker::~ModuleSlotTracker() = default;
 
 SlotTracker *ModuleSlotTracker::getMachine() {
   if (!ShouldCreateStorage)
@@ -773,17 +820,13 @@ static SlotTracker *createSlotTracker(const Value *V) {
 // Module level constructor. Causes the contents of the Module (sans functions)
 // to be added to the slot table.
 SlotTracker::SlotTracker(const Module *M, bool ShouldInitializeAllMetadata)
-    : TheModule(M), TheFunction(nullptr), FunctionProcessed(false),
-      ShouldInitializeAllMetadata(ShouldInitializeAllMetadata), mNext(0),
-      fNext(0), mdnNext(0), asNext(0) {}
+    : TheModule(M), ShouldInitializeAllMetadata(ShouldInitializeAllMetadata) {}
 
 // Function level constructor. Causes the contents of the Module and the one
 // function provided to be added to the slot table.
 SlotTracker::SlotTracker(const Function *F, bool ShouldInitializeAllMetadata)
    : TheModule(F ? F->getParent() : nullptr), TheFunction(F),
-      FunctionProcessed(false),
-      ShouldInitializeAllMetadata(ShouldInitializeAllMetadata), mNext(0),
-      fNext(0), mdnNext(0), asNext(0) {}
+      ShouldInitializeAllMetadata(ShouldInitializeAllMetadata) {}
 
 inline void SlotTracker::initialize() {
   if (TheModule) {
@@ -949,7 +992,6 @@ int SlotTracker::getMetadataSlot(const MDNode *N) {
   return MI == mdnMap.end() ? -1 : (int)MI->second;
 }
 
-
 /// getLocalSlot - Get the slot number for a value that is local to a function.
 int SlotTracker::getLocalSlot(const Value *V) {
   assert(!isa<Constant>(V) && "Can't get a constant or global slot with this!");
@@ -1248,7 +1290,6 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
    return;
   }
 
-
   if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
    if (CS->getType()->isPacked())
      Out << '<';
@@ -1381,11 +1422,14 @@ static void writeMDTuple(raw_ostream &Out, const MDTuple *Node,
 }
 
 namespace {
+
 struct FieldSeparator {
-  bool Skip;
+  bool Skip = true;
   const char *Sep;
-  FieldSeparator(const char *Sep = ", ") : Skip(true), Sep(Sep) {}
+
+  FieldSeparator(const char *Sep = ", ") : Sep(Sep) {}
 };
+
 raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) {
   if (FS.Skip) {
    FS.Skip = false;
@@ -1393,19 +1437,20 @@ raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) {
   }
   return OS << FS.Sep;
 }
+
 struct MDFieldPrinter {
   raw_ostream &Out;
   FieldSeparator FS;
-  TypePrinting *TypePrinter;
-  SlotTracker *Machine;
-  const Module *Context;
+  TypePrinting *TypePrinter = nullptr;
+  SlotTracker *Machine = nullptr;
+  const Module *Context = nullptr;
 
-  explicit MDFieldPrinter(raw_ostream &Out)
-      : Out(Out), TypePrinter(nullptr), Machine(nullptr), Context(nullptr) {}
+  explicit MDFieldPrinter(raw_ostream &Out) : Out(Out) {}
   MDFieldPrinter(raw_ostream &Out, TypePrinting *TypePrinter,
                 SlotTracker *Machine, const Module *Context)
      : Out(Out), TypePrinter(TypePrinter), Machine(Machine), Context(Context) {
   }
+
   void printTag(const DINode *N);
   void printMacinfoType(const DIMacroNode *N);
   void printChecksumKind(const DIFile *N);
@@ -1422,7 +1467,8 @@ struct MDFieldPrinter {
                      bool ShouldSkipZero = true);
   void printEmissionKind(StringRef Name, DICompileUnit::DebugEmissionKind EK);
 };
-} // end namespace
+
+} // end anonymous namespace
 
 void MDFieldPrinter::printTag(const DINode *N) {
   Out << FS << "tag: ";
@@ -1518,7 +1564,6 @@ void MDFieldPrinter::printEmissionKind(StringRef Name,
   Out << FS << Name << ": " << DICompileUnit::EmissionKindString(EK);
 }
 
-
 template <class IntTy, class Stringifier>
 void MDFieldPrinter::printDwarfEnum(StringRef Name, IntTy Value,
                                    Stringifier toString, bool ShouldSkipZero) {
@@ -1923,7 +1968,6 @@ static void writeDIImportedEntity(raw_ostream &Out, const DIImportedEntity *N,
   Out << ")";
 }
 
-
 static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
                                    TypePrinting *TypePrinter,
                                    SlotTracker *Machine,
@@ -2062,6 +2106,7 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
 }
 
 namespace {
+
 class AssemblyWriter {
   formatted_raw_ostream &Out;
   const Module *TheModule;
@@ -2125,7 +2170,8 @@ private:
   // intrinsic indicating base and derived pointer names.
   void printGCRelocateComment(const GCRelocateInst &Relocate);
 };
-} // namespace
+
+} // end anonymous namespace
 
 AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
                               const Module *M, AssemblyAnnotationWriter *AAW,
@@ -2594,7 +2640,6 @@ void AssemblyWriter::printTypeIdentities() {
 }
 
 /// printFunction - Print all aspects of a function.
-///
 void AssemblyWriter::printFunction(const Function *F) {
   // Print out the return type and name.
   Out << '\n';
@@ -2730,7 +2775,6 @@ void AssemblyWriter::printFunction(const Function *F) {
 
 /// printArgument - This member is called for every argument that is passed into
 /// the function.  Simply print it out
-///
 void AssemblyWriter::printArgument(const Argument *Arg, AttributeSet Attrs) {
   // Output type...
   TypePrinter.print(Arg->getType(), Out);
@@ -2747,7 +2791,6 @@ void AssemblyWriter::printArgument(const Argument *Arg, AttributeSet Attrs) {
 }
 
 /// printBasicBlock - This member is called for each basic block in a method.
-///
 void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
   if (BB->hasName()) {              // Print out the label if it exists...
    Out << "\n";
@@ -2813,7 +2856,6 @@ void AssemblyWriter::printGCRelocateComment(const GCRelocateInst &Relocate) {
 
 /// printInfoComment - Print a little comment after the instruction indicating
 /// which slot it occupies.
-///
 void AssemblyWriter::printInfoComment(const Value &V) {
   if (const auto *Relocate = dyn_cast<GCRelocateInst>(&V))
    printGCRelocateComment(*Relocate);
@@ -3046,7 +3088,6 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
      Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
 
    writeOperandBundles(CI);
-
   } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
    Operand = II->getCalledValue();
    FunctionType *FTy = II->getFunctionType();
@@ -3087,7 +3128,6 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
    writeOperand(II->getNormalDest(), true);
    Out << " unwind ";
    writeOperand(II->getUnwindDest(), true);
-
   } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
    Out << ' ';
    if (AI->isUsedWithInAlloca())
@@ -3113,7 +3153,6 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
    if (AddrSpace != 0) {
      Out << ", addrspace(" << AddrSpace << ')';
    }
-
   } else if (isa<CastInst>(I)) {
    if (Operand) {
      Out << ' ';
diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index 4ed7b021883de..9c7b61f679236 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -100,6 +100,7 @@ public:
 
 class EnumAttributeImpl : public AttributeImpl {
   virtual void anchor();
+
   Attribute::AttrKind Kind;
 
 protected:
@@ -133,6 +134,7 @@ public:
 
 class StringAttributeImpl : public AttributeImpl {
   virtual void anchor();
+
   std::string Kind;
   std::string Val;
 
@@ -243,7 +245,8 @@ public:
    return AvailableFunctionAttrs & ((uint64_t)1) << Kind;
   }
 
-  typedef const AttributeSet *iterator;
+  using iterator = const AttributeSet *;
+
   iterator begin() const { return getTrailingObjects<AttributeSet>(); }
   iterator end() const { return begin() + NumAttrSets; }
 
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index a518f7b5c81a8..8f2e641d64b92 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -1638,6 +1638,39 @@ static void adjustCallerSSPLevel(Function &Caller, const Function &Callee) {
    Caller.addFnAttr(Attribute::StackProtect);
 }
 
+/// \brief If the inlined function required stack probes, then ensure that
+/// the calling function has those too.
+static void adjustCallerStackProbes(Function &Caller, const Function &Callee) {
+  if (!Caller.hasFnAttribute("probe-stack") &&
+      Callee.hasFnAttribute("probe-stack")) {
+    Caller.addFnAttr(Callee.getFnAttribute("probe-stack"));
+  }
+}
+
+/// \brief If the inlined function defines the size of guard region
+/// on the stack, then ensure that the calling function defines a guard region
+/// that is no larger.
+static void
+adjustCallerStackProbeSize(Function &Caller, const Function &Callee) {
+  if (Callee.hasFnAttribute("stack-probe-size")) {
+    uint64_t CalleeStackProbeSize;
+    Callee.getFnAttribute("stack-probe-size")
+          .getValueAsString()
+          .getAsInteger(0, CalleeStackProbeSize);
+    if (Caller.hasFnAttribute("stack-probe-size")) {
+      uint64_t CallerStackProbeSize;
+      Caller.getFnAttribute("stack-probe-size")
+            .getValueAsString()
+            .getAsInteger(0, CallerStackProbeSize);
+      if (CallerStackProbeSize > CalleeStackProbeSize) {
+        Caller.addFnAttr(Callee.getFnAttribute("stack-probe-size"));
+      }
+    } else {
+      Caller.addFnAttr(Callee.getFnAttribute("stack-probe-size"));
+    }
+  }
+}
+
 #define GET_ATTR_COMPAT_FUNC
 #include "AttributesCompatFunc.inc"
 
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index 06934b365a11b..6a4b8032ffd54 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -142,6 +142,11 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
      Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
      Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
+      Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
+      Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
+      Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
+      Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
+      Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
      Name == "avx512.mask.add.pd.128" || // Added in 4.0
      Name == "avx512.mask.add.pd.256" || // Added in 4.0
      Name == "avx512.mask.add.ps.128" || // Added in 4.0
@@ -783,12 +788,30 @@ static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
 }
 
 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
-                                   ICmpInst::Predicate Pred) {
+                                   unsigned CC, bool Signed) {
   Value *Op0 = CI.getArgOperand(0);
   unsigned NumElts = Op0->getType()->getVectorNumElements();
-  Value *Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
 
-  Value *Mask = CI.getArgOperand(2);
+  Value *Cmp;
+  if (CC == 3) {
+    Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
+  } else if (CC == 7) {
+    Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
+  } else {
+    ICmpInst::Predicate Pred;
+    switch (CC) {
+    default: llvm_unreachable("Unknown condition code");
+    case 0: Pred = ICmpInst::ICMP_EQ; break;
+    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
+    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
+    case 4: Pred = ICmpInst::ICMP_NE; break;
+    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
+    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
+    }
+    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
+  }
+
+  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
   const auto *C = dyn_cast<Constant>(Mask);
   if (!C || !C->isAllOnesValue())
    Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));
@@ -1007,9 +1030,13 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
    } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
      // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
      bool CmpEq = Name[16] == 'e';
-      Rep = upgradeMaskedCompare(Builder, *CI,
-                                 CmpEq ? ICmpInst::ICMP_EQ
-                                       : ICmpInst::ICMP_SGT);
+      Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
+    } else if (IsX86 && Name.startswith("avx512.mask.cmp")) {
+      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+      Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
+    } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
+      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+      Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
    } else if (IsX86 && (Name == "sse41.pmaxsb" ||
                         Name == "sse2.pmaxs.w" ||
                         Name == "sse41.pmaxsd" ||
diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp
index 1f8659d4e2cae..2b780adf6c69c 100644
--- a/lib/IR/BasicBlock.cpp
+++ b/lib/IR/BasicBlock.cpp
@@ -355,6 +355,19 @@ bool BasicBlock::canSplitPredecessors() const {
   return true;
 }
 
+bool BasicBlock::isLegalToHoistInto() const {
+  auto *Term = getTerminator();
+  // No terminator means the block is under construction.
+  if (!Term)
+    return true;
+
+  // If the block has no successors, there can be no instructions to hoist.
+  assert(Term->getNumSuccessors() > 0);
+
+  // Instructions should not be hoisted across exception handling boundaries.
+  return !Term->isExceptional();
+}
+
 /// This splits a basic block into two at the specified
 /// instruction. Note that all instructions BEFORE the specified iterator stay
 /// as part of the original basic block, an unconditional branch is added to
diff --git a/lib/IR/ConstantRange.cpp b/lib/IR/ConstantRange.cpp
index 21d1996ef8514..4bd17257016d7 100644
--- a/lib/IR/ConstantRange.cpp
+++ b/lib/IR/ConstantRange.cpp
@@ -1,4 +1,4 @@
-//===-- ConstantRange.cpp - ConstantRange implementation ------------------===//
+//===- ConstantRange.cpp - ConstantRange implementation -------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -21,12 +21,21 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/APInt.h"
 #include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+
 using namespace llvm;
 
 ConstantRange::ConstantRange(uint32_t BitWidth, bool Full)
@@ -170,7 +179,7 @@ ConstantRange
 ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp,
                                          const ConstantRange &Other,
                                          unsigned NoWrapKind) {
-  typedef OverflowingBinaryOperator OBO;
+  using OBO = OverflowingBinaryOperator;
 
   // Computes the intersection of CR0 and CR1.  It is different from
   // intersectWith in that the ConstantRange returned will only contain elements
@@ -284,27 +293,14 @@ APInt ConstantRange::getUnsignedMin() const {
 }
 
 APInt ConstantRange::getSignedMax() const {
-  if (!isWrappedSet()) {
-    APInt UpperMinusOne = getUpper() - 1;
-    if (getLower().sle(UpperMinusOne))
-      return UpperMinusOne;
-    return APInt::getSignedMaxValue(getBitWidth());
-  }
-  if (getLower().isNegative() == getUpper().isNegative())
+  if (isFullSet() || Lower.sgt(Upper))
    return APInt::getSignedMaxValue(getBitWidth());
   return getUpper() - 1;
 }
 
 APInt ConstantRange::getSignedMin() const {
-  if (!isWrappedSet()) {
-    if (getLower().sle(getUpper() - 1))
-      return getLower();
+  if (isFullSet() || (Lower.sgt(Upper) && !getUpper().isMinSignedValue()))
    return APInt::getSignedMinValue(getBitWidth());
-  }
-  if ((getUpper() - 1).slt(getLower())) {
-    if (!getUpper().isMinSignedValue())
-      return APInt::getSignedMinValue(getBitWidth());
-  }
   return getLower();
 }
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 023a0b178a145..a79b00be4ffe8 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -1995,8 +1995,8 @@ BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
                InsertBefore) {
   Op<0>() = S1;
   Op<1>() = S2;
-  init(iType);
   setName(Name);
+  AssertOK();
 }
 
 BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
@@ -2008,17 +2008,17 @@ BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
                InsertAtEnd) {
   Op<0>() = S1;
   Op<1>() = S2;
-  init(iType);
   setName(Name);
+  AssertOK();
 }
 
-void BinaryOperator::init(BinaryOps iType) {
+void BinaryOperator::AssertOK() {
   Value *LHS = getOperand(0), *RHS = getOperand(1);
   (void)LHS; (void)RHS; // Silence warnings.
   assert(LHS->getType() == RHS->getType() &&
         "Binary operator operand types must match!");
 #ifndef NDEBUG
-  switch (iType) {
+  switch (getOpcode()) {
   case Add: case Sub:
   case Mul:
    assert(getType() == LHS->getType() &&
@@ -2038,8 +2038,7 @@ void BinaryOperator::AssertOK() {
   case SDiv:
    assert(getType() == LHS->getType() &&
          "Arithmetic operation should return same type as operands!");
-    assert((getType()->isIntegerTy() || (getType()->isVectorTy() &&
-            cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+    assert(getType()->isIntOrIntVectorTy() &&
          "Incorrect operand type (not integer) for S/UDIV");
    break;
   case FDiv:
@@ -2052,8 +2051,7 @@ void BinaryOperator::AssertOK() {
   case SRem:
    assert(getType() == LHS->getType() &&
          "Arithmetic operation should return same type as operands!");
-    assert((getType()->isIntegerTy() || (getType()->isVectorTy() &&
-            cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+    assert(getType()->isIntOrIntVectorTy() &&
          "Incorrect operand type (not integer) for S/UREM");
    break;
   case FRem:
@@ -2067,22 +2065,17 @@ void BinaryOperator::AssertOK() {
   case AShr:
    assert(getType() == LHS->getType() &&
           "Shift operation should return same type as operands!");
-    assert((getType()->isIntegerTy() ||
-            (getType()->isVectorTy() &&
-             cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+    assert(getType()->isIntOrIntVectorTy() &&
           "Tried to create a shift operation on a non-integral type!");
    break;
   case And: case Or:
   case Xor:
    assert(getType() == LHS->getType() &&
           "Logical operation should return same type as operands!");
-    assert((getType()->isIntegerTy() ||
-            (getType()->isVectorTy() &&
-             cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+    assert(getType()->isIntOrIntVectorTy() &&
           "Tried to create a logical operation on a non-integral type!");
    break;
-  default:
-    break;
+  default: llvm_unreachable("Invalid opcode provided");
   }
 #endif
 }
diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp
index 4a30d28c39138..c19e1be44fdc7 100644
--- a/lib/IR/LLVMContextImpl.cpp
+++ b/lib/IR/LLVMContextImpl.cpp
@@ -1,4 +1,4 @@
-//===-- LLVMContextImpl.cpp - Implement LLVMContextImpl -------------------===//
+//===- LLVMContextImpl.cpp - Implement LLVMContextImpl --------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,18 +12,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "LLVMContextImpl.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/OptBisect.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Support/ManagedStatic.h"
-#include <algorithm>
+#include <cassert>
+#include <utility>
+
 using namespace llvm;
 
 LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
-  : TheTrueVal(nullptr), TheFalseVal(nullptr),
-    VoidTy(C, Type::VoidTyID),
+  : VoidTy(C, Type::VoidTyID),
    LabelTy(C, Type::LabelTyID),
    HalfTy(C, Type::HalfTyID),
    FloatTy(C, Type::FloatTyID),
@@ -39,17 +38,7 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
    Int16Ty(C, 16),
    Int32Ty(C, 32),
    Int64Ty(C, 64),
-    Int128Ty(C, 128) {
-  InlineAsmDiagHandler = nullptr;
-  InlineAsmDiagContext = nullptr;
-  DiagnosticHandler = nullptr;
-  DiagnosticContext = nullptr;
-  RespectDiagnosticFilters = false;
-  DiagnosticHotnessRequested = false;
-  YieldCallback = nullptr;
-  YieldOpaqueHandle = nullptr;
-  NamedStructTypesUniqueID = 0;
-}
+    Int128Ty(C, 128) {}
 
 LLVMContextImpl::~LLVMContextImpl() {
   // NOTE: We need to delete the contents of OwnedModules, but Module's dtor
@@ -156,7 +145,6 @@ void LLVMContextImpl::dropTriviallyDeadConstantArrays() {
        C->destroyConstant();
      }
    }
-
   } while (Changed);
 }
 
@@ -165,6 +153,7 @@ void Module::dropTriviallyDeadConstantArrays() {
 }
 
 namespace llvm {
+
 /// \brief Make MDOperand transparent for hashing.
 ///
 /// This overload of an implementation detail of the hashing library makes
@@ -179,7 +168,8 @@ namespace llvm {
 /// does not cause MDOperand to be transparent.  In particular, a bare pointer
 /// doesn't get hashed before it's combined, whereas \a MDOperand would.
 static const Metadata *get_hashable_data(const MDOperand &X) { return X.get(); }
-}
+
+} // end namespace llvm
 
 unsigned MDNodeOpsKey::calculateHash(MDNode *N, unsigned Offset) {
   unsigned Hash = hash_combine_range(N->op_begin() + Offset, N->op_end());
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index 4ba974409a4fc..4147f71ad9d2c 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -1,4 +1,4 @@
-//===-- LLVMContextImpl.h - The LLVMContextImpl opaque class ----*- C++ -*-===//
+//===- LLVMContextImpl.h - The LLVMContextImpl opaque class -----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -21,11 +21,16 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Constants.h"
@@ -33,21 +38,26 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
-#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/TrackingMDRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/YAMLTraits.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
 #include <vector>
 
 namespace llvm {
 
-class ConstantInt;
 class ConstantFP;
-class DiagnosticInfoOptimizationRemark;
-class DiagnosticInfoOptimizationRemarkMissed;
-class DiagnosticInfoOptimizationRemarkAnalysis;
-class GCStrategy;
-class LLVMContext;
+class ConstantInt;
 class Type;
 class Value;
+class ValueHandleBase;
 
 struct DenseMapAPIntKeyInfo {
   static inline APInt getEmptyKey() {
@@ -55,14 +65,17 @@ struct DenseMapAPIntKeyInfo {
    V.U.VAL = 0;
    return V;
   }
+
   static inline APInt getTombstoneKey() {
    APInt V(nullptr, 0);
    V.U.VAL = 1;
    return V;
   }
+
   static unsigned getHashValue(const APInt &Key) {
    return static_cast<unsigned>(hash_value(Key));
   }
+
   static bool isEqual(const APInt &LHS, const APInt &RHS) {
    return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS;
   }
@@ -71,9 +84,11 @@ struct DenseMapAPIntKeyInfo {
 struct DenseMapAPFloatKeyInfo {
   static inline APFloat getEmptyKey() { return APFloat(APFloat::Bogus(), 1); }
   static inline APFloat getTombstoneKey() { return APFloat(APFloat::Bogus(), 2); }
+
   static unsigned getHashValue(const APFloat &Key) {
    return static_cast<unsigned>(hash_value(Key));
   }
+
   static bool isEqual(const APFloat &LHS, const APFloat &RHS) {
    return LHS.bitwiseIsEqual(RHS);
   }
@@ -83,10 +98,13 @@ struct AnonStructTypeKeyInfo {
   struct KeyTy {
    ArrayRef<Type*> ETypes;
    bool isPacked;
+
    KeyTy(const ArrayRef<Type*>& E, bool P) : ETypes(E), isPacked(P) {}
+
    KeyTy(const StructType *ST)
        : ETypes(ST->elements()), isPacked(ST->isPacked()) {}
+
    bool operator==(const KeyTy& that) const {
      if (isPacked != that.isPacked)
        return false;
@@ -98,25 +116,31 @@ struct AnonStructTypeKeyInfo {
      return !this->operator==(that);
    }
   };
+
   static inline StructType* getEmptyKey() {
    return DenseMapInfo<StructType*>::getEmptyKey();
   }
+
   static inline StructType* getTombstoneKey() {
    return DenseMapInfo<StructType*>::getTombstoneKey();
   }
+
   static unsigned getHashValue(const KeyTy& Key) {
    return hash_combine(hash_combine_range(Key.ETypes.begin(),
                                           Key.ETypes.end()),
                       Key.isPacked);
   }
+
   static unsigned getHashValue(const StructType *ST) {
    return getHashValue(KeyTy(ST));
   }
+
   static bool isEqual(const KeyTy& LHS, const StructType *RHS) {
    if (RHS == getEmptyKey() || RHS == getTombstoneKey())
      return false;
    return LHS == KeyTy(RHS);
   }
+
   static bool isEqual(const StructType *LHS, const StructType *RHS) {
    return LHS == RHS;
   }
@@ -127,11 +151,13 @@ struct FunctionTypeKeyInfo {
   struct KeyTy {
    const Type *ReturnType;
    ArrayRef<Type*> Params;
    bool isVarArg;
+
    KeyTy(const Type* R, const ArrayRef<Type*>& P, bool V)
        : ReturnType(R), Params(P), isVarArg(V) {}
    KeyTy(const FunctionType *FT)
        : ReturnType(FT->getReturnType()), Params(FT->params()),
          isVarArg(FT->isVarArg()) {}
+
    bool operator==(const KeyTy& that) const {
      if (ReturnType != that.ReturnType)
        return false;
@@ -145,26 +171,32 @@ struct FunctionTypeKeyInfo {
      return !this->operator==(that);
    }
   };
+
   static inline FunctionType* getEmptyKey() {
    return DenseMapInfo<FunctionType*>::getEmptyKey();
   }
+
   static inline FunctionType* getTombstoneKey() {
    return DenseMapInfo<FunctionType*>::getTombstoneKey();
   }
+
   static unsigned getHashValue(const KeyTy& Key) {
    return hash_combine(Key.ReturnType,
                       hash_combine_range(Key.Params.begin(),
                                          Key.Params.end()),
                       Key.isVarArg);
   }
+
   static unsigned getHashValue(const FunctionType *FT) {
    return getHashValue(KeyTy(FT));
   }
+
   static bool isEqual(const KeyTy& LHS, const FunctionType *RHS) {
    if (RHS == getEmptyKey() || RHS == getTombstoneKey())
      return false;
    return LHS == KeyTy(RHS);
   }
+
   static bool isEqual(const FunctionType *LHS, const FunctionType *RHS) {
    return LHS == RHS;
   }
@@ -174,7 +206,6 @@ struct FunctionTypeKeyInfo {
 class MDNodeOpsKey {
   ArrayRef<Metadata *> RawOps;
   ArrayRef<MDOperand> Ops;
-
   unsigned Hash;
 
 protected:
@@ -212,14 +243,15 @@ public:
 };
 
 template <class NodeTy> struct MDNodeKeyImpl;
-template <class NodeTy> struct MDNodeInfo;
 
 /// Configuration point for MDNodeInfo::isEqual().
 template <class NodeTy> struct MDNodeSubsetEqualImpl {
-  typedef MDNodeKeyImpl<NodeTy> KeyTy;
+  using KeyTy = MDNodeKeyImpl<NodeTy>;
+
   static bool isSubsetEqual(const KeyTy &LHS, const NodeTy *RHS) {
    return false;
   }
+
   static bool isSubsetEqual(const NodeTy *LHS, const NodeTy *RHS) {
    return false;
   }
@@ -252,7 +284,6 @@ template <> struct MDNodeKeyImpl<DILocation> {
   MDNodeKeyImpl(unsigned Line, unsigned Column, Metadata *Scope,
                Metadata *InlinedAt)
      : Line(Line), Column(Column), Scope(Scope), InlinedAt(InlinedAt) {}
-
   MDNodeKeyImpl(const DILocation *L)
      : Line(L->getLine()), Column(L->getColumn()), Scope(L->getRawScope()),
        InlinedAt(L->getRawInlinedAt()) {}
@@ -261,6 +292,7 @@ template <> struct MDNodeKeyImpl<DILocation> {
    return Line == RHS->getLine() && Column == RHS->getColumn() &&
           Scope == RHS->getRawScope() && InlinedAt == RHS->getRawInlinedAt();
   }
+
   unsigned getHashValue() const {
    return hash_combine(Line, Column, Scope, InlinedAt);
   }
@@ -270,6 +302,7 @@ template <> struct MDNodeKeyImpl<DILocation> {
 template <> struct MDNodeKeyImpl<GenericDINode> : MDNodeOpsKey {
   unsigned Tag;
   MDString *Header;
+
   MDNodeKeyImpl(unsigned Tag, MDString *Header, ArrayRef<Metadata *> DwarfOps)
      : MDNodeOpsKey(DwarfOps), Tag(Tag), Header(Header) {}
   MDNodeKeyImpl(const GenericDINode *N)
@@ -299,6 +332,7 @@ template <> struct MDNodeKeyImpl<DISubrange> {
   bool isKeyOf(const DISubrange *RHS) const {
    return Count == RHS->getCount() && LowerBound == RHS->getLowerBound();
   }
+
   unsigned getHashValue() const { return hash_combine(Count, LowerBound); }
 };
 
@@ -313,6 +347,7 @@ template <> struct MDNodeKeyImpl<DIEnumerator> {
   bool isKeyOf(const DIEnumerator *RHS) const {
    return Value == RHS->getValue() && Name == RHS->getRawName();
   }
+
   unsigned getHashValue() const { return hash_combine(Value, Name); }
 };
 
@@ -337,6 +372,7 @@ template <> struct MDNodeKeyImpl<DIBasicType> {
           AlignInBits == RHS->getAlignInBits() &&
           Encoding == RHS->getEncoding();
   }
+
   unsigned getHashValue() const {
    return hash_combine(Tag, Name, SizeInBits, AlignInBits, Encoding);
   }
@@ -384,6 +420,7 @@ template <> struct MDNodeKeyImpl<DIDerivedType> {
           Flags == RHS->getFlags() &&
           ExtraData == RHS->getRawExtraData();
   }
+
   unsigned getHashValue() const {
    // If this is a member inside an ODR type, only hash the type and the name.
    // Otherwise the hash will be stronger than
@@ -402,10 +439,12 @@ template <> struct MDNodeKeyImpl<DIDerivedType> {
 };
 
 template <> struct MDNodeSubsetEqualImpl<DIDerivedType> {
-  typedef MDNodeKeyImpl<DIDerivedType> KeyTy;
+  using KeyTy = MDNodeKeyImpl<DIDerivedType>;
+
   static bool isSubsetEqual(const KeyTy &LHS, const DIDerivedType *RHS) {
    return isODRMember(LHS.Tag, LHS.Scope, LHS.Name, RHS);
   }
+
   static bool isSubsetEqual(const DIDerivedType *LHS, const DIDerivedType *RHS) {
    return isODRMember(LHS->getTag(), LHS->getRawScope(), LHS->getRawName(),
                      RHS);
@@ -480,6 +519,7 @@ template <> struct MDNodeKeyImpl<DICompositeType> {
           TemplateParams == RHS->getRawTemplateParams() &&
           Identifier == RHS->getRawIdentifier();
  }
+
   unsigned getHashValue() const {
    // Intentionally computes the hash on a subset of the operands for
    // performance reason.
The subset has to be significant enough to avoid @@ -504,6 +544,7 @@ template <> struct MDNodeKeyImpl<DISubroutineType> { return Flags == RHS->getFlags() && CC == RHS->getCC() && TypeArray == RHS->getRawTypeArray(); } + unsigned getHashValue() const { return hash_combine(Flags, CC, TypeArray); } }; @@ -527,6 +568,7 @@ template <> struct MDNodeKeyImpl<DIFile> { CSKind == RHS->getChecksumKind() && Checksum == RHS->getRawChecksum(); } + unsigned getHashValue() const { return hash_combine(Filename, Directory, CSKind, Checksum); } @@ -601,6 +643,7 @@ template <> struct MDNodeKeyImpl<DISubprogram> { Variables == RHS->getRawVariables() && ThrownTypes == RHS->getRawThrownTypes(); } + unsigned getHashValue() const { // If this is a declaration inside an ODR type, only hash the type and the // name. Otherwise the hash will be stronger than @@ -619,11 +662,13 @@ template <> struct MDNodeKeyImpl<DISubprogram> { }; template <> struct MDNodeSubsetEqualImpl<DISubprogram> { - typedef MDNodeKeyImpl<DISubprogram> KeyTy; + using KeyTy = MDNodeKeyImpl<DISubprogram>; + static bool isSubsetEqual(const KeyTy &LHS, const DISubprogram *RHS) { return isDeclarationOfODRMember(LHS.IsDefinition, LHS.Scope, LHS.LinkageName, LHS.TemplateParams, RHS); } + static bool isSubsetEqual(const DISubprogram *LHS, const DISubprogram *RHS) { return isDeclarationOfODRMember(LHS->isDefinition(), LHS->getRawScope(), LHS->getRawLinkageName(), @@ -672,6 +717,7 @@ template <> struct MDNodeKeyImpl<DILexicalBlock> { return Scope == RHS->getRawScope() && File == RHS->getRawFile() && Line == RHS->getLine() && Column == RHS->getColumn(); } + unsigned getHashValue() const { return hash_combine(Scope, File, Line, Column); } @@ -692,6 +738,7 @@ template <> struct MDNodeKeyImpl<DILexicalBlockFile> { return Scope == RHS->getRawScope() && File == RHS->getRawFile() && Discriminator == RHS->getDiscriminator(); } + unsigned getHashValue() const { return hash_combine(Scope, File, Discriminator); } @@ -712,6 +759,7 @@ template <> struct MDNodeKeyImpl<DINamespace> { return Scope == RHS->getRawScope() && Name == RHS->getRawName() && ExportSymbols == RHS->getExportSymbols(); } + unsigned getHashValue() const { return hash_combine(Scope, Name); } @@ -723,6 +771,7 @@ template <> struct MDNodeKeyImpl<DIModule> { MDString *ConfigurationMacros; MDString *IncludePath; MDString *ISysRoot; + MDNodeKeyImpl(Metadata *Scope, MDString *Name, MDString *ConfigurationMacros, MDString *IncludePath, MDString *ISysRoot) : Scope(Scope), Name(Name), ConfigurationMacros(ConfigurationMacros), @@ -738,6 +787,7 @@ template <> struct MDNodeKeyImpl<DIModule> { IncludePath == RHS->getRawIncludePath() && ISysRoot == RHS->getRawISysRoot(); } + unsigned getHashValue() const { return hash_combine(Scope, Name, ConfigurationMacros, IncludePath, ISysRoot); @@ -755,6 +805,7 @@ template <> struct MDNodeKeyImpl<DITemplateTypeParameter> { bool isKeyOf(const DITemplateTypeParameter *RHS) const { return Name == RHS->getRawName() && Type == RHS->getRawType(); } + unsigned getHashValue() const { return hash_combine(Name, Type); } }; @@ -774,6 +825,7 @@ template <> struct MDNodeKeyImpl<DITemplateValueParameter> { return Tag == RHS->getTag() && Name == RHS->getRawName() && Type == RHS->getRawType() && Value == RHS->getValue(); } + unsigned getHashValue() const { return hash_combine(Tag, Name, Type, Value); } }; @@ -816,6 +868,7 @@ template <> struct MDNodeKeyImpl<DIGlobalVariable> { RHS->getRawStaticDataMemberDeclaration() && AlignInBits == RHS->getAlignInBits(); } + unsigned getHashValue() const { 
// We do not use AlignInBits in hashing function here on purpose: // in most cases this param for local variable is zero (for function param @@ -856,6 +909,7 @@ template <> struct MDNodeKeyImpl<DILocalVariable> { Type == RHS->getRawType() && Arg == RHS->getArg() && Flags == RHS->getFlags() && AlignInBits == RHS->getAlignInBits(); } + unsigned getHashValue() const { // We do not use AlignInBits in hashing function here on purpose: // in most cases this param for local variable is zero (for function param @@ -877,6 +931,7 @@ template <> struct MDNodeKeyImpl<DIExpression> { bool isKeyOf(const DIExpression *RHS) const { return Elements == RHS->getElements(); } + unsigned getHashValue() const { return hash_combine_range(Elements.begin(), Elements.end()); } @@ -895,6 +950,7 @@ template <> struct MDNodeKeyImpl<DIGlobalVariableExpression> { return Variable == RHS->getRawVariable() && Expression == RHS->getRawExpression(); } + unsigned getHashValue() const { return hash_combine(Variable, Expression); } }; @@ -923,6 +979,7 @@ template <> struct MDNodeKeyImpl<DIObjCProperty> { SetterName == RHS->getRawSetterName() && Attributes == RHS->getAttributes() && Type == RHS->getRawType(); } + unsigned getHashValue() const { return hash_combine(Name, File, Line, GetterName, SetterName, Attributes, Type); @@ -948,6 +1005,7 @@ template <> struct MDNodeKeyImpl<DIImportedEntity> { Entity == RHS->getRawEntity() && Line == RHS->getLine() && Name == RHS->getRawName(); } + unsigned getHashValue() const { return hash_combine(Tag, Scope, Entity, Line, Name); } @@ -969,6 +1027,7 @@ template <> struct MDNodeKeyImpl<DIMacro> { return MIType == RHS->getMacinfoType() && Line == RHS->getLine() && Name == RHS->getRawName() && Value == RHS->getRawValue(); } + unsigned getHashValue() const { return hash_combine(MIType, Line, Name, Value); } @@ -991,6 +1050,7 @@ template <> struct MDNodeKeyImpl<DIMacroFile> { return MIType == RHS->getMacinfoType() && Line == RHS->getLine() && File == RHS->getRawFile() && Elements == RHS->getRawElements(); } + unsigned getHashValue() const { return hash_combine(MIType, Line, File, Elements); } @@ -998,23 +1058,29 @@ template <> struct MDNodeKeyImpl<DIMacroFile> { /// \brief DenseMapInfo for MDNode subclasses. template <class NodeTy> struct MDNodeInfo { - typedef MDNodeKeyImpl<NodeTy> KeyTy; - typedef MDNodeSubsetEqualImpl<NodeTy> SubsetEqualTy; + using KeyTy = MDNodeKeyImpl<NodeTy>; + using SubsetEqualTy = MDNodeSubsetEqualImpl<NodeTy>; + static inline NodeTy *getEmptyKey() { return DenseMapInfo<NodeTy *>::getEmptyKey(); } + static inline NodeTy *getTombstoneKey() { return DenseMapInfo<NodeTy *>::getTombstoneKey(); } + static unsigned getHashValue(const KeyTy &Key) { return Key.getHashValue(); } + static unsigned getHashValue(const NodeTy *N) { return KeyTy(N).getHashValue(); } + static bool isEqual(const KeyTy &LHS, const NodeTy *RHS) { if (RHS == getEmptyKey() || RHS == getTombstoneKey()) return false; return SubsetEqualTy::isSubsetEqual(LHS, RHS) || LHS.isKeyOf(RHS); } + static bool isEqual(const NodeTy *LHS, const NodeTy *RHS) { if (LHS == RHS) return true; @@ -1024,7 +1090,7 @@ template <class NodeTy> struct MDNodeInfo { } }; -#define HANDLE_MDNODE_LEAF(CLASS) typedef MDNodeInfo<CLASS> CLASS##Info; +#define HANDLE_MDNODE_LEAF(CLASS) using CLASS##Info = MDNodeInfo<CLASS>; #include "llvm/IR/Metadata.def" /// \brief Map-like storage for metadata attachments. @@ -1097,24 +1163,24 @@ public: /// will be automatically deleted if this context is deleted. 
SmallPtrSet<Module*, 4> OwnedModules; - LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler; - void *InlineAsmDiagContext; + LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler = nullptr; + void *InlineAsmDiagContext = nullptr; - LLVMContext::DiagnosticHandlerTy DiagnosticHandler; - void *DiagnosticContext; - bool RespectDiagnosticFilters; - bool DiagnosticHotnessRequested; + LLVMContext::DiagnosticHandlerTy DiagnosticHandler = nullptr; + void *DiagnosticContext = nullptr; + bool RespectDiagnosticFilters = false; + bool DiagnosticHotnessRequested = false; std::unique_ptr<yaml::Output> DiagnosticsOutputFile; - LLVMContext::YieldCallbackTy YieldCallback; - void *YieldOpaqueHandle; + LLVMContext::YieldCallbackTy YieldCallback = nullptr; + void *YieldOpaqueHandle = nullptr; - typedef DenseMap<APInt, std::unique_ptr<ConstantInt>, DenseMapAPIntKeyInfo> - IntMapTy; + using IntMapTy = + DenseMap<APInt, std::unique_ptr<ConstantInt>, DenseMapAPIntKeyInfo>; IntMapTy IntConstants; - typedef DenseMap<APFloat, std::unique_ptr<ConstantFP>, DenseMapAPFloatKeyInfo> - FPMapTy; + using FPMapTy = + DenseMap<APFloat, std::unique_ptr<ConstantFP>, DenseMapAPFloatKeyInfo>; FPMapTy FPConstants; FoldingSet<AttributeImpl> AttrsSet; @@ -1142,13 +1208,13 @@ public: DenseMap<Type *, std::unique_ptr<ConstantAggregateZero>> CAZConstants; - typedef ConstantUniqueMap<ConstantArray> ArrayConstantsTy; + using ArrayConstantsTy = ConstantUniqueMap<ConstantArray>; ArrayConstantsTy ArrayConstants; - typedef ConstantUniqueMap<ConstantStruct> StructConstantsTy; + using StructConstantsTy = ConstantUniqueMap<ConstantStruct>; StructConstantsTy StructConstants; - typedef ConstantUniqueMap<ConstantVector> VectorConstantsTy; + using VectorConstantsTy = ConstantUniqueMap<ConstantVector>; VectorConstantsTy VectorConstants; DenseMap<PointerType *, std::unique_ptr<ConstantPointerNull>> CPNConstants; @@ -1163,8 +1229,8 @@ public: ConstantUniqueMap<InlineAsm> InlineAsms; - ConstantInt *TheTrueVal; - ConstantInt *TheFalseVal; + ConstantInt *TheTrueVal = nullptr; + ConstantInt *TheFalseVal = nullptr; std::unique_ptr<ConstantTokenNone> TheNoneToken; @@ -1172,7 +1238,6 @@ public: Type VoidTy, LabelTy, HalfTy, FloatTy, DoubleTy, MetadataTy, TokenTy; Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy; IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty; - /// TypeAllocator - All dynamically allocated types are allocated from this. /// They live forever until the context is torn down. @@ -1180,23 +1245,22 @@ public: DenseMap<unsigned, IntegerType*> IntegerTypes; - typedef DenseSet<FunctionType *, FunctionTypeKeyInfo> FunctionTypeSet; + using FunctionTypeSet = DenseSet<FunctionType *, FunctionTypeKeyInfo>; FunctionTypeSet FunctionTypes; - typedef DenseSet<StructType *, AnonStructTypeKeyInfo> StructTypeSet; + using StructTypeSet = DenseSet<StructType *, AnonStructTypeKeyInfo>; StructTypeSet AnonStructTypes; StringMap<StructType*> NamedStructTypes; - unsigned NamedStructTypesUniqueID; + unsigned NamedStructTypesUniqueID = 0; DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes; DenseMap<std::pair<Type *, unsigned>, VectorType*> VectorTypes; DenseMap<Type*, PointerType*> PointerTypes; // Pointers in AddrSpace = 0 DenseMap<std::pair<Type*, unsigned>, PointerType*> ASPointerTypes; - /// ValueHandles - This map keeps track of all of the value handles that are /// watching a Value*. The Value::HasValueHandle bit is used to know /// whether or not a value has an entry in this map. 
- typedef DenseMap<Value*, ValueHandleBase*> ValueHandlesTy; + using ValueHandlesTy = DenseMap<Value *, ValueHandleBase *>; ValueHandlesTy ValueHandles; /// CustomMDKindNames - Map to hold the metadata string to ID mapping. @@ -1254,6 +1318,6 @@ public: OptBisect &getOptBisect(); }; -} +} // end namespace llvm -#endif +#endif // LLVM_LIB_IR_LLVMCONTEXTIMPL_H diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp index 92e5798dcf214..ac02ff76c8436 100644 --- a/lib/IR/Metadata.cpp +++ b/lib/IR/Metadata.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Metadata.h" #include "LLVMContextImpl.h" #include "MetadataImpl.h" #include "SymbolTableListTraitsImpl.h" @@ -27,6 +26,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -39,6 +39,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/TrackingMDRef.h" #include "llvm/IR/Type.h" @@ -53,6 +54,7 @@ #include <cstdint> #include <iterator> #include <tuple> +#include <type_traits> #include <utility> #include <vector> @@ -233,7 +235,7 @@ void ReplaceableMetadataImpl::replaceAllUsesWith(Metadata *MD) { return; // Copy out uses since UseMap will get touched below. - typedef std::pair<void *, std::pair<OwnerTy, uint64_t>> UseTy; + using UseTy = std::pair<void *, std::pair<OwnerTy, uint64_t>>; SmallVector<UseTy, 8> Uses(UseMap.begin(), UseMap.end()); std::sort(Uses.begin(), Uses.end(), [](const UseTy &L, const UseTy &R) { return L.second.second < R.second.second; @@ -286,7 +288,7 @@ void ReplaceableMetadataImpl::resolveAllUses(bool ResolveUsers) { } // Copy out uses since UseMap could get touched below. 
- typedef std::pair<void *, std::pair<OwnerTy, uint64_t>> UseTy; + using UseTy = std::pair<void *, std::pair<OwnerTy, uint64_t>>; SmallVector<UseTy, 8> Uses(UseMap.begin(), UseMap.end()); std::sort(Uses.begin(), Uses.end(), [](const UseTy &L, const UseTy &R) { return L.second.second < R.second.second; @@ -758,8 +760,8 @@ static T *uniquifyImpl(T *N, DenseSet<T *, InfoT> &Store) { } template <class NodeTy> struct MDNode::HasCachedHash { - typedef char Yes[1]; - typedef char No[2]; + using Yes = char[1]; + using No = char[2]; template <class U, U Val> struct SFINAE {}; template <class U> @@ -1484,7 +1486,7 @@ void GlobalObject::addTypeMetadata(unsigned Offset, Metadata *TypeID) { addMetadata( LLVMContext::MD_type, *MDTuple::get(getContext(), - {ConstantAsMetadata::get(llvm::ConstantInt::get( + {ConstantAsMetadata::get(ConstantInt::get( Type::getInt64Ty(getContext()), Offset)), TypeID})); } diff --git a/lib/IR/Statepoint.cpp b/lib/IR/Statepoint.cpp index 8c3f0f208cc67..18efee2177c34 100644 --- a/lib/IR/Statepoint.cpp +++ b/lib/IR/Statepoint.cpp @@ -44,10 +44,22 @@ bool llvm::isGCRelocate(ImmutableCallSite CS) { return CS.getInstruction() && isa<GCRelocateInst>(CS.getInstruction()); } +bool llvm::isGCRelocate(const Value *V) { + if (auto CS = ImmutableCallSite(V)) + return isGCRelocate(CS); + return false; +} + bool llvm::isGCResult(ImmutableCallSite CS) { return CS.getInstruction() && isa<GCResultInst>(CS.getInstruction()); } +bool llvm::isGCResult(const Value *V) { + if (auto CS = ImmutableCallSite(V)) + return isGCResult(CS); + return false; +} + bool llvm::isStatepointDirectiveAttr(Attribute Attr) { return Attr.hasAttribute("statepoint-id") || Attr.hasAttribute("statepoint-num-patch-bytes"); diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp index e4094d44867b2..1efd481b246c8 100644 --- a/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/lib/LTO/ThinLTOCodeGenerator.cpp @@ -32,7 +32,6 @@ #include "llvm/IR/Verifier.h" #include "llvm/IRReader/IRReader.h" #include "llvm/LTO/LTO.h" -#include "llvm/Linker/Linker.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Support/CachePruning.h" diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index a86fd383003da..562f136a3ce2b 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -45,13 +45,13 @@ add_llvm_library(LLVMMC MCWasmObjectTargetWriter.cpp MCWasmStreamer.cpp MCWin64EH.cpp + MCWinCOFFStreamer.cpp MCWinEH.cpp MachObjectWriter.cpp StringTableBuilder.cpp SubtargetFeature.cpp WasmObjectWriter.cpp WinCOFFObjectWriter.cpp - WinCOFFStreamer.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/MC diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 4d139132df462..30f357826805a 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -633,9 +633,6 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, MCContext &Ctx = Asm.getContext(); if (const MCSymbolRefExpr *RefB = Target.getSymB()) { - assert(RefB->getKind() == MCSymbolRefExpr::VK_None && - "Should not have constructed this"); - // Let A, B and C being the components of Target and R be the location of // the fixup. If the fixup is not pcrel, we want to compute (A - B + C). // If it is pcrel, we want to compute (A - B + C - R). 
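
The (A - B + C) computation described in the comment above is easiest to check with concrete numbers. A minimal sketch of the arithmetic, with plain integers standing in for the MC layout queries (the names and values here are illustrative, not the LLVM API):

#include <cassert>
#include <cstdint>

// Fixup value for a target of the form (A - B + C): A and B are resolved
// symbol addresses, C is the constant addend, and R is the address of the
// fixup itself, subtracted only for PC-relative fixups.
static uint64_t fixupValue(uint64_t A, uint64_t B, uint64_t C, uint64_t R,
                           bool IsPCRel) {
  uint64_t Value = A - B + C;
  if (IsPCRel)
    Value -= R;
  return Value;
}

int main() {
  // A = 0x1000, B = 0x800, C = 8, fixup located at R = 0x700.
  assert(fixupValue(0x1000, 0x800, 8, 0x700, /*IsPCRel=*/false) == 0x808);
  assert(fixupValue(0x1000, 0x800, 8, 0x700, /*IsPCRel=*/true) == 0x108);
}
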
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 53cdaac3aa54b..92c5da0e9fef9 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -193,14 +193,23 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, // FIXME: This code has some duplication with recordRelocation. We should // probably merge the two into a single callback that tries to evaluate a // fixup and records a relocation if one is needed. + + // On error claim to have completely evaluated the fixup, to prevent any + // further processing from being done. const MCExpr *Expr = Fixup.getValue(); + MCContext &Ctx = getContext(); + Value = 0; if (!Expr->evaluateAsRelocatable(Target, &Layout, &Fixup)) { - getContext().reportError(Fixup.getLoc(), "expected relocatable expression"); - // Claim to have completely evaluated the fixup, to prevent any further - // processing from being done. - Value = 0; + Ctx.reportError(Fixup.getLoc(), "expected relocatable expression"); return true; } + if (const MCSymbolRefExpr *RefB = Target.getSymB()) { + if (RefB->getKind() != MCSymbolRefExpr::VK_None) { + Ctx.reportError(Fixup.getLoc(), + "unsupported subtraction of qualified symbol"); + return true; + } + } bool IsPCRel = Backend.getFixupKindInfo( Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel; @@ -254,8 +263,7 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, // Let the backend adjust the fixup value if necessary, including whether // we need a relocation. - Backend.processFixupValue(*this, Layout, Fixup, DF, Target, Value, - IsResolved); + Backend.processFixupValue(*this, Fixup, Target, IsResolved); return IsResolved; } @@ -639,9 +647,9 @@ void MCAssembler::writeSectionData(const MCSection *Sec, Layout.getSectionAddressSize(Sec)); } -std::pair<uint64_t, bool> MCAssembler::handleFixup(const MCAsmLayout &Layout, - MCFragment &F, - const MCFixup &Fixup) { +std::tuple<MCValue, uint64_t, bool> +MCAssembler::handleFixup(const MCAsmLayout &Layout, MCFragment &F, + const MCFixup &Fixup) { // Evaluate the fixup. 
MCValue Target; uint64_t FixedValue; @@ -654,7 +662,7 @@ std::pair<uint64_t, bool> MCAssembler::handleFixup(const MCAsmLayout &Layout, getWriter().recordRelocation(*this, Layout, &F, Fixup, Target, IsPCRel, FixedValue); } - return std::make_pair(FixedValue, IsPCRel); + return std::make_tuple(Target, FixedValue, IsPCRel); } void MCAssembler::layout(MCAsmLayout &Layout) { @@ -731,9 +739,11 @@ void MCAssembler::layout(MCAsmLayout &Layout) { for (const MCFixup &Fixup : Fixups) { uint64_t FixedValue; bool IsPCRel; - std::tie(FixedValue, IsPCRel) = handleFixup(Layout, Frag, Fixup); - getBackend().applyFixup(Fixup, Contents.data(), Contents.size(), - FixedValue, IsPCRel, getContext()); + MCValue Target; + std::tie(Target, FixedValue, IsPCRel) = + handleFixup(Layout, Frag, Fixup); + getBackend().applyFixup(*this, Fixup, Target, Contents, FixedValue, + IsPCRel); } } } diff --git a/lib/MC/MCFragment.cpp b/lib/MC/MCFragment.cpp index f3d0eb55eecd4..6e0249377a899 100644 --- a/lib/MC/MCFragment.cpp +++ b/lib/MC/MCFragment.cpp @@ -307,7 +307,7 @@ raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) { } // end namespace llvm #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void MCFragment::dump() { +LLVM_DUMP_METHOD void MCFragment::dump() const { raw_ostream &OS = errs(); OS << "<"; @@ -328,9 +328,9 @@ LLVM_DUMP_METHOD void MCFragment::dump() { case MCFragment::FT_Dummy: OS << "MCDummyFragment"; break; } - OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder + OS << "<MCFragment " << (const void*) this << " LayoutOrder:" << LayoutOrder << " Offset:" << Offset - << " HasInstructions:" << hasInstructions() + << " HasInstructions:" << hasInstructions() << " BundlePadding:" << static_cast<unsigned>(getBundlePadding()) << ">"; switch (getKind()) { @@ -382,7 +382,8 @@ LLVM_DUMP_METHOD void MCFragment::dump() { } case MCFragment::FT_Fill: { const MCFillFragment *FF = cast<MCFillFragment>(this); - OS << " Value:" << FF->getValue() << " Size:" << FF->getSize(); + OS << " Value:" << static_cast<unsigned>(FF->getValue()) + << " Size:" << FF->getSize(); break; } case MCFragment::FT_Relaxable: { @@ -395,7 +396,8 @@ LLVM_DUMP_METHOD void MCFragment::dump() { case MCFragment::FT_Org: { const MCOrgFragment *OF = cast<MCOrgFragment>(this); OS << "\n "; - OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue(); + OS << " Offset:" << OF->getOffset() + << " Value:" << static_cast<unsigned>(OF->getValue()); break; } case MCFragment::FT_Dwarf: { @@ -445,19 +447,19 @@ LLVM_DUMP_METHOD void MCFragment::dump() { OS << ">"; } -LLVM_DUMP_METHOD void MCAssembler::dump() { +LLVM_DUMP_METHOD void MCAssembler::dump() const{ raw_ostream &OS = errs(); OS << "<MCAssembler\n"; OS << " Sections:[\n "; - for (iterator it = begin(), ie = end(); it != ie; ++it) { + for (const_iterator it = begin(), ie = end(); it != ie; ++it) { if (it != begin()) OS << ",\n "; it->dump(); } OS << "],\n"; OS << " Symbols:["; - for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) { + for (const_symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) { if (it != symbol_begin()) OS << ",\n "; OS << "("; it->dump(); diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp index b961cb3968e86..d141dd6627c46 100644 --- a/lib/MC/MCSection.cpp +++ b/lib/MC/MCSection.cpp @@ -86,7 +86,7 @@ MCSection::getSubsectionInsertionPoint(unsigned Subsection) { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void MCSection::dump() { +LLVM_DUMP_METHOD void 
MCSection::dump() const { raw_ostream &OS = errs(); OS << "<MCSection"; diff --git a/lib/MC/MCWasmStreamer.cpp b/lib/MC/MCWasmStreamer.cpp index 59b62b8d37c30..02fa070f0c57d 100644 --- a/lib/MC/MCWasmStreamer.cpp +++ b/lib/MC/MCWasmStreamer.cpp @@ -98,18 +98,30 @@ bool MCWasmStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) { case MCSA_WeakDefAutoPrivate: case MCSA_Invalid: case MCSA_IndirectSymbol: + case MCSA_Hidden: return false; + + case MCSA_Weak: + case MCSA_WeakReference: + Symbol->setWeak(true); + Symbol->setExternal(true); + break; + case MCSA_Global: Symbol->setExternal(true); break; + case MCSA_ELF_TypeFunction: Symbol->setIsFunction(true); break; + case MCSA_ELF_TypeObject: Symbol->setIsFunction(false); break; + default: // unrecognized directive + llvm_unreachable("unexpected MCSymbolAttr"); return false; } diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/MCWinCOFFStreamer.cpp index b4d0d7a87f1d3..bf341bb1f4511 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/MCWinCOFFStreamer.cpp @@ -1,4 +1,4 @@ -//===- llvm/MC/WinCOFFStreamer.cpp ----------------------------------------===// +//===- llvm/MC/MCWinCOFFStreamer.cpp --------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -190,7 +190,8 @@ void MCWinCOFFStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { << COFF::SCT_COMPLEX_TYPE_SHIFT); } -void MCWinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) { +void MCWinCOFFStreamer::EmitCOFFSectionIndex(const MCSymbol *Symbol) { + visitUsedSymbol(*Symbol); MCDataFragment *DF = getOrCreateDataFragment(); const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(Symbol, getContext()); MCFixup Fixup = MCFixup::create(DF->getContents().size(), SRE, FK_SecRel_2); @@ -198,8 +199,9 @@ void MCWinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) { DF->getContents().resize(DF->getContents().size() + 2, 0); } -void MCWinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol, +void MCWinCOFFStreamer::EmitCOFFSecRel32(const MCSymbol *Symbol, uint64_t Offset) { + visitUsedSymbol(*Symbol); MCDataFragment *DF = getOrCreateDataFragment(); // Create Symbol A for the relocation relative reference. const MCExpr *MCE = MCSymbolRefExpr::create(Symbol, getContext()); diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp index db304c027f991..45534ba182123 100644 --- a/lib/MC/WasmObjectWriter.cpp +++ b/lib/MC/WasmObjectWriter.cpp @@ -129,15 +129,15 @@ struct WasmGlobal { // Information about a single relocation. struct WasmRelocationEntry { - uint64_t Offset; // Where is the relocation. - const MCSymbolWasm *Symbol; // The symbol to relocate with. - int64_t Addend; // A value to add to the symbol. - unsigned Type; // The type of the relocation. - MCSectionWasm *FixupSection;// The section the relocation is targeting. + uint64_t Offset; // Where is the relocation. + const MCSymbolWasm *Symbol; // The symbol to relocate with. + int64_t Addend; // A value to add to the symbol. + unsigned Type; // The type of the relocation. + const MCSectionWasm *FixupSection;// The section the relocation is targeting. 
WasmRelocationEntry(uint64_t Offset, const MCSymbolWasm *Symbol, int64_t Addend, unsigned Type, - MCSectionWasm *FixupSection) + const MCSectionWasm *FixupSection) : Offset(Offset), Symbol(Symbol), Addend(Addend), Type(Type), FixupSection(FixupSection) {} @@ -156,9 +156,19 @@ struct WasmRelocationEntry { Out << "Off=" << Offset << ", Sym=" << Symbol << ", Addend=" << Addend << ", Type=" << Type << ", FixupSection=" << FixupSection; } - void dump() const { print(errs()); } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + LLVM_DUMP_METHOD void dump() const { print(dbgs()); } +#endif }; +#if !defined(NDEBUG) +raw_ostream &operator<<(raw_ostream &OS, const WasmRelocationEntry &Rel) { + Rel.print(OS); + return OS; +} +#endif + class WasmObjectWriter : public MCObjectWriter { /// Helper struct for containing some precomputed information on symbols. struct WasmSymbolData { @@ -229,6 +239,11 @@ private: void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; + void writeString(const StringRef Str) { + encodeULEB128(Str.size(), getStream()); + writeBytes(Str); + } + void writeValueType(wasm::ValType Ty) { encodeSLEB128(int32_t(Ty), getStream()); } @@ -250,7 +265,8 @@ private: uint32_t NumFuncImports); void writeCodeRelocSection(); void writeDataRelocSection(uint64_t DataSectionHeaderSize); - void writeLinkingMetaDataSection(bool HasStackPointer, + void writeLinkingMetaDataSection(ArrayRef<StringRef> WeakSymbols, + bool HasStackPointer, uint32_t StackPointerGlobal); void applyRelocations(ArrayRef<WasmRelocationEntry> Relocations, @@ -282,6 +298,7 @@ void WasmObjectWriter::startSection(SectionBookkeeping &Section, assert((Name != nullptr) == (SectionId == wasm::WASM_SEC_CUSTOM) && "Only custom sections can have names"); + DEBUG(dbgs() << "startSection " << SectionId << ": " << Name << "\n"); encodeULEB128(SectionId, getStream()); Section.SizeOffset = getStream().tell(); @@ -295,8 +312,8 @@ void WasmObjectWriter::startSection(SectionBookkeeping &Section, // Custom sections in wasm also have a string identifier. 
if (SectionId == wasm::WASM_SEC_CUSTOM) { - encodeULEB128(strlen(Name), getStream()); - writeBytes(Name); + assert(Name); + writeString(StringRef(Name)); } } @@ -307,6 +324,7 @@ void WasmObjectWriter::endSection(SectionBookkeeping &Section) { if (uint32_t(Size) != Size) report_fatal_error("section size does not fit in a uint32_t"); + DEBUG(dbgs() << "endSection size=" << Size << "\n"); unsigned Padding = PaddingFor5ByteULEB128(Size); // Write the final section size to the payload_len field, which follows @@ -332,7 +350,7 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) { - MCSectionWasm &FixupSection = cast<MCSectionWasm>(*Fragment->getParent()); + const auto &FixupSection = cast<MCSectionWasm>(*Fragment->getParent()); uint64_t C = Target.getConstant(); uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); MCContext &Ctx = Asm.getContext(); @@ -406,9 +424,12 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, } assert(!IsPCRel); + assert(SymA); + unsigned Type = getRelocType(Target, Fixup); WasmRelocationEntry Rec(FixupOffset, SymA, C, Type, &FixupSection); + DEBUG(dbgs() << "WasmReloc: " << Rec << "\n"); if (FixupSection.hasInstructions()) CodeRelocations.push_back(Rec); @@ -453,11 +474,10 @@ static uint32_t ProvisionalValue(const WasmRelocationEntry &RelEntry) { const MCSymbolWasm *Sym = RelEntry.Symbol; // For undefined symbols, use a hopefully invalid value. - if (!Sym->isDefined(false)) + if (!Sym->isDefined(/*SetUsed=*/false)) return UINT32_MAX; - MCSectionWasm &Section = - cast<MCSectionWasm>(RelEntry.Symbol->getSection(false)); + const auto &Section = cast<MCSectionWasm>(RelEntry.Symbol->getSection(false)); uint64_t Address = Section.getSectionOffset() + RelEntry.Addend; // Ignore overflow. LLVM allows address arithmetic to silently wrap. 
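
The writeString() helper used above emits wasm's standard string framing: a ULEB128 byte length followed by the raw bytes, with no terminator. The patch itself calls llvm::encodeULEB128 from llvm/Support/LEB128.h; the following is a self-contained sketch of the same encoding, with helper names that are ours:

#include <cstdint>
#include <string>
#include <vector>

// ULEB128: 7 payload bits per byte, most-significant bit set on every byte
// except the last.
static void writeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // continuation bit
    Out.push_back(Byte);
  } while (Value != 0);
}

// Length-prefixed string, as writeString() emits for custom-section names.
static void writeWasmString(const std::string &Str, std::vector<uint8_t> &Out) {
  writeULEB128(Str.size(), Out);
  Out.insert(Out.end(), Str.begin(), Str.end());
}
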
@@ -471,16 +491,23 @@ uint32_t WasmObjectWriter::getRelocationIndexValue( switch (RelEntry.Type) { case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: - assert(IndirectSymbolIndices.count(RelEntry.Symbol)); + if (!IndirectSymbolIndices.count(RelEntry.Symbol)) + report_fatal_error("symbol not found in table index space:" + + RelEntry.Symbol->getName()); return IndirectSymbolIndices[RelEntry.Symbol]; case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: - assert(SymbolIndices.count(RelEntry.Symbol)); + if (!SymbolIndices.count(RelEntry.Symbol)) + report_fatal_error("symbol not found in function/global index space:" + + RelEntry.Symbol->getName()); return SymbolIndices[RelEntry.Symbol]; case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: - assert(TypeIndices.count(RelEntry.Symbol)); + if (!TypeIndices.count(RelEntry.Symbol)) + report_fatal_error("symbol not found in type index space:" + + RelEntry.Symbol->getName()); return TypeIndices[RelEntry.Symbol]; default: llvm_unreachable("invalid relocation type"); @@ -497,10 +524,12 @@ void WasmObjectWriter::applyRelocations( RelEntry.FixupSection->getSectionOffset() + RelEntry.Offset; + DEBUG(dbgs() << "applyRelocation: " << RelEntry << "\n"); switch (RelEntry.Type) { case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: - case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: { + case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB: { uint32_t Index = getRelocationIndexValue(RelEntry); WritePatchableSLEB(Stream, Index, Offset); break; @@ -526,7 +555,7 @@ void WasmObjectWriter::applyRelocations( break; } default: - llvm_unreachable("unsupported relocation type"); + llvm_unreachable("invalid relocation type"); } } } @@ -573,6 +602,7 @@ void WasmObjectWriter::writeTypeSection( endSection(Section); } + void WasmObjectWriter::writeImportSection( const SmallVector<WasmImport, 4> &Imports) { if (Imports.empty()) @@ -583,13 +613,8 @@ void WasmObjectWriter::writeImportSection( encodeULEB128(Imports.size(), getStream()); for (const WasmImport &Import : Imports) { - StringRef ModuleName = Import.ModuleName; - encodeULEB128(ModuleName.size(), getStream()); - writeBytes(ModuleName); - - StringRef FieldName = Import.FieldName; - encodeULEB128(FieldName.size(), getStream()); - writeBytes(FieldName); + writeString(Import.ModuleName); + writeString(Import.FieldName); encodeULEB128(Import.Kind, getStream()); @@ -697,11 +722,8 @@ void WasmObjectWriter::writeExportSection( encodeULEB128(Exports.size(), getStream()); for (const WasmExport &Export : Exports) { - encodeULEB128(Export.FieldName.size(), getStream()); - writeBytes(Export.FieldName); - + writeString(Export.FieldName); encodeSLEB128(Export.Kind, getStream()); - encodeULEB128(Export.Index, getStream()); + encodeULEB128(Export.Index, getStream()); } @@ -743,17 +765,7 @@ void WasmObjectWriter::writeCodeSection( encodeULEB128(Functions.size(), getStream()); for (const WasmFunction &Func : Functions) { - MCSectionWasm &FuncSection = - static_cast<MCSectionWasm &>(Func.Sym->getSection()); - - if (Func.Sym->isVariable()) - report_fatal_error("weak symbols not supported yet"); - - if (Func.Sym->getOffset() != 0) - report_fatal_error("function sections must contain one function each"); - - if (!Func.Sym->getSize()) - report_fatal_error("function symbols must have a size set with .size"); + auto &FuncSection = static_cast<MCSectionWasm
&>(Func.Sym->getSection()); int64_t Size = 0; if (!Func.Sym->getSize()->evaluateAsAbsolute(Size, Layout)) @@ -761,8 +773,7 @@ void WasmObjectWriter::writeCodeSection( encodeULEB128(Size, getStream()); - FuncSection.setSectionOffset(getStream().tell() - - Section.ContentsOffset); + FuncSection.setSectionOffset(getStream().tell() - Section.ContentsOffset); Asm.writeSectionData(&FuncSection, Layout); } @@ -815,15 +826,13 @@ void WasmObjectWriter::writeNameSection( for (const WasmImport &Import : Imports) { if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION) { encodeULEB128(Index, getStream()); - encodeULEB128(Import.FieldName.size(), getStream()); - writeBytes(Import.FieldName); + writeString(Import.FieldName); ++Index; } } for (const WasmFunction &Func : Functions) { encodeULEB128(Index, getStream()); - encodeULEB128(Func.Sym->getName().size(), getStream()); - writeBytes(Func.Sym->getName()); + writeString(Func.Sym->getName()); ++Index; } @@ -868,22 +877,37 @@ void WasmObjectWriter::writeDataRelocSection(uint64_t DataSectionHeaderSize) { } void WasmObjectWriter::writeLinkingMetaDataSection( - bool HasStackPointer, uint32_t StackPointerGlobal) { - if (!HasStackPointer) + ArrayRef<StringRef> WeakSymbols, bool HasStackPointer, + uint32_t StackPointerGlobal) { + if (!HasStackPointer && WeakSymbols.empty()) return; + SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_CUSTOM, "linking"); + SectionBookkeeping SubSection; - encodeULEB128(1, getStream()); // count + if (HasStackPointer) { + startSection(SubSection, wasm::WASM_STACK_POINTER); + encodeULEB128(StackPointerGlobal, getStream()); // id + endSection(SubSection); + } - encodeULEB128(wasm::WASM_STACK_POINTER, getStream()); // type - encodeULEB128(StackPointerGlobal, getStream()); // id + if (WeakSymbols.size() != 0) { + startSection(SubSection, wasm::WASM_SYMBOL_INFO); + encodeULEB128(WeakSymbols.size(), getStream()); + for (const StringRef Export: WeakSymbols) { + writeString(Export); + encodeULEB128(wasm::WASM_SYMBOL_FLAG_WEAK, getStream()); + } + endSection(SubSection); + } endSection(Section); } void WasmObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { + DEBUG(dbgs() << "WasmObjectWriter::writeObject\n"); MCContext &Ctx = Asm.getContext(); wasm::ValType PtrType = is64Bit() ? wasm::ValType::I64 : wasm::ValType::I32; @@ -894,6 +918,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, SmallVector<WasmGlobal, 4> Globals; SmallVector<WasmImport, 4> Imports; SmallVector<WasmExport, 4> Exports; + SmallVector<StringRef, 4> WeakSymbols; SmallPtrSet<const MCSymbolWasm *, 4> IsAddressTaken; unsigned NumFuncImports = 0; unsigned NumGlobalImports = 0; @@ -902,7 +927,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, bool HasStackPointer = false; // Populate the IsAddressTaken set. 
- for (WasmRelocationEntry RelEntry : CodeRelocations) { + for (const WasmRelocationEntry &RelEntry : CodeRelocations) { switch (RelEntry.Type) { case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: @@ -912,7 +937,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, break; } } - for (WasmRelocationEntry RelEntry : DataRelocations) { + for (const WasmRelocationEntry &RelEntry : DataRelocations) { switch (RelEntry.Type) { case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: @@ -975,7 +1000,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, const MCFragment &Frag = *GlobalVars->begin(); if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data) report_fatal_error("only data supported in .global_variables"); - const MCDataFragment &DataFrag = cast<MCDataFragment>(Frag); + const auto &DataFrag = cast<MCDataFragment>(Frag); if (!DataFrag.getFixups().empty()) report_fatal_error("fixups not supported in .global_variables"); const SmallVectorImpl<char> &Contents = DataFrag.getContents(); @@ -1031,7 +1056,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, const MCFragment &Frag = *StackPtr->begin(); if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data) report_fatal_error("only data supported in .stack_pointer"); - const MCDataFragment &DataFrag = cast<MCDataFragment>(Frag); + const auto &DataFrag = cast<MCDataFragment>(Frag); if (!DataFrag.getFixups().empty()) report_fatal_error("fixups not supported in .stack_pointer"); const SmallVectorImpl<char> &Contents = DataFrag.getContents(); @@ -1041,14 +1066,30 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, StackPointerGlobal = NumGlobalImports + *(const int32_t *)Contents.data(); } - // Handle defined symbols. + // Handle regular defined and undefined symbols. for (const MCSymbol &S : Asm.symbols()) { // Ignore unnamed temporary symbols, which aren't ever exported, imported, // or used in relocations. if (S.isTemporary() && S.getName().empty()) continue; + + // Variable references (weak references) are handled in a second pass + if (S.isVariable()) + continue; + const auto &WS = static_cast<const MCSymbolWasm &>(S); + DEBUG(dbgs() << "MCSymbol: '" << S << "'" + << " isDefined=" << S.isDefined() << " isExternal=" + << S.isExternal() << " isTemporary=" << S.isTemporary() + << " isFunction=" << WS.isFunction() + << " isWeak=" << WS.isWeak() + << " isVariable=" << WS.isVariable() << "\n"); + + if (WS.isWeak()) + WeakSymbols.push_back(WS.getName()); + unsigned Index; + if (WS.isFunction()) { // Prepare the function's type, if we haven't seen it yet. WasmFunctionType F; @@ -1062,6 +1103,14 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, int32_t Type = Pair.first->second; if (WS.isDefined(/*SetUsed=*/false)) { + if (WS.getOffset() != 0) + report_fatal_error( + "function sections must contain one function each"); + + if (WS.getSize() == 0) + report_fatal_error( + "function symbols must have a size set with .size"); + // A definition. Take the next available index. Index = NumFuncImports + Functions.size(); @@ -1072,6 +1121,9 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, SymbolIndices[&WS] = Index; Functions.push_back(Func); } else { + // Should be no such thing as weak undefined symbol + assert(!WS.isVariable()); + // An import; the index was assigned above. 
Index = SymbolIndices.find(&WS)->second; } @@ -1085,86 +1137,108 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, if (WS.isTemporary() && !WS.getSize()) continue; - if (WS.isDefined(false)) { - if (WS.getOffset() != 0) - report_fatal_error("data sections must contain one variable each: " + - WS.getName()); - if (!WS.getSize()) - report_fatal_error("data symbols must have a size set with .size: " + - WS.getName()); - - int64_t Size = 0; - if (!WS.getSize()->evaluateAsAbsolute(Size, Layout)) - report_fatal_error(".size expression must be evaluatable"); - - MCSectionWasm &DataSection = - static_cast<MCSectionWasm &>(WS.getSection()); - - if (uint64_t(Size) != Layout.getSectionFileSize(&DataSection)) - report_fatal_error("data sections must contain at most one variable"); - - DataBytes.resize(alignTo(DataBytes.size(), DataSection.getAlignment())); - - DataSection.setSectionOffset(DataBytes.size()); - - for (MCSection::iterator I = DataSection.begin(), E = DataSection.end(); - I != E; ++I) { - const MCFragment &Frag = *I; - if (Frag.hasInstructions()) - report_fatal_error("only data supported in data sections"); - - if (const MCAlignFragment *Align = dyn_cast<MCAlignFragment>(&Frag)) { - if (Align->getValueSize() != 1) - report_fatal_error("only byte values supported for alignment"); - // If nops are requested, use zeros, as this is the data section. - uint8_t Value = Align->hasEmitNops() ? 0 : Align->getValue(); - uint64_t Size = std::min<uint64_t>(alignTo(DataBytes.size(), - Align->getAlignment()), - DataBytes.size() + - Align->getMaxBytesToEmit()); - DataBytes.resize(Size, Value); - } else if (const MCFillFragment *Fill = - dyn_cast<MCFillFragment>(&Frag)) { - DataBytes.insert(DataBytes.end(), Size, Fill->getValue()); - } else { - const MCDataFragment &DataFrag = cast<MCDataFragment>(Frag); - const SmallVectorImpl<char> &Contents = DataFrag.getContents(); - - DataBytes.insert(DataBytes.end(), Contents.begin(), Contents.end()); - } - } - - // For each global, prepare a corresponding wasm global holding its - // address. For externals these will also be named exports. - Index = NumGlobalImports + Globals.size(); + if (!WS.isDefined(/*SetUsed=*/false)) + continue; - WasmGlobal Global; - Global.Type = PtrType; - Global.IsMutable = false; - Global.HasImport = false; - Global.InitialValue = DataSection.getSectionOffset(); - Global.ImportIndex = 0; - SymbolIndices[&WS] = Index; - Globals.push_back(Global); + if (WS.getOffset() != 0) + report_fatal_error("data sections must contain one variable each: " + + WS.getName()); + if (!WS.getSize()) + report_fatal_error("data symbols must have a size set with .size: " + + WS.getName()); + + int64_t Size = 0; + if (!WS.getSize()->evaluateAsAbsolute(Size, Layout)) + report_fatal_error(".size expression must be evaluatable"); + + auto &DataSection = static_cast<MCSectionWasm &>(WS.getSection()); + + if (uint64_t(Size) != Layout.getSectionFileSize(&DataSection)) + report_fatal_error("data sections must contain at most one variable"); + + DataBytes.resize(alignTo(DataBytes.size(), DataSection.getAlignment())); + + DataSection.setSectionOffset(DataBytes.size()); + + for (const MCFragment &Frag : DataSection) { + if (Frag.hasInstructions()) + report_fatal_error("only data supported in data sections"); + + if (auto *Align = dyn_cast<MCAlignFragment>(&Frag)) { + if (Align->getValueSize() != 1) + report_fatal_error("only byte values supported for alignment"); + // If nops are requested, use zeros, as this is the data section. 
+ uint8_t Value = Align->hasEmitNops() ? 0 : Align->getValue(); + uint64_t Size = std::min<uint64_t>(alignTo(DataBytes.size(), + Align->getAlignment()), + DataBytes.size() + + Align->getMaxBytesToEmit()); + DataBytes.resize(Size, Value); + } else if (auto *Fill = dyn_cast<MCFillFragment>(&Frag)) { + DataBytes.insert(DataBytes.end(), Size, Fill->getValue()); + } else { + const auto &DataFrag = cast<MCDataFragment>(Frag); + const SmallVectorImpl<char> &Contents = DataFrag.getContents(); + + DataBytes.insert(DataBytes.end(), Contents.begin(), Contents.end()); + } } + + // For each global, prepare a corresponding wasm global holding its + // address. For externals these will also be named exports. + Index = NumGlobalImports + Globals.size(); + + WasmGlobal Global; + Global.Type = PtrType; + Global.IsMutable = false; + Global.HasImport = false; + Global.InitialValue = DataSection.getSectionOffset(); + Global.ImportIndex = 0; + SymbolIndices[&WS] = Index; + Globals.push_back(Global); } // If the symbol is visible outside this translation unit, export it. - if (WS.isExternal()) { - assert(WS.isDefined(false)); + if (WS.isExternal() && WS.isDefined(/*SetUsed=*/false)) { WasmExport Export; Export.FieldName = WS.getName(); Export.Index = Index; - if (WS.isFunction()) Export.Kind = wasm::WASM_EXTERNAL_FUNCTION; else Export.Kind = wasm::WASM_EXTERNAL_GLOBAL; - Exports.push_back(Export); } } + // Handle weak aliases + for (const MCSymbol &S : Asm.symbols()) { + if (!S.isVariable()) + continue; + assert(S.isExternal()); + assert(S.isDefined(/*SetUsed=*/false)); + + const auto &WS = static_cast<const MCSymbolWasm &>(S); + + // Find the target symbol of this weak alias + const MCExpr *Expr = WS.getVariableValue(); + auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr); + const auto *ResolvedSym = cast<MCSymbolWasm>(&Inner->getSymbol()); + uint32_t Index = SymbolIndices.find(ResolvedSym)->second; + DEBUG(dbgs() << "Weak alias: '" << WS << "' -> '" << ResolvedSym << "' = " << Index << "\n"); + SymbolIndices[&WS] = Index; + + WasmExport Export; + Export.FieldName = WS.getName(); + Export.Index = Index; + if (WS.isFunction()) + Export.Kind = wasm::WASM_EXTERNAL_FUNCTION; + else + Export.Kind = wasm::WASM_EXTERNAL_GLOBAL; + WeakSymbols.push_back(Export.FieldName); + Exports.push_back(Export); + } + // Add types for indirect function calls. for (const WasmRelocationEntry &Fixup : CodeRelocations) { if (Fixup.Type != wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB) @@ -1198,7 +1272,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, writeNameSection(Functions, Imports, NumFuncImports); writeCodeRelocSection(); writeDataRelocSection(DataSectionHeaderSize); - writeLinkingMetaDataSection(HasStackPointer, StackPointerGlobal); + writeLinkingMetaDataSection(WeakSymbols, HasStackPointer, StackPointerGlobal); // TODO: Translate the .comment section to the output. // TODO: Translate debug sections to the output. 
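
The "linking" metadata emitted above now nests subsections inside one custom section, each framed like a wasm section: a type code, a ULEB128 payload length (the real writer reserves a padded 5-byte length via startSection/endSection so it can patch it afterwards), then the payload. A rough sketch of the WASM_SYMBOL_INFO subsection built for the collected weak symbols; the numeric codes below are placeholders for the constants in llvm/BinaryFormat/Wasm.h:

#include <cstdint>
#include <string>
#include <vector>

// Placeholder codes; the real constants are wasm::WASM_SYMBOL_INFO and
// wasm::WASM_SYMBOL_FLAG_WEAK in llvm/BinaryFormat/Wasm.h.
constexpr uint8_t kSymbolInfoCode = 2;
constexpr uint32_t kSymbolFlagWeak = 1;

static void uleb(uint64_t V, std::vector<uint8_t> &Out) {
  do {
    uint8_t B = V & 0x7f;
    V >>= 7;
    if (V != 0)
      B |= 0x80;
    Out.push_back(B);
  } while (V != 0);
}

// One WASM_SYMBOL_INFO subsection: a count, then (name, flags) records,
// mirroring the loop over WeakSymbols in the patch above.
std::vector<uint8_t> emitSymbolInfo(const std::vector<std::string> &Weak) {
  std::vector<uint8_t> Payload;
  uleb(Weak.size(), Payload);
  for (const std::string &Name : Weak) {
    uleb(Name.size(), Payload);
    Payload.insert(Payload.end(), Name.begin(), Name.end());
    uleb(kSymbolFlagWeak, Payload);
  }
  std::vector<uint8_t> Out;
  Out.push_back(kSymbolInfoCode); // subsection type
  uleb(Payload.size(), Out);      // payload length (unpadded in this sketch)
  Out.insert(Out.end(), Payload.begin(), Payload.end());
  return Out;
}
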
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index 53dee3e8b9f36..fc5234950391b 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -735,7 +735,6 @@ void WinCOFFObjectWriter::recordRelocation( COFFSection *Sec = SectionMap[MCSec]; const MCSymbolRefExpr *SymB = Target.getSymB(); - bool CrossSection = false; if (SymB) { const MCSymbol *B = &SymB->getSymbol(); @@ -747,28 +746,9 @@ void WinCOFFObjectWriter::recordRelocation( return; } - if (!A.getFragment()) { - Asm.getContext().reportError( - Fixup.getLoc(), - Twine("symbol '") + A.getName() + - "' can not be undefined in a subtraction expression"); - return; - } - - CrossSection = &A.getSection() != &B->getSection(); - // Offset of the symbol in the section int64_t OffsetOfB = Layout.getSymbolOffset(*B); - // In the case where we have SymbA and SymB, we just need to store the delta - // between the two symbols. Update FixedValue to account for the delta, and - // skip recording the relocation. - if (!CrossSection) { - int64_t OffsetOfA = Layout.getSymbolOffset(A); - FixedValue = (OffsetOfA - OffsetOfB) + Target.getConstant(); - return; - } - // Offset of the relocation in the section int64_t OffsetOfRelocation = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); @@ -784,7 +764,7 @@ void WinCOFFObjectWriter::recordRelocation( Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment); // Turn relocations for temporary symbols into section relocations. - if (A.isTemporary() || CrossSection) { + if (A.isTemporary()) { MCSection *TargetSection = &A.getSection(); assert( SectionMap.find(TargetSection) != SectionMap.end() && @@ -802,7 +782,7 @@ void WinCOFFObjectWriter::recordRelocation( Reloc.Data.VirtualAddress += Fixup.getOffset(); Reloc.Data.Type = TargetObjectWriter->getRelocType( - Target, Fixup, CrossSection, Asm.getBackend()); + Asm.getContext(), Target, Fixup, SymB, Asm.getBackend()); // FIXME: Can anyone explain what this does other than adjust for the size // of the offset? diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index 579c8dde366a0..9a760d86e7e22 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -650,6 +650,23 @@ std::error_code COFFObjectFile::initDebugDirectoryPtr() { return std::error_code(); } +std::error_code COFFObjectFile::initLoadConfigPtr() { + // Get the RVA of the load config directory. Do nothing if it does not exist. + const data_directory *DataEntry; + if (getDataDirectory(COFF::LOAD_CONFIG_TABLE, DataEntry)) + return std::error_code(); + + // Do nothing if the RVA is NULL.
+ if (DataEntry->RelativeVirtualAddress == 0) + return std::error_code(); + uintptr_t IntPtr = 0; + if (std::error_code EC = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) + return EC; + + LoadConfig = (const void *)IntPtr; + return std::error_code(); +} + COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC) : ObjectFile(Binary::ID_COFF, Object), COFFHeader(nullptr), COFFBigObjHeader(nullptr), PE32Header(nullptr), PE32PlusHeader(nullptr), @@ -784,6 +801,9 @@ COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC) if ((EC = initDebugDirectoryPtr())) return; + if ((EC = initLoadConfigPtr())) + return; + EC = std::error_code(); } diff --git a/lib/Object/IRSymtab.cpp b/lib/Object/IRSymtab.cpp index a6cd5dda12d3e..7bca032a7be1d 100644 --- a/lib/Object/IRSymtab.cpp +++ b/lib/Object/IRSymtab.cpp @@ -46,15 +46,15 @@ namespace { /// Stores the temporary state that is required to build an IR symbol table. struct Builder { SmallVector<char, 0> &Symtab; - SmallVector<char, 0> &Strtab; + StringTableBuilder &StrtabBuilder; + StringSaver Saver; - Builder(SmallVector<char, 0> &Symtab, SmallVector<char, 0> &Strtab) - : Symtab(Symtab), Strtab(Strtab) {} - - StringTableBuilder StrtabBuilder{StringTableBuilder::RAW}; - - BumpPtrAllocator Alloc; - StringSaver Saver{Alloc}; + // This ctor initializes a StringSaver using the passed in BumpPtrAllocator. + // The StringTableBuilder does not create a copy of any strings added to it, + // so this provides somewhere to store any strings that we create. + Builder(SmallVector<char, 0> &Symtab, StringTableBuilder &StrtabBuilder, + BumpPtrAllocator &Alloc) + : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {} DenseMap<const Comdat *, unsigned> ComdatMap; Mangler Mang; @@ -240,7 +240,7 @@ Error Builder::build(ArrayRef<Module *> IRMods) { return Err; COFFLinkerOptsOS.flush(); - setStr(Hdr.COFFLinkerOpts, COFFLinkerOpts); + setStr(Hdr.COFFLinkerOpts, Saver.save(COFFLinkerOpts)); // We are about to fill in the header's range fields, so reserve space for it // and copy it in afterwards. 
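
The Builder rework above threads a caller-owned StringTableBuilder and BumpPtrAllocator through because, as the new comment notes, StringTableBuilder does not copy the strings added to it: anything built on the fly has to be parked in storage that outlives the builder, which is what StringSaver provides. A minimal sketch of that idiom using the real support classes (variable names are ours):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/StringSaver.h"
#include <string>

void pinTransientString() {
  llvm::BumpPtrAllocator Alloc;
  llvm::StringSaver Saver(Alloc);
  // A temporary that would dangle if handed to StringTableBuilder directly.
  std::string Transient = std::string("linker-opts-") + "example";
  // save() copies the bytes into Alloc and returns a StringRef that stays
  // valid as long as the allocator lives.
  llvm::StringRef Stable = Saver.save(Transient);
  (void)Stable; // safe to pass to StringTableBuilder::add() now
}
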
@@ -251,19 +251,15 @@ Error Builder::build(ArrayRef<Module *> IRMods) { writeRange(Hdr.Uncommons, Uncommons); *reinterpret_cast<storage::Header *>(Symtab.data()) = Hdr; - - raw_svector_ostream OS(Strtab); - StrtabBuilder.finalizeInOrder(); - StrtabBuilder.write(OS); - return Error::success(); } } // end anonymous namespace Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab, - SmallVector<char, 0> &Strtab) { - return Builder(Symtab, Strtab).build(Mods); + StringTableBuilder &StrtabBuilder, + BumpPtrAllocator &Alloc) { + return Builder(Symtab, StrtabBuilder, Alloc).build(Mods); } // Upgrade a vector of bitcode modules created by an old version of LLVM by @@ -285,9 +281,15 @@ static Expected<FileContents> upgrade(ArrayRef<BitcodeModule> BMs) { OwnedMods.push_back(std::move(*MOrErr)); } - if (Error E = build(Mods, FC.Symtab, FC.Strtab)) + StringTableBuilder StrtabBuilder(StringTableBuilder::RAW); + BumpPtrAllocator Alloc; + if (Error E = build(Mods, FC.Symtab, StrtabBuilder, Alloc)) return std::move(E); + StrtabBuilder.finalizeInOrder(); + FC.Strtab.resize(StrtabBuilder.getSize()); + StrtabBuilder.write((uint8_t *)FC.Strtab.data()); + FC.TheReader = {{FC.Symtab.data(), FC.Symtab.size()}, {FC.Strtab.data(), FC.Strtab.size()}}; return std::move(FC); diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 7804bbe06f83e..2e4da9f15aa13 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -1951,13 +1951,29 @@ MachOObjectFile::section_rel_end(DataRefImpl Sec) const { return relocation_iterator(RelocationRef(Ret, this)); } +relocation_iterator MachOObjectFile::extrel_begin() const { + DataRefImpl Ret; + Ret.d.a = 0; // Would normally be a section index. + Ret.d.b = 0; // Index into the external relocations + return relocation_iterator(RelocationRef(Ret, this)); +} + +relocation_iterator MachOObjectFile::extrel_end() const { + MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand(); + DataRefImpl Ret; + Ret.d.a = 0; // Would normally be a section index. 
+ Ret.d.b = DysymtabLoadCmd.nextrel; // Index into the external relocations + return relocation_iterator(RelocationRef(Ret, this)); +} + void MachOObjectFile::moveRelocationNext(DataRefImpl &Rel) const { ++Rel.d.b; } uint64_t MachOObjectFile::getRelocationOffset(DataRefImpl Rel) const { - assert(getHeader().filetype == MachO::MH_OBJECT && - "Only implemented for MH_OBJECT"); + assert((getHeader().filetype == MachO::MH_OBJECT || + getHeader().filetype == MachO::MH_KEXT_BUNDLE) && + "Only implemented for MH_OBJECT && MH_KEXT_BUNDLE"); MachO::any_relocation_info RE = getRelocation(Rel); return getAnyRelocationAddress(RE); } @@ -4086,15 +4102,20 @@ MachOObjectFile::getThreadCommand(const LoadCommandInfo &L) const { MachO::any_relocation_info MachOObjectFile::getRelocation(DataRefImpl Rel) const { - DataRefImpl Sec; - Sec.d.a = Rel.d.a; uint32_t Offset; - if (is64Bit()) { - MachO::section_64 Sect = getSection64(Sec); - Offset = Sect.reloff; + if (getHeader().filetype == MachO::MH_OBJECT) { + DataRefImpl Sec; + Sec.d.a = Rel.d.a; + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + Offset = Sect.reloff; + } else { + MachO::section Sect = getSection(Sec); + Offset = Sect.reloff; + } } else { - MachO::section Sect = getSection(Sec); - Offset = Sect.reloff; + MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand(); + Offset = DysymtabLoadCmd.extreloff; // Offset to the external relocations } auto P = reinterpret_cast<const MachO::any_relocation_info *>( diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp index 2304098c1dc9f..d15860674aeb9 100644 --- a/lib/Object/WasmObjectFile.cpp +++ b/lib/Object/WasmObjectFile.cpp @@ -28,6 +28,8 @@ #include <cstring> #include <system_error> +#define DEBUG_TYPE "wasm-object" + using namespace llvm; using namespace object; @@ -256,6 +258,7 @@ Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) { while (Ptr < End) { uint8_t Type = readVarint7(Ptr); uint32_t Size = readVaruint32(Ptr); + const uint8_t *SubSectionEnd = Ptr + Size; switch (Type) { case wasm::WASM_NAMES_FUNCTION: { uint32_t Count = readVaruint32(Ptr); @@ -275,6 +278,9 @@ Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) { Ptr += Size; break; } + if (Ptr != SubSectionEnd) + return make_error<GenericBinaryError>("Name sub-section ended prematurely", + object_error::parse_failed); } if (Ptr != End) @@ -283,6 +289,50 @@ Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) { return Error::success(); } +Error WasmObjectFile::parseLinkingSection(const uint8_t *Ptr, + const uint8_t *End) { + while (Ptr < End) { + uint8_t Type = readVarint7(Ptr); + uint32_t Size = readVaruint32(Ptr); + const uint8_t *SubSectionEnd = Ptr + Size; + switch (Type) { + case wasm::WASM_SYMBOL_INFO: { + uint32_t Count = readVaruint32(Ptr); + while (Count--) { + StringRef Symbol = readString(Ptr); + DEBUG(dbgs() << "reading syminfo: " << Symbol << "\n"); + uint32_t Flags = readVaruint32(Ptr); + auto iter = SymbolMap.find(Symbol); + if (iter == SymbolMap.end()) { + return make_error<GenericBinaryError>( + "Invalid symbol name in linking section", + object_error::parse_failed); + } + uint32_t SymIndex = iter->second; + assert(SymIndex < Symbols.size()); + Symbols[SymIndex].Flags = Flags; + DEBUG(dbgs() << "Set symbol flags index:" + << SymIndex << " name:" + << Symbols[SymIndex].Name << " expected:" + << Symbol << " flags: " << Flags << "\n"); + } + break; + } + case wasm::WASM_STACK_POINTER: + default: + Ptr
+= Size; + break; + } + if (Ptr != SubSectionEnd) + return make_error<GenericBinaryError>( + "Linking sub-section ended prematurely", object_error::parse_failed); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Linking section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + WasmSection* WasmObjectFile::findCustomSectionByName(StringRef Name) { for (WasmSection& Section : Sections) { if (Section.Type == wasm::WASM_SEC_CUSTOM && Section.Name == Name) @@ -325,6 +375,7 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, const uint8_t *Ptr, case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB: break; case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: @@ -332,7 +383,8 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, const uint8_t *Ptr, Reloc.Addend = readVarint32(Ptr); break; default: - return make_error<GenericBinaryError>("Bad relocation type", + return make_error<GenericBinaryError>("Bad relocation type: " + + Twine(Reloc.Type), object_error::parse_failed); } Section->Relocations.push_back(Reloc); @@ -349,6 +401,9 @@ Error WasmObjectFile::parseCustomSection(WasmSection &Sec, if (Sec.Name == "name") { if (Error Err = parseNameSection(Ptr, End)) return Err; + } else if (Sec.Name == "linking") { + if (Error Err = parseLinkingSection(Ptr, End)) + return Err; } else if (Sec.Name.startswith("reloc.")) { if (Error Err = parseRelocSection(Sec.Name, Ptr, End)) return Err; @@ -400,14 +455,20 @@ Error WasmObjectFile::parseImportSection(const uint8_t *Ptr, const uint8_t *End) switch (Im.Kind) { case wasm::WASM_EXTERNAL_FUNCTION: Im.SigIndex = readVaruint32(Ptr); + SymbolMap.try_emplace(Im.Field, Symbols.size()); Symbols.emplace_back(Im.Field, WasmSymbol::SymbolType::FUNCTION_IMPORT, Sections.size(), i); + DEBUG(dbgs() << "Adding import: " << Symbols.back() + << " sym index:" << Symbols.size() << "\n"); break; case wasm::WASM_EXTERNAL_GLOBAL: Im.Global.Type = readVarint7(Ptr); Im.Global.Mutable = readVaruint1(Ptr); + SymbolMap.try_emplace(Im.Field, Symbols.size()); Symbols.emplace_back(Im.Field, WasmSymbol::SymbolType::GLOBAL_IMPORT, Sections.size(), i); + DEBUG(dbgs() << "Adding import: " << Symbols.back() + << " sym index:" << Symbols.size() << "\n"); break; case wasm::WASM_EXTERNAL_MEMORY: Im.Memory = readLimits(Ptr); @@ -496,15 +557,20 @@ Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End) Ex.Name = readString(Ptr); Ex.Kind = readUint8(Ptr); Ex.Index = readVaruint32(Ptr); - Exports.push_back(Ex); switch (Ex.Kind) { case wasm::WASM_EXTERNAL_FUNCTION: + SymbolMap.try_emplace(Ex.Name, Symbols.size()); Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::FUNCTION_EXPORT, Sections.size(), i); + DEBUG(dbgs() << "Adding export: " << Symbols.back() + << " sym index:" << Symbols.size() << "\n"); break; case wasm::WASM_EXTERNAL_GLOBAL: + SymbolMap.try_emplace(Ex.Name, Symbols.size()); Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::GLOBAL_EXPORT, Sections.size(), i); + DEBUG(dbgs() << "Adding export: " << Symbols.back() + << " sym index:" << Symbols.size() << "\n"); break; case wasm::WASM_EXTERNAL_MEMORY: case wasm::WASM_EXTERNAL_TABLE: @@ -513,6 +579,7 @@ Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End) return make_error<GenericBinaryError>( "Unexpected export kind", object_error::parse_failed); } + Exports.push_back(Ex); } if 
(Ptr != End) return make_error<GenericBinaryError>("Export section ended prematurely", @@ -620,6 +687,10 @@ uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const { uint32_t Result = SymbolRef::SF_None; const WasmSymbol &Sym = getWasmSymbol(Symb); + DEBUG(dbgs() << "getSymbolFlags: ptr=" << &Sym << " " << Sym << "\n"); + if (Sym.Flags & wasm::WASM_SYMBOL_FLAG_WEAK) + Result |= SymbolRef::SF_Weak; + switch (Sym.Type) { case WasmSymbol::SymbolType::FUNCTION_IMPORT: Result |= SymbolRef::SF_Undefined | SymbolRef::SF_Executable; @@ -629,6 +700,7 @@ uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const { break; case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: Result |= SymbolRef::SF_Executable; + Result |= SymbolRef::SF_FormatSpecific; break; case WasmSymbol::SymbolType::GLOBAL_IMPORT: Result |= SymbolRef::SF_Undefined; @@ -662,8 +734,7 @@ const WasmSymbol &WasmObjectFile::getWasmSymbol(const SymbolRef &Symb) const { } Expected<StringRef> WasmObjectFile::getSymbolName(DataRefImpl Symb) const { - const WasmSymbol &Sym = getWasmSymbol(Symb); - return Sym.Name; + return getWasmSymbol(Symb).Name; } Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const { @@ -671,8 +742,18 @@ Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const { } uint64_t WasmObjectFile::getSymbolValueImpl(DataRefImpl Symb) const { - const WasmSymbol &Sym = getWasmSymbol(Symb); - return Sym.ElementIndex; + const WasmSymbol& Sym = getWasmSymbol(Symb); + switch (Sym.Type) { + case WasmSymbol::SymbolType::FUNCTION_IMPORT: + case WasmSymbol::SymbolType::GLOBAL_IMPORT: + return 0; + case WasmSymbol::SymbolType::FUNCTION_EXPORT: + case WasmSymbol::SymbolType::GLOBAL_EXPORT: + return Exports[Sym.ElementIndex].Index; + case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: + return Sym.ElementIndex; + } + llvm_unreachable("invalid symbol type"); } uint32_t WasmObjectFile::getSymbolAlignment(DataRefImpl Symb) const { diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp index 3f6080d48f9d1..ff9b9ca35eb5b 100644 --- a/lib/Object/WindowsResource.cpp +++ b/lib/Object/WindowsResource.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/WindowsResource.h" -#include "llvm/BinaryFormat/COFF.h" #include "llvm/Object/COFF.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/MathExtras.h" @@ -21,6 +20,9 @@ #include <sstream> #include <system_error> +using namespace llvm; +using namespace object; + namespace llvm { namespace object { @@ -34,23 +36,19 @@ const uint32_t MIN_HEADER_SIZE = 7 * sizeof(uint32_t) + 2 * sizeof(uint16_t); // 8-byte because it makes everyone happy. 
const uint32_t SECTION_ALIGNMENT = sizeof(uint64_t); -static const size_t ResourceMagicSize = 16; - -static const size_t NullEntrySize = 16; - uint32_t WindowsResourceParser::TreeNode::StringCount = 0; uint32_t WindowsResourceParser::TreeNode::DataCount = 0; WindowsResource::WindowsResource(MemoryBufferRef Source) : Binary(Binary::ID_WinRes, Source) { - size_t LeadingSize = ResourceMagicSize + NullEntrySize; + size_t LeadingSize = WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE; BBS = BinaryByteStream(Data.getBuffer().drop_front(LeadingSize), support::little); } Expected<std::unique_ptr<WindowsResource>> WindowsResource::createWindowsResource(MemoryBufferRef Source) { - if (Source.getBufferSize() < ResourceMagicSize + NullEntrySize) + if (Source.getBufferSize() < WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE) return make_error<GenericBinaryError>( "File too small to be a resource file", object_error::invalid_file_type); @@ -103,12 +101,10 @@ static Error readStringOrId(BinaryStreamReader &Reader, uint16_t &ID, } Error ResourceEntryRef::loadNext() { - uint32_t DataSize; - RETURN_IF_ERROR(Reader.readInteger(DataSize)); - uint32_t HeaderSize; - RETURN_IF_ERROR(Reader.readInteger(HeaderSize)); + const WinResHeaderPrefix *Prefix; + RETURN_IF_ERROR(Reader.readObject(Prefix)); - if (HeaderSize < MIN_HEADER_SIZE) + if (Prefix->HeaderSize < MIN_HEADER_SIZE) return make_error<GenericBinaryError>("Header size is too small.", object_error::parse_failed); @@ -116,13 +112,13 @@ Error ResourceEntryRef::loadNext() { RETURN_IF_ERROR(readStringOrId(Reader, NameID, Name, IsStringName)); - RETURN_IF_ERROR(Reader.padToAlignment(sizeof(uint32_t))); + RETURN_IF_ERROR(Reader.padToAlignment(WIN_RES_HEADER_ALIGNMENT)); RETURN_IF_ERROR(Reader.readObject(Suffix)); - RETURN_IF_ERROR(Reader.readArray(Data, DataSize)); + RETURN_IF_ERROR(Reader.readArray(Data, Prefix->DataSize)); - RETURN_IF_ERROR(Reader.padToAlignment(sizeof(uint32_t))); + RETURN_IF_ERROR(Reader.padToAlignment(WIN_RES_DATA_ALIGNMENT)); return Error::success(); } @@ -246,14 +242,14 @@ WindowsResourceParser::TreeNode::addChild(ArrayRef<UTF16> NameRef, std::string NameString; ArrayRef<UTF16> CorrectedName; std::vector<UTF16> EndianCorrectedName; - if (llvm::sys::IsBigEndianHost) { + if (sys::IsBigEndianHost) { EndianCorrectedName.resize(NameRef.size() + 1); std::copy(NameRef.begin(), NameRef.end(), EndianCorrectedName.begin() + 1); EndianCorrectedName[0] = UNI_UTF16_BYTE_ORDER_MARK_SWAPPED; CorrectedName = makeArrayRef(EndianCorrectedName); } else CorrectedName = NameRef; - llvm::convertUTF16ToUTF8String(CorrectedName, NameString); + convertUTF16ToUTF8String(CorrectedName, NameString); auto Child = StringChildren.find(NameString); if (Child == StringChildren.end()) { @@ -282,17 +278,17 @@ void WindowsResourceParser::TreeNode::print(ScopedPrinter &Writer, // the directory strings or the relocations of the .rsrc section. uint32_t WindowsResourceParser::TreeNode::getTreeSize() const { uint32_t Size = (IDChildren.size() + StringChildren.size()) * - sizeof(llvm::object::coff_resource_dir_entry); + sizeof(coff_resource_dir_entry); // Reached a node pointing to a data entry. if (IsDataNode) { - Size += sizeof(llvm::object::coff_resource_data_entry); + Size += sizeof(coff_resource_data_entry); return Size; } // If the node does not point to data, it must have a directory table pointing // to other nodes. 
- Size += sizeof(llvm::object::coff_resource_dir_table); + Size += sizeof(coff_resource_dir_table); for (auto const &Child : StringChildren) { Size += Child.second->getTreeSize(); @@ -305,9 +301,9 @@ uint32_t WindowsResourceParser::TreeNode::getTreeSize() const { class WindowsResourceCOFFWriter { public: - WindowsResourceCOFFWriter(StringRef OutputFile, Machine MachineType, + WindowsResourceCOFFWriter(COFF::MachineTypes MachineType, const WindowsResourceParser &Parser, Error &E); - Error write(); + std::unique_ptr<MemoryBuffer> write(); private: void performFileLayout(); @@ -323,10 +319,10 @@ private: void writeDirectoryTree(); void writeDirectoryStringTable(); void writeFirstSectionRelocations(); - std::unique_ptr<FileOutputBuffer> Buffer; - uint8_t *BufferStart; + std::unique_ptr<MemoryBuffer> OutputBuffer; + char *BufferStart; uint64_t CurrentOffset = 0; - Machine MachineType; + COFF::MachineTypes MachineType; const WindowsResourceParser::TreeNode &Resources; const ArrayRef<std::vector<uint8_t>> Data; uint64_t FileSize; @@ -343,28 +339,21 @@ private: }; WindowsResourceCOFFWriter::WindowsResourceCOFFWriter( - StringRef OutputFile, Machine MachineType, - const WindowsResourceParser &Parser, Error &E) + COFF::MachineTypes MachineType, const WindowsResourceParser &Parser, + Error &E) : MachineType(MachineType), Resources(Parser.getTree()), Data(Parser.getData()), StringTable(Parser.getStringTable()) { performFileLayout(); - ErrorOr<std::unique_ptr<FileOutputBuffer>> BufferOrErr = - FileOutputBuffer::create(OutputFile, FileSize); - if (!BufferOrErr) { - E = errorCodeToError(BufferOrErr.getError()); - return; - } - - Buffer = std::move(*BufferOrErr); + OutputBuffer = MemoryBuffer::getNewMemBuffer(FileSize); } void WindowsResourceCOFFWriter::performFileLayout() { // Add size of COFF header. - FileSize = llvm::COFF::Header16Size; + FileSize = COFF::Header16Size; // one .rsrc section header for directory tree, another for resource data. - FileSize += 2 * llvm::COFF::SectionSize; + FileSize += 2 * COFF::SectionSize; performSectionOneLayout(); performSectionTwoLayout(); @@ -372,9 +361,9 @@ void WindowsResourceCOFFWriter::performFileLayout() { // We have reached the address of the symbol table. SymbolTableOffset = FileSize; - FileSize += llvm::COFF::Symbol16Size; // size of the @feat.00 symbol. - FileSize += 4 * llvm::COFF::Symbol16Size; // symbol + aux for each section. - FileSize += Data.size() * llvm::COFF::Symbol16Size; // 1 symbol per resource. + FileSize += COFF::Symbol16Size; // size of the @feat.00 symbol. + FileSize += 4 * COFF::Symbol16Size; // symbol + aux for each section. + FileSize += Data.size() * COFF::Symbol16Size; // 1 symbol per resource. FileSize += 4; // four null bytes for the string table. } @@ -395,8 +384,8 @@ void WindowsResourceCOFFWriter::performSectionOneLayout() { // account for the relocations of section one. SectionOneRelocations = FileSize + SectionOneSize; FileSize += SectionOneSize; - FileSize += Data.size() * - llvm::COFF::RelocationSize; // one relocation for each resource. + FileSize += + Data.size() * COFF::RelocationSize; // one relocation for each resource. 
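// Worked example (hypothetical numbers, assuming the standard 10-byte COFF
// relocation record): with three resources the line above adds 3 * 10 = 30
// bytes, and the alignTo() below then rounds the running total up to the
// next multiple of SECTION_ALIGNMENT (8 bytes).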
FileSize = alignTo(FileSize, SECTION_ALIGNMENT); } @@ -407,7 +396,7 @@ void WindowsResourceCOFFWriter::performSectionTwoLayout() { SectionTwoSize = 0; for (auto const &Entry : Data) { DataOffsets.push_back(SectionTwoSize); - SectionTwoSize += llvm::alignTo(Entry.size(), sizeof(uint64_t)); + SectionTwoSize += alignTo(Entry.size(), sizeof(uint64_t)); } FileSize += SectionTwoSize; FileSize = alignTo(FileSize, SECTION_ALIGNMENT); @@ -420,8 +409,8 @@ static std::time_t getTime() { return Now; } -Error WindowsResourceCOFFWriter::write() { - BufferStart = Buffer->getBufferStart(); +std::unique_ptr<MemoryBuffer> WindowsResourceCOFFWriter::write() { + BufferStart = const_cast<char *>(OutputBuffer->getBufferStart()); writeCOFFHeader(); writeFirstSectionHeader(); @@ -431,29 +420,24 @@ Error WindowsResourceCOFFWriter::write() { writeSymbolTable(); writeStringTable(); - if (auto EC = Buffer->commit()) { - return errorCodeToError(EC); - } - - return Error::success(); + return std::move(OutputBuffer); } void WindowsResourceCOFFWriter::writeCOFFHeader() { // Write the COFF header. - auto *Header = - reinterpret_cast<llvm::object::coff_file_header *>(BufferStart); + auto *Header = reinterpret_cast<coff_file_header *>(BufferStart); switch (MachineType) { - case Machine::ARM: - Header->Machine = llvm::COFF::IMAGE_FILE_MACHINE_ARMNT; + case COFF::IMAGE_FILE_MACHINE_ARMNT: + Header->Machine = COFF::IMAGE_FILE_MACHINE_ARMNT; break; - case Machine::X64: - Header->Machine = llvm::COFF::IMAGE_FILE_MACHINE_AMD64; + case COFF::IMAGE_FILE_MACHINE_AMD64: + Header->Machine = COFF::IMAGE_FILE_MACHINE_AMD64; break; - case Machine::X86: - Header->Machine = llvm::COFF::IMAGE_FILE_MACHINE_I386; + case COFF::IMAGE_FILE_MACHINE_I386: + Header->Machine = COFF::IMAGE_FILE_MACHINE_I386; break; default: - Header->Machine = llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; + Header->Machine = COFF::IMAGE_FILE_MACHINE_UNKNOWN; } Header->NumberOfSections = 2; Header->TimeDateStamp = getTime(); @@ -461,15 +445,15 @@ void WindowsResourceCOFFWriter::writeCOFFHeader() { // One symbol for every resource plus 2 for each section and @feat.00 Header->NumberOfSymbols = Data.size() + 5; Header->SizeOfOptionalHeader = 0; - Header->Characteristics = llvm::COFF::IMAGE_FILE_32BIT_MACHINE; + Header->Characteristics = COFF::IMAGE_FILE_32BIT_MACHINE; } void WindowsResourceCOFFWriter::writeFirstSectionHeader() { // Write the first section header. 
- CurrentOffset += sizeof(llvm::object::coff_file_header); - auto *SectionOneHeader = reinterpret_cast<llvm::object::coff_section *>( - BufferStart + CurrentOffset); - strncpy(SectionOneHeader->Name, ".rsrc$01", (size_t)llvm::COFF::NameSize); + CurrentOffset += sizeof(coff_file_header); + auto *SectionOneHeader = + reinterpret_cast<coff_section *>(BufferStart + CurrentOffset); + strncpy(SectionOneHeader->Name, ".rsrc$01", (size_t)COFF::NameSize); SectionOneHeader->VirtualSize = 0; SectionOneHeader->VirtualAddress = 0; SectionOneHeader->SizeOfRawData = SectionOneSize; @@ -478,19 +462,16 @@ void WindowsResourceCOFFWriter::writeFirstSectionHeader() { SectionOneHeader->PointerToLinenumbers = 0; SectionOneHeader->NumberOfRelocations = Data.size(); SectionOneHeader->NumberOfLinenumbers = 0; - SectionOneHeader->Characteristics = llvm::COFF::IMAGE_SCN_ALIGN_1BYTES; - SectionOneHeader->Characteristics += - llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; - SectionOneHeader->Characteristics += llvm::COFF::IMAGE_SCN_MEM_DISCARDABLE; - SectionOneHeader->Characteristics += llvm::COFF::IMAGE_SCN_MEM_READ; + SectionOneHeader->Characteristics += COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + SectionOneHeader->Characteristics += COFF::IMAGE_SCN_MEM_READ; } void WindowsResourceCOFFWriter::writeSecondSectionHeader() { // Write the second section header. - CurrentOffset += sizeof(llvm::object::coff_section); - auto *SectionTwoHeader = reinterpret_cast<llvm::object::coff_section *>( - BufferStart + CurrentOffset); - strncpy(SectionTwoHeader->Name, ".rsrc$02", (size_t)llvm::COFF::NameSize); + CurrentOffset += sizeof(coff_section); + auto *SectionTwoHeader = + reinterpret_cast<coff_section *>(BufferStart + CurrentOffset); + strncpy(SectionTwoHeader->Name, ".rsrc$02", (size_t)COFF::NameSize); SectionTwoHeader->VirtualSize = 0; SectionTwoHeader->VirtualAddress = 0; SectionTwoHeader->SizeOfRawData = SectionTwoSize; @@ -499,14 +480,13 @@ void WindowsResourceCOFFWriter::writeSecondSectionHeader() { SectionTwoHeader->PointerToLinenumbers = 0; SectionTwoHeader->NumberOfRelocations = 0; SectionTwoHeader->NumberOfLinenumbers = 0; - SectionTwoHeader->Characteristics = - llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; - SectionTwoHeader->Characteristics += llvm::COFF::IMAGE_SCN_MEM_READ; + SectionTwoHeader->Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + SectionTwoHeader->Characteristics += COFF::IMAGE_SCN_MEM_READ; } void WindowsResourceCOFFWriter::writeFirstSection() { // Write section one. - CurrentOffset += sizeof(llvm::object::coff_section); + CurrentOffset += sizeof(coff_section); writeDirectoryTree(); writeDirectoryStringTable(); @@ -529,70 +509,65 @@ void WindowsResourceCOFFWriter::writeSecondSection() { void WindowsResourceCOFFWriter::writeSymbolTable() { // Now write the symbol table. // First, the feat symbol. 
- auto *Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(BufferStart + - CurrentOffset); - strncpy(Symbol->Name.ShortName, "@feat.00", (size_t)llvm::COFF::NameSize); + auto *Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, "@feat.00", (size_t)COFF::NameSize); Symbol->Value = 0x11; Symbol->SectionNumber = 0xffff; - Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL; - Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; Symbol->NumberOfAuxSymbols = 0; - CurrentOffset += sizeof(llvm::object::coff_symbol16); + CurrentOffset += sizeof(coff_symbol16); // Now write the .rsrc1 symbol + aux. - Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(BufferStart + - CurrentOffset); - strncpy(Symbol->Name.ShortName, ".rsrc$01", (size_t)llvm::COFF::NameSize); + Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, ".rsrc$01", (size_t)COFF::NameSize); Symbol->Value = 0; Symbol->SectionNumber = 1; - Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL; - Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; Symbol->NumberOfAuxSymbols = 1; - CurrentOffset += sizeof(llvm::object::coff_symbol16); - auto *Aux = reinterpret_cast<llvm::object::coff_aux_section_definition *>( - BufferStart + CurrentOffset); + CurrentOffset += sizeof(coff_symbol16); + auto *Aux = reinterpret_cast<coff_aux_section_definition *>(BufferStart + + CurrentOffset); Aux->Length = SectionOneSize; Aux->NumberOfRelocations = Data.size(); Aux->NumberOfLinenumbers = 0; Aux->CheckSum = 0; Aux->NumberLowPart = 0; Aux->Selection = 0; - CurrentOffset += sizeof(llvm::object::coff_aux_section_definition); + CurrentOffset += sizeof(coff_aux_section_definition); // Now write the .rsrc2 symbol + aux. - Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(BufferStart + - CurrentOffset); - strncpy(Symbol->Name.ShortName, ".rsrc$02", (size_t)llvm::COFF::NameSize); + Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, ".rsrc$02", (size_t)COFF::NameSize); Symbol->Value = 0; Symbol->SectionNumber = 2; - Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL; - Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; Symbol->NumberOfAuxSymbols = 1; - CurrentOffset += sizeof(llvm::object::coff_symbol16); - Aux = reinterpret_cast<llvm::object::coff_aux_section_definition *>( - BufferStart + CurrentOffset); + CurrentOffset += sizeof(coff_symbol16); + Aux = reinterpret_cast<coff_aux_section_definition *>(BufferStart + + CurrentOffset); Aux->Length = SectionTwoSize; Aux->NumberOfRelocations = 0; Aux->NumberOfLinenumbers = 0; Aux->CheckSum = 0; Aux->NumberLowPart = 0; Aux->Selection = 0; - CurrentOffset += sizeof(llvm::object::coff_aux_section_definition); + CurrentOffset += sizeof(coff_aux_section_definition); // Now write a symbol for each relocation. 
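// Each resource data entry gets a synthetic symbol named after its offset in
// section two: a resource at offset 0x20 becomes "$R000020". The section-one
// relocations written later refer to these symbols by table index.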
for (unsigned i = 0; i < Data.size(); i++) { char RelocationName[9]; sprintf(RelocationName, "$R%06X", DataOffsets[i]); - Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(BufferStart + - CurrentOffset); - strncpy(Symbol->Name.ShortName, RelocationName, - (size_t)llvm::COFF::NameSize); + Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, RelocationName, (size_t)COFF::NameSize); Symbol->Value = DataOffsets[i]; Symbol->SectionNumber = 1; - Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL; - Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; Symbol->NumberOfAuxSymbols = 0; - CurrentOffset += sizeof(llvm::object::coff_symbol16); + CurrentOffset += sizeof(coff_symbol16); } } @@ -607,18 +582,18 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() { // COFF objects. std::queue<const WindowsResourceParser::TreeNode *> Queue; Queue.push(&Resources); - uint32_t NextLevelOffset = sizeof(llvm::object::coff_resource_dir_table) + - (Resources.getStringChildren().size() + - Resources.getIDChildren().size()) * - sizeof(llvm::object::coff_resource_dir_entry); + uint32_t NextLevelOffset = + sizeof(coff_resource_dir_table) + (Resources.getStringChildren().size() + + Resources.getIDChildren().size()) * + sizeof(coff_resource_dir_entry); std::vector<const WindowsResourceParser::TreeNode *> DataEntriesTreeOrder; uint32_t CurrentRelativeOffset = 0; while (!Queue.empty()) { auto CurrentNode = Queue.front(); Queue.pop(); - auto *Table = reinterpret_cast<llvm::object::coff_resource_dir_table *>( - BufferStart + CurrentOffset); + auto *Table = reinterpret_cast<coff_resource_dir_table *>(BufferStart + + CurrentOffset); Table->Characteristics = CurrentNode->getCharacteristics(); Table->TimeDateStamp = 0; Table->MajorVersion = CurrentNode->getMajorVersion(); @@ -627,63 +602,63 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() { auto &StringChildren = CurrentNode->getStringChildren(); Table->NumberOfNameEntries = StringChildren.size(); Table->NumberOfIDEntries = IDChildren.size(); - CurrentOffset += sizeof(llvm::object::coff_resource_dir_table); - CurrentRelativeOffset += sizeof(llvm::object::coff_resource_dir_table); + CurrentOffset += sizeof(coff_resource_dir_table); + CurrentRelativeOffset += sizeof(coff_resource_dir_table); // Write the directory entries immediately following each directory table. 
for (auto const &Child : StringChildren) { - auto *Entry = reinterpret_cast<llvm::object::coff_resource_dir_entry *>( - BufferStart + CurrentOffset); + auto *Entry = reinterpret_cast<coff_resource_dir_entry *>(BufferStart + + CurrentOffset); Entry->Identifier.NameOffset = StringTableOffsets[Child.second->getStringIndex()]; if (Child.second->checkIsDataNode()) { Entry->Offset.DataEntryOffset = NextLevelOffset; - NextLevelOffset += sizeof(llvm::object::coff_resource_data_entry); + NextLevelOffset += sizeof(coff_resource_data_entry); DataEntriesTreeOrder.push_back(Child.second.get()); } else { Entry->Offset.SubdirOffset = NextLevelOffset + (1 << 31); - NextLevelOffset += sizeof(llvm::object::coff_resource_dir_table) + + NextLevelOffset += sizeof(coff_resource_dir_table) + (Child.second->getStringChildren().size() + Child.second->getIDChildren().size()) * - sizeof(llvm::object::coff_resource_dir_entry); + sizeof(coff_resource_dir_entry); Queue.push(Child.second.get()); } - CurrentOffset += sizeof(llvm::object::coff_resource_dir_entry); - CurrentRelativeOffset += sizeof(llvm::object::coff_resource_dir_entry); + CurrentOffset += sizeof(coff_resource_dir_entry); + CurrentRelativeOffset += sizeof(coff_resource_dir_entry); } for (auto const &Child : IDChildren) { - auto *Entry = reinterpret_cast<llvm::object::coff_resource_dir_entry *>( - BufferStart + CurrentOffset); + auto *Entry = reinterpret_cast<coff_resource_dir_entry *>(BufferStart + + CurrentOffset); Entry->Identifier.ID = Child.first; if (Child.second->checkIsDataNode()) { Entry->Offset.DataEntryOffset = NextLevelOffset; - NextLevelOffset += sizeof(llvm::object::coff_resource_data_entry); + NextLevelOffset += sizeof(coff_resource_data_entry); DataEntriesTreeOrder.push_back(Child.second.get()); } else { Entry->Offset.SubdirOffset = NextLevelOffset + (1 << 31); - NextLevelOffset += sizeof(llvm::object::coff_resource_dir_table) + + NextLevelOffset += sizeof(coff_resource_dir_table) + (Child.second->getStringChildren().size() + Child.second->getIDChildren().size()) * - sizeof(llvm::object::coff_resource_dir_entry); + sizeof(coff_resource_dir_entry); Queue.push(Child.second.get()); } - CurrentOffset += sizeof(llvm::object::coff_resource_dir_entry); - CurrentRelativeOffset += sizeof(llvm::object::coff_resource_dir_entry); + CurrentOffset += sizeof(coff_resource_dir_entry); + CurrentRelativeOffset += sizeof(coff_resource_dir_entry); } } RelocationAddresses.resize(Data.size()); // Now write all the resource data entries. for (auto DataNodes : DataEntriesTreeOrder) { - auto *Entry = reinterpret_cast<llvm::object::coff_resource_data_entry *>( - BufferStart + CurrentOffset); + auto *Entry = reinterpret_cast<coff_resource_data_entry *>(BufferStart + + CurrentOffset); RelocationAddresses[DataNodes->getDataIndex()] = CurrentRelativeOffset; Entry->DataRVA = 0; // Set to zero because it is a relocation. Entry->DataSize = Data[DataNodes->getDataIndex()].size(); Entry->Codepage = 0; Entry->Reserved = 0; - CurrentOffset += sizeof(llvm::object::coff_resource_data_entry); - CurrentRelativeOffset += sizeof(llvm::object::coff_resource_data_entry); + CurrentOffset += sizeof(coff_resource_data_entry); + CurrentRelativeOffset += sizeof(coff_resource_data_entry); } } @@ -710,33 +685,34 @@ void WindowsResourceCOFFWriter::writeFirstSectionRelocations() { // .rsrc section. 
uint32_t NextSymbolIndex = 5; for (unsigned i = 0; i < Data.size(); i++) { - auto *Reloc = reinterpret_cast<llvm::object::coff_relocation *>( - BufferStart + CurrentOffset); + auto *Reloc = + reinterpret_cast<coff_relocation *>(BufferStart + CurrentOffset); Reloc->VirtualAddress = RelocationAddresses[i]; Reloc->SymbolTableIndex = NextSymbolIndex++; switch (MachineType) { - case Machine::ARM: - Reloc->Type = llvm::COFF::IMAGE_REL_ARM_ADDR32NB; + case COFF::IMAGE_FILE_MACHINE_ARMNT: + Reloc->Type = COFF::IMAGE_REL_ARM_ADDR32NB; break; - case Machine::X64: - Reloc->Type = llvm::COFF::IMAGE_REL_AMD64_ADDR32NB; + case COFF::IMAGE_FILE_MACHINE_AMD64: + Reloc->Type = COFF::IMAGE_REL_AMD64_ADDR32NB; break; - case Machine::X86: - Reloc->Type = llvm::COFF::IMAGE_REL_I386_DIR32NB; + case COFF::IMAGE_FILE_MACHINE_I386: + Reloc->Type = COFF::IMAGE_REL_I386_DIR32NB; break; default: Reloc->Type = 0; } - CurrentOffset += sizeof(llvm::object::coff_relocation); + CurrentOffset += sizeof(coff_relocation); } } -Error writeWindowsResourceCOFF(StringRef OutputFile, Machine MachineType, - const WindowsResourceParser &Parser) { +Expected<std::unique_ptr<MemoryBuffer>> +writeWindowsResourceCOFF(COFF::MachineTypes MachineType, + const WindowsResourceParser &Parser) { Error E = Error::success(); - WindowsResourceCOFFWriter Writer(OutputFile, MachineType, Parser, E); + WindowsResourceCOFFWriter Writer(MachineType, Parser, E); if (E) - return E; + return std::move(E); return Writer.write(); } diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp index ba3a2abe20978..83f3d55b8e556 100644 --- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp +++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp @@ -40,6 +40,7 @@ LLVM_YAML_DECLARE_ENUM_TRAITS(FrameCookieKind) LLVM_YAML_DECLARE_BITSET_TRAITS(CompileSym2Flags) LLVM_YAML_DECLARE_BITSET_TRAITS(CompileSym3Flags) LLVM_YAML_DECLARE_BITSET_TRAITS(ExportFlags) +LLVM_YAML_DECLARE_BITSET_TRAITS(PublicSymFlags) LLVM_YAML_DECLARE_BITSET_TRAITS(LocalSymFlags) LLVM_YAML_DECLARE_BITSET_TRAITS(ProcSymFlags) LLVM_YAML_DECLARE_BITSET_TRAITS(FrameProcedureOptions) @@ -93,6 +94,14 @@ void ScalarBitSetTraits<ExportFlags>::bitset(IO &io, ExportFlags &Flags) { } } +void ScalarBitSetTraits<PublicSymFlags>::bitset(IO &io, PublicSymFlags &Flags) { + auto FlagNames = getProcSymFlagNames(); + for (const auto &E : FlagNames) { + io.bitSetCase(Flags, E.Name.str().c_str(), + static_cast<PublicSymFlags>(E.Value)); + } +} + void ScalarBitSetTraits<LocalSymFlags>::bitset(IO &io, LocalSymFlags &Flags) { auto FlagNames = getLocalFlagNames(); for (const auto &E : FlagNames) { @@ -277,16 +286,15 @@ template <> void SymbolRecordImpl<ExportSym>::map(IO &IO) { } template <> void SymbolRecordImpl<ProcSym>::map(IO &IO) { - // TODO: Print the linkage name - - IO.mapRequired("PtrParent", Symbol.Parent); - IO.mapRequired("PtrEnd", Symbol.End); - IO.mapRequired("PtrNext", Symbol.Next); + IO.mapOptional("PtrParent", Symbol.Parent, 0U); + IO.mapOptional("PtrEnd", Symbol.End, 0U); + IO.mapOptional("PtrNext", Symbol.Next, 0U); IO.mapRequired("CodeSize", Symbol.CodeSize); IO.mapRequired("DbgStart", Symbol.DbgStart); IO.mapRequired("DbgEnd", Symbol.DbgEnd); IO.mapRequired("FunctionType", Symbol.FunctionType); - IO.mapRequired("Segment", Symbol.Segment); + IO.mapOptional("Offset", Symbol.CodeOffset, 0U); + IO.mapOptional("Segment", Symbol.Segment, uint16_t(0)); IO.mapRequired("Flags", Symbol.Flags); IO.mapRequired("DisplayName", Symbol.Name); } @@ -298,9 +306,9 @@ template <> void 
SymbolRecordImpl<RegisterSym>::map(IO &IO) { } template <> void SymbolRecordImpl<PublicSym32>::map(IO &IO) { - IO.mapRequired("Type", Symbol.Index); - IO.mapRequired("Seg", Symbol.Segment); - IO.mapRequired("Off", Symbol.Offset); + IO.mapRequired("Flags", Symbol.Flags); + IO.mapOptional("Offset", Symbol.Offset, 0U); + IO.mapOptional("Segment", Symbol.Segment, uint16_t(0)); IO.mapRequired("Name", Symbol.Name); } @@ -316,8 +324,8 @@ template <> void SymbolRecordImpl<EnvBlockSym>::map(IO &IO) { } template <> void SymbolRecordImpl<InlineSiteSym>::map(IO &IO) { - IO.mapRequired("PtrParent", Symbol.Parent); - IO.mapRequired("PtrEnd", Symbol.End); + IO.mapOptional("PtrParent", Symbol.Parent, 0U); + IO.mapOptional("PtrEnd", Symbol.End, 0U); IO.mapRequired("Inlinee", Symbol.Inlinee); // TODO: The binary annotations } @@ -359,17 +367,17 @@ template <> void SymbolRecordImpl<DefRangeRegisterRelSym>::map(IO &IO) { } template <> void SymbolRecordImpl<BlockSym>::map(IO &IO) { - // TODO: Print the linkage name - IO.mapRequired("PtrParent", Symbol.Parent); - IO.mapRequired("PtrEnd", Symbol.End); + IO.mapOptional("PtrParent", Symbol.Parent, 0U); + IO.mapOptional("PtrEnd", Symbol.End, 0U); IO.mapRequired("CodeSize", Symbol.CodeSize); - IO.mapRequired("Segment", Symbol.Segment); + IO.mapOptional("Offset", Symbol.CodeOffset, 0U); + IO.mapOptional("Segment", Symbol.Segment, uint16_t(0)); IO.mapRequired("BlockName", Symbol.Name); } template <> void SymbolRecordImpl<LabelSym>::map(IO &IO) { - // TODO: Print the linkage name - IO.mapRequired("Segment", Symbol.Segment); + IO.mapOptional("Offset", Symbol.CodeOffset, 0U); + IO.mapOptional("Segment", Symbol.Segment, uint16_t(0)); IO.mapRequired("Flags", Symbol.Flags); IO.mapRequired("DisplayName", Symbol.Name); @@ -419,8 +427,8 @@ template <> void SymbolRecordImpl<FrameProcSym>::map(IO &IO) { } template <> void SymbolRecordImpl<CallSiteInfoSym>::map(IO &IO) { - // TODO: Map Linkage Name - IO.mapRequired("Segment", Symbol.Segment); + IO.mapOptional("Offset", Symbol.CodeOffset, 0U); + IO.mapOptional("Segment", Symbol.Segment, uint16_t(0)); IO.mapRequired("Type", Symbol.Type); } @@ -432,14 +440,13 @@ template <> void SymbolRecordImpl<FileStaticSym>::map(IO &IO) { } template <> void SymbolRecordImpl<HeapAllocationSiteSym>::map(IO &IO) { - // TODO: Map Linkage Name - IO.mapRequired("Segment", Symbol.Segment); + IO.mapOptional("Offset", Symbol.CodeOffset, 0U); + IO.mapOptional("Segment", Symbol.Segment, uint16_t(0)); IO.mapRequired("CallInstructionSize", Symbol.CallInstructionSize); IO.mapRequired("Type", Symbol.Type); } template <> void SymbolRecordImpl<FrameCookieSym>::map(IO &IO) { - // TODO: Map Linkage Name IO.mapRequired("Register", Symbol.Register); IO.mapRequired("CookieKind", Symbol.CookieKind); IO.mapRequired("Flags", Symbol.Flags); @@ -478,14 +485,16 @@ template <> void SymbolRecordImpl<ConstantSym>::map(IO &IO) { } template <> void SymbolRecordImpl<DataSym>::map(IO &IO) { - // TODO: Map linkage name IO.mapRequired("Type", Symbol.Type); + IO.mapOptional("Offset", Symbol.DataOffset, 0U); + IO.mapOptional("Segment", Symbol.Segment, uint16_t(0)); IO.mapRequired("DisplayName", Symbol.Name); } template <> void SymbolRecordImpl<ThreadLocalDataSym>::map(IO &IO) { - // TODO: Map linkage name IO.mapRequired("Type", Symbol.Type); + IO.mapOptional("Offset", Symbol.DataOffset, 0U); + IO.mapOptional("Segment", Symbol.Segment, uint16_t(0)); IO.mapRequired("DisplayName", Symbol.Name); } } diff --git a/lib/ObjectYAML/CodeViewYAMLTypes.cpp 
b/lib/ObjectYAML/CodeViewYAMLTypes.cpp index a03b9cd50faa2..2d1cb4b1b27b9 100644 --- a/lib/ObjectYAML/CodeViewYAMLTypes.cpp +++ b/lib/ObjectYAML/CodeViewYAMLTypes.cpp @@ -138,7 +138,7 @@ StringRef ScalarTraits<TypeIndex>::input(StringRef Scalar, void *Ctx, void ScalarTraits<APSInt>::output(const APSInt &S, void *, llvm::raw_ostream &OS) { - S.print(OS, true); + S.print(OS, S.isSigned()); } StringRef ScalarTraits<APSInt>::input(StringRef Scalar, void *Ctx, APSInt &S) { diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp index 353d027f4e111..65703c6cf683a 100644 --- a/lib/ObjectYAML/WasmYAML.cpp +++ b/lib/ObjectYAML/WasmYAML.cpp @@ -47,14 +47,22 @@ static void commonSectionMapping(IO &IO, WasmYAML::Section &Section) { IO.mapOptional("Relocations", Section.Relocations); } +static void sectionMapping(IO &IO, WasmYAML::NameSection &Section) { + commonSectionMapping(IO, Section); + IO.mapRequired("Name", Section.Name); + IO.mapOptional("FunctionNames", Section.FunctionNames); +} + +static void sectionMapping(IO &IO, WasmYAML::LinkingSection &Section) { + commonSectionMapping(IO, Section); + IO.mapRequired("Name", Section.Name); + IO.mapRequired("SymbolInfo", Section.SymbolInfos); +} + static void sectionMapping(IO &IO, WasmYAML::CustomSection &Section) { commonSectionMapping(IO, Section); IO.mapRequired("Name", Section.Name); - if (Section.Name == "name") { - IO.mapOptional("FunctionNames", Section.FunctionNames); - } else { - IO.mapRequired("Payload", Section.Payload); - } + IO.mapRequired("Payload", Section.Payload); } static void sectionMapping(IO &IO, WasmYAML::TypeSection &Section) { @@ -121,11 +129,29 @@ void MappingTraits<std::unique_ptr<WasmYAML::Section>>::mapping( IO.mapRequired("Type", SectionType); switch (SectionType) { - case wasm::WASM_SEC_CUSTOM: - if (!IO.outputting()) - Section.reset(new WasmYAML::CustomSection()); - sectionMapping(IO, *cast<WasmYAML::CustomSection>(Section.get())); + case wasm::WASM_SEC_CUSTOM: { + StringRef SectionName; + if (IO.outputting()) { + auto CustomSection = cast<WasmYAML::CustomSection>(Section.get()); + SectionName = CustomSection->Name; + } else { + IO.mapRequired("Name", SectionName); + } + if (SectionName == "linking") { + if (!IO.outputting()) + Section.reset(new WasmYAML::LinkingSection()); + sectionMapping(IO, *cast<WasmYAML::LinkingSection>(Section.get())); + } else if (SectionName == "name") { + if (!IO.outputting()) + Section.reset(new WasmYAML::NameSection()); + sectionMapping(IO, *cast<WasmYAML::NameSection>(Section.get())); + } else { + if (!IO.outputting()) + Section.reset(new WasmYAML::CustomSection(SectionName)); + sectionMapping(IO, *cast<WasmYAML::CustomSection>(Section.get())); + } break; + } case wasm::WASM_SEC_TYPE: if (!IO.outputting()) Section.reset(new WasmYAML::TypeSection()); @@ -321,6 +347,12 @@ void MappingTraits<WasmYAML::DataSegment>::mapping( IO.mapRequired("Content", Segment.Content); } +void MappingTraits<WasmYAML::SymbolInfo>::mapping(IO &IO, + WasmYAML::SymbolInfo &Info) { + IO.mapRequired("Name", Info.Name); + IO.mapRequired("Flags", Info.Flags); +} + void ScalarEnumerationTraits<WasmYAML::ValueType>::enumeration( IO &IO, WasmYAML::ValueType &Type) { #define ECase(X) IO.enumCase(Type, #X, wasm::WASM_TYPE_##X); diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp index 52a81ff0e1594..acb9e8d015bce 100644 --- a/lib/Option/OptTable.cpp +++ b/lib/Option/OptTable.cpp @@ -194,6 +194,37 @@ static unsigned matchOption(const OptTable::Info *I, StringRef Str, return 0; } +// Returns true if one 
of the Prefixes + In.Name matches Option +static bool optionMatches(const OptTable::Info &In, StringRef Option) { + if (In.Values && In.Prefixes) + for (size_t I = 0; In.Prefixes[I]; I++) + if (Option == std::string(In.Prefixes[I]) + In.Name) + return true; + return false; +} + +// This function is for flag value completion. +// E.g., when "-stdlib=" and "l" are passed to this function, it will return +// appropriate values for stdlib that start with "l". +std::vector<std::string> +OptTable::suggestValueCompletions(StringRef Option, StringRef Arg) const { + // Search all options and return possible values. + for (const Info &In : OptionInfos.slice(FirstSearchableIndex)) { + if (!optionMatches(In, Option)) + continue; + + SmallVector<StringRef, 8> Candidates; + StringRef(In.Values).split(Candidates, ",", -1, false); + + std::vector<std::string> Result; + for (StringRef Val : Candidates) + if (Val.startswith(Arg)) + Result.push_back(Val); + return Result; + } + return {}; +} + std::vector<std::string> OptTable::findByPrefix(StringRef Cur) const { std::vector<std::string> Ret; for (const Info &In : OptionInfos.slice(FirstSearchableIndex)) { @@ -336,6 +367,9 @@ static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) { case Option::FlagClass: break; + case Option::ValuesClass: + break; + case Option::SeparateClass: case Option::JoinedOrSeparateClass: case Option::RemainingArgsClass: case Option::RemainingArgsJoinedClass: Name += ' '; diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp index 4832e659f026d..bf9f040bde525 100644 --- a/lib/Option/Option.cpp +++ b/lib/Option/Option.cpp @@ -47,6 +47,7 @@ void Option::print(raw_ostream &O) const { P(UnknownClass); P(FlagClass); P(JoinedClass); + P(ValuesClass); P(SeparateClass); P(CommaJoinedClass); P(MultiArgClass); diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index afd66f55720a5..78d5ea955e644 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -464,10 +464,15 @@ static void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, if (RunProfileGen) { MPM.addPass(PGOInstrumentationGen()); + FunctionPassManager FPM; + FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass())); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + // Add the profile lowering pass. InstrProfOptions Options; if (!ProfileGenFile.empty()) Options.InstrProfileOutput = ProfileGenFile; + Options.DoCounterPromotion = true; MPM.addPass(InstrProfiling(Options)); } @@ -923,9 +928,6 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MainFPM.add(AlignmentFromAssumptionsPass()); #endif - // FIXME: Conditionally run LoadCombine here, after it's ported - // (in case we still have this pass, given its questionable usefulness). - // FIXME: add peephole extensions to the PM here. 
MainFPM.addPass(InstCombinePass()); MainFPM.addPass(JumpThreadingPass()); diff --git a/lib/ProfileData/Coverage/CoverageMapping.cpp b/lib/ProfileData/Coverage/CoverageMapping.cpp index 015b3c6c2021d..4534e086b39e2 100644 --- a/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -1,4 +1,4 @@ -//===- CoverageMapping.cpp - Code coverage mapping support ------*- C++ -*-===// +//===- CoverageMapping.cpp - Code coverage mapping support ----------------===// // // The LLVM Compiler Infrastructure // @@ -200,6 +200,9 @@ Error CoverageMapping::loadFunctionRecord( const CoverageMappingRecord &Record, IndexedInstrProfReader &ProfileReader) { StringRef OrigFuncName = Record.FunctionName; + if (OrigFuncName.empty()) + return make_error<CoverageMapError>(coveragemap_error::malformed); + if (Record.Filenames.empty()) OrigFuncName = getFuncNameWithoutPrefix(OrigFuncName); else @@ -300,8 +303,8 @@ namespace { /// An instantiation set is a collection of functions that have the same source /// code, i.e., template function specializations. class FunctionInstantiationSetCollector { - typedef DenseMap<std::pair<unsigned, unsigned>, - std::vector<const FunctionRecord *>> MapT; + using MapT = DenseMap<std::pair<unsigned, unsigned>, + std::vector<const FunctionRecord *>>; MapT InstantiatedFunctions; public: @@ -315,7 +318,6 @@ public: } MapT::iterator begin() { return InstantiatedFunctions.begin(); } - MapT::iterator end() { return InstantiatedFunctions.end(); } }; diff --git a/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/lib/ProfileData/Coverage/CoverageMappingReader.cpp index a34f359cd5427..fff0a03ccbe01 100644 --- a/lib/ProfileData/Coverage/CoverageMappingReader.cpp +++ b/lib/ProfileData/Coverage/CoverageMappingReader.cpp @@ -1,4 +1,4 @@ -//===- CoverageMappingReader.cpp - Code coverage mapping reader -*- C++ -*-===// +//===- CoverageMappingReader.cpp - Code coverage mapping reader -----------===// // // The LLVM Compiler Infrastructure // @@ -62,7 +62,7 @@ void CoverageMappingIterator::increment() { } Error RawCoverageReader::readULEB128(uint64_t &Result) { - if (Data.size() < 1) + if (Data.empty()) return make_error<CoverageMapError>(coveragemap_error::truncated); unsigned N = 0; Result = decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N); @@ -392,9 +392,9 @@ struct CovMapFuncRecordReader { // A class for reading coverage mapping function records for a module. template <CovMapVersion Version, class IntPtrT, support::endianness Endian> class VersionedCovMapFuncRecordReader : public CovMapFuncRecordReader { - typedef typename CovMapTraits< - Version, IntPtrT>::CovMapFuncRecordType FuncRecordType; - typedef typename CovMapTraits<Version, IntPtrT>::NameRefType NameRefType; + using FuncRecordType = + typename CovMapTraits<Version, IntPtrT>::CovMapFuncRecordType; + using NameRefType = typename CovMapTraits<Version, IntPtrT>::NameRefType; // Maps function's name references to the indexes of their records // in \c Records. 
@@ -419,6 +419,8 @@ class VersionedCovMapFuncRecordReader : public CovMapFuncRecordReader { StringRef FuncName; if (Error Err = CFR->template getFuncName<Endian>(ProfileNames, FuncName)) return Err; + if (FuncName.empty()) + return make_error<InstrProfError>(instrprof_error::malformed); Records.emplace_back(Version, FuncName, FuncHash, Mapping, FilenamesBegin, Filenames.size() - FilenamesBegin); return Error::success(); @@ -574,7 +576,7 @@ static Error loadTestingFormat(StringRef Data, InstrProfSymtab &ProfileNames, Endian = support::endianness::little; Data = Data.substr(StringRef(TestingFormatMagic).size()); - if (Data.size() < 1) + if (Data.empty()) return make_error<CoverageMapError>(coveragemap_error::truncated); unsigned N = 0; auto ProfileNamesSize = @@ -582,7 +584,7 @@ static Error loadTestingFormat(StringRef Data, InstrProfSymtab &ProfileNames, if (N > Data.size()) return make_error<CoverageMapError>(coveragemap_error::malformed); Data = Data.substr(N); - if (Data.size() < 1) + if (Data.empty()) return make_error<CoverageMapError>(coveragemap_error::truncated); N = 0; uint64_t Address = @@ -596,7 +598,7 @@ static Error loadTestingFormat(StringRef Data, InstrProfSymtab &ProfileNames, return E; CoverageMapping = Data.substr(ProfileNamesSize); // Skip the padding bytes because coverage map data has an alignment of 8. - if (CoverageMapping.size() < 1) + if (CoverageMapping.empty()) return make_error<CoverageMapError>(coveragemap_error::truncated); size_t Pad = alignmentAdjustment(CoverageMapping.data(), 8); if (CoverageMapping.size() < Pad) diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp index c9b82c303e338..005061c4f0680 100644 --- a/lib/ProfileData/InstrProf.cpp +++ b/lib/ProfileData/InstrProf.cpp @@ -330,14 +330,15 @@ GlobalVariable *createPGOFuncNameVar(Function &F, StringRef PGOFuncName) { return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), PGOFuncName); } -void InstrProfSymtab::create(Module &M, bool InLTO) { +Error InstrProfSymtab::create(Module &M, bool InLTO) { for (Function &F : M) { // Function may not have a name: like using asm("") to overwrite the name. // Ignore in this case. if (!F.hasName()) continue; const std::string &PGOFuncName = getPGOFuncName(F, InLTO); - addFuncName(PGOFuncName); + if (Error E = addFuncName(PGOFuncName)) + return E; MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F); // In ThinLTO, local function may have been promoted to global and have // suffix added to the function name. 
We need to add the stripped function @@ -346,13 +347,15 @@ void InstrProfSymtab::create(Module &M, bool InLTO) { auto pos = PGOFuncName.find('.'); if (pos != std::string::npos) { const std::string &OtherFuncName = PGOFuncName.substr(0, pos); - addFuncName(OtherFuncName); + if (Error E = addFuncName(OtherFuncName)) + return E; MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F); } } } finalizeSymtab(); + return Error::success(); } Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs, @@ -447,7 +450,8 @@ Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) { SmallVector<StringRef, 0> Names; NameStrings.split(Names, getInstrProfNameSeparator()); for (StringRef &Name : Names) - Symtab.addFuncName(Name); + if (Error E = Symtab.addFuncName(Name)) + return E; while (P < EndP && *P == 0) P++; diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index d9f599f400da5..1ed1fb8b6f0b5 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -200,7 +200,8 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { std::pair<StringRef, StringRef> VD = Line->rsplit(':'); uint64_t TakenCount, Value; if (ValueKind == IPVK_IndirectCallTarget) { - Symtab->addFuncName(VD.first); + if (Error E = Symtab->addFuncName(VD.first)) + return E; Value = IndexedInstrProf::ComputeHash(VD.first); } else { READ_NUM(VD.first, Value); @@ -232,7 +233,8 @@ Error TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { // Read the function name. Record.Name = *Line++; - Symtab->addFuncName(Record.Name); + if (Error E = Symtab->addFuncName(Record.Name)) + return E; // Read the function hash. if (Line.is_at_end()) @@ -482,8 +484,8 @@ InstrProfLookupTrait::ComputeHash(StringRef K) { return IndexedInstrProf::ComputeHash(HashType, K); } -typedef InstrProfLookupTrait::data_type data_type; -typedef InstrProfLookupTrait::offset_type offset_type; +using data_type = InstrProfLookupTrait::data_type; +using offset_type = InstrProfLookupTrait::offset_type; bool InstrProfLookupTrait::readValueProfilingData( const unsigned char *&D, const unsigned char *const End) { @@ -620,7 +622,7 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]); - llvm::SummaryEntryVector DetailedSummary; + SummaryEntryVector DetailedSummary; for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) { const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I); DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount, @@ -694,7 +696,9 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() { return *Symtab.get(); std::unique_ptr<InstrProfSymtab> NewSymtab = make_unique<InstrProfSymtab>(); - Index->populateSymtab(*NewSymtab.get()); + if (Error E = Index->populateSymtab(*NewSymtab.get())) { + consumeError(error(InstrProfError::take(std::move(E)))); + } Symtab = std::move(NewSymtab); return *Symtab.get(); diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index b3402a6ea956c..9efea78ed2a89 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -69,8 +69,7 @@ public: write(P[K].D[I]); } } else { - raw_string_ostream &SOStream = - static_cast<llvm::raw_string_ostream &>(OS); + raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS); std::string &Data = SOStream.str(); // with flush for (int K = 
0; K < NItems; K++) { for (int I = 0; I < P[K].N; I++) { @@ -91,14 +90,14 @@ public: class InstrProfRecordWriterTrait { public: - typedef StringRef key_type; - typedef StringRef key_type_ref; + using key_type = StringRef; + using key_type_ref = StringRef; - typedef const InstrProfWriter::ProfilingData *const data_type; - typedef const InstrProfWriter::ProfilingData *const data_type_ref; + using data_type = const InstrProfWriter::ProfilingData *const; + using data_type_ref = const InstrProfWriter::ProfilingData *const; - typedef uint64_t hash_value_type; - typedef uint64_t offset_type; + using hash_value_type = uint64_t; + using offset_type = uint64_t; support::endianness ValueProfDataEndianness = support::little; InstrProfSummaryBuilder *SummaryBuilder; @@ -363,17 +362,19 @@ void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func, OS << "\n"; } -void InstrProfWriter::writeText(raw_fd_ostream &OS) { +Error InstrProfWriter::writeText(raw_fd_ostream &OS) { if (ProfileKind == PF_IRLevel) OS << "# IR level Instrumentation Flag\n:ir\n"; InstrProfSymtab Symtab; for (const auto &I : FunctionData) if (shouldEncodeData(I.getValue())) - Symtab.addFuncName(I.getKey()); + if (Error E = Symtab.addFuncName(I.getKey())) + return E; Symtab.finalizeSymtab(); for (const auto &I : FunctionData) if (shouldEncodeData(I.getValue())) for (const auto &Func : I.getValue()) writeRecordInText(Func.second, Symtab, OS); + return Error::success(); } diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index f36c25a0ce914..deb76cb565d1e 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -37,10 +37,6 @@ using namespace llvm; -// TODO: Remove these and use APInt qualified types directly. -typedef APInt::WordType integerPart; -const unsigned int integerPartWidth = APInt::APINT_BITS_PER_WORD; - /// A macro used to combine two fcCategory enums into one key which can be used /// in a switch statement to classify how the interaction of two APFloat's /// categories affects an operation. @@ -51,7 +47,7 @@ const unsigned int integerPartWidth = APInt::APINT_BITS_PER_WORD; /* Assumed in hexadecimal significand parsing, and conversion to hexadecimal strings. */ -static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); +static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); namespace llvm { /* Represents floating point arithmetic semantics. */ @@ -153,8 +149,7 @@ namespace llvm { const unsigned int maxExponent = 16383; const unsigned int maxPrecision = 113; const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; - const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) - / (351 * integerPartWidth)); + const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { return semantics.precision; @@ -180,7 +175,7 @@ namespace llvm { static inline unsigned int partCountForBits(unsigned int bits) { - return ((bits) + integerPartWidth - 1) / integerPartWidth; + return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; } /* Returns 0U-9U. Return values >= 10U are not digits. */ @@ -420,7 +415,7 @@ trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, /* Return the fraction lost were a bignum truncated losing the least significant BITS bits. 
*/ static lostFraction -lostFractionThroughTruncation(const integerPart *parts, +lostFractionThroughTruncation(const APFloatBase::integerPart *parts, unsigned int partCount, unsigned int bits) { @@ -433,7 +428,7 @@ lostFractionThroughTruncation(const integerPart *parts, return lfExactlyZero; if (bits == lsb + 1) return lfExactlyHalf; - if (bits <= partCount * integerPartWidth && + if (bits <= partCount * APFloatBase::integerPartWidth && APInt::tcExtractBit(parts, bits - 1)) return lfMoreThanHalf; @@ -442,7 +437,7 @@ lostFractionThroughTruncation(const integerPart *parts, /* Shift DST right BITS bits noting lost fraction. */ static lostFraction -shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) +shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits) { lostFraction lost_fraction; @@ -489,22 +484,22 @@ HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) /* The number of ulps from the boundary (zero, or half if ISNEAREST) when the least significant BITS are truncated. BITS cannot be zero. */ -static integerPart -ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) -{ +static APFloatBase::integerPart +ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, + bool isNearest) { unsigned int count, partBits; - integerPart part, boundary; + APFloatBase::integerPart part, boundary; assert(bits != 0); bits--; - count = bits / integerPartWidth; - partBits = bits % integerPartWidth + 1; + count = bits / APFloatBase::integerPartWidth; + partBits = bits % APFloatBase::integerPartWidth + 1; - part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); + part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits)); if (isNearest) - boundary = (integerPart) 1 << (partBits - 1); + boundary = (APFloatBase::integerPart) 1 << (partBits - 1); else boundary = 0; @@ -518,32 +513,30 @@ ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) if (part == boundary) { while (--count) if (parts[count]) - return ~(integerPart) 0; /* A lot. */ + return ~(APFloatBase::integerPart) 0; /* A lot. */ return parts[0]; } else if (part == boundary - 1) { while (--count) if (~parts[count]) - return ~(integerPart) 0; /* A lot. */ + return ~(APFloatBase::integerPart) 0; /* A lot. */ return -parts[0]; } - return ~(integerPart) 0; /* A lot. */ + return ~(APFloatBase::integerPart) 0; /* A lot. */ } /* Place pow(5, power) in DST, and return the number of parts used. DST must be at least one part larger than size of the answer. 
*/ static unsigned int -powerOf5(integerPart *dst, unsigned int power) -{ - static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, - 15625, 78125 }; - integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; +powerOf5(APFloatBase::integerPart *dst, unsigned int power) { + static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 }; + APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; pow5s[0] = 78125 * 5; unsigned int partsCount[16] = { 1 }; - integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; + APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; unsigned int result; assert(power <= maxExponent); @@ -572,7 +565,7 @@ powerOf5(integerPart *dst, unsigned int power) } if (power & 1) { - integerPart *tmp; + APFloatBase::integerPart *tmp; APInt::tcFullMultiply(p2, p1, pow5, result, pc); result += pc; @@ -608,14 +601,14 @@ static const char NaNU[] = "NAN"; significant nibble. Write out exactly COUNT hexdigits, return COUNT. */ static unsigned int -partAsHex (char *dst, integerPart part, unsigned int count, +partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count, const char *hexDigitChars) { unsigned int result = count; - assert(count != 0 && count <= integerPartWidth / 4); + assert(count != 0 && count <= APFloatBase::integerPartWidth / 4); - part >>= (integerPartWidth - 4 * count); + part >>= (APFloatBase::integerPartWidth - 4 * count); while (count--) { dst[count] = hexDigitChars[part & 0xf]; part >>= 4; @@ -889,11 +882,11 @@ unsigned int IEEEFloat::partCount() const { return partCountForBits(semantics->precision + 1); } -const integerPart *IEEEFloat::significandParts() const { +const IEEEFloat::integerPart *IEEEFloat::significandParts() const { return const_cast<IEEEFloat *>(this)->significandParts(); } -integerPart *IEEEFloat::significandParts() { +IEEEFloat::integerPart *IEEEFloat::significandParts() { if (partCount() > 1) return significand.parts; else @@ -916,7 +909,7 @@ void IEEEFloat::incrementSignificand() { } /* Add the significand of the RHS. Returns the carry flag. */ -integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { +IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { integerPart *parts; parts = significandParts(); @@ -929,8 +922,8 @@ integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { /* Subtract the significand of the RHS with a borrow flag. Returns the borrow flag. 
*/ -integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs, - integerPart borrow) { +IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs, + integerPart borrow) { integerPart *parts; parts = significandParts(); diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index e9716e3b1e872..c558ddd82161d 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -546,10 +546,7 @@ unsigned APInt::countLeadingZerosSlowCase() const { return Count; } -unsigned APInt::countLeadingOnes() const { - if (isSingleWord()) - return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth)); - +unsigned APInt::countLeadingOnesSlowCase() const { unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; unsigned shift; if (!highWordBits) { @@ -573,9 +570,7 @@ unsigned APInt::countLeadingOnes() const { return Count; } -unsigned APInt::countTrailingZeros() const { - if (isSingleWord()) - return std::min(unsigned(llvm::countTrailingZeros(U.VAL)), BitWidth); +unsigned APInt::countTrailingZerosSlowCase() const { unsigned Count = 0; unsigned i = 0; for (; i < getNumWords() && U.pVal[i] == 0; ++i) diff --git a/lib/Support/BinaryStreamReader.cpp b/lib/Support/BinaryStreamReader.cpp index bfb658cfa0b74..e00527f2519e1 100644 --- a/lib/Support/BinaryStreamReader.cpp +++ b/lib/Support/BinaryStreamReader.cpp @@ -109,6 +109,12 @@ Error BinaryStreamReader::readStreamRef(BinaryStreamRef &Ref, uint32_t Length) { return Error::success(); } +Error BinaryStreamReader::readSubstream(BinarySubstreamRef &Stream, + uint32_t Size) { + Stream.Offset = getOffset(); + return readStreamRef(Stream.StreamData, Size); +} + Error BinaryStreamReader::skip(uint32_t Amount) { if (Amount > bytesRemaining()) return make_error<BinaryStreamError>(stream_error_code::stream_too_short); diff --git a/lib/Support/CachePruning.cpp b/lib/Support/CachePruning.cpp index aca1236395655..60d0964f27646 100644 --- a/lib/Support/CachePruning.cpp +++ b/lib/Support/CachePruning.cpp @@ -82,7 +82,7 @@ llvm::parseCachePruningPolicy(StringRef PolicyStr) { if (Value.back() != '%') return make_error<StringError>("'" + Value + "' must be a percentage", inconvertibleErrorCode()); - StringRef SizeStr = Value.slice(0, Value.size() - 1); + StringRef SizeStr = Value.drop_back(); uint64_t Size; if (SizeStr.getAsInteger(0, Size)) return make_error<StringError>("'" + SizeStr + "' not an integer", @@ -91,7 +91,28 @@ llvm::parseCachePruningPolicy(StringRef PolicyStr) { return make_error<StringError>("'" + SizeStr + "' must be between 0 and 100", inconvertibleErrorCode()); - Policy.PercentageOfAvailableSpace = Size; + Policy.MaxSizePercentageOfAvailableSpace = Size; + } else if (Key == "cache_size_bytes") { + uint64_t Mult = 1; + switch (tolower(Value.back())) { + case 'k': + Mult = 1024; + Value = Value.drop_back(); + break; + case 'm': + Mult = 1024 * 1024; + Value = Value.drop_back(); + break; + case 'g': + Mult = 1024 * 1024 * 1024; + Value = Value.drop_back(); + break; + } + uint64_t Size; + if (Value.getAsInteger(0, Size)) + return make_error<StringError>("'" + Value + "' not an integer", + inconvertibleErrorCode()); + Policy.MaxSizeBytes = Size * Mult; } else { return make_error<StringError>("Unknown key: '" + Key + "'", inconvertibleErrorCode()); @@ -115,11 +136,12 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) { if (!isPathDir) return false; - Policy.PercentageOfAvailableSpace = - std::min(Policy.PercentageOfAvailableSpace, 100u); + Policy.MaxSizePercentageOfAvailableSpace = + 
std::min(Policy.MaxSizePercentageOfAvailableSpace, 100u); if (Policy.Expiration == seconds(0) && - Policy.PercentageOfAvailableSpace == 0) { + Policy.MaxSizePercentageOfAvailableSpace == 0 && + Policy.MaxSizeBytes == 0) { DEBUG(dbgs() << "No pruning settings set, exit early\n"); // Nothing will be pruned, early exit return false; @@ -157,7 +179,8 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) { writeTimestampFile(TimestampFile); } - bool ShouldComputeSize = (Policy.PercentageOfAvailableSpace > 0); + bool ShouldComputeSize = + (Policy.MaxSizePercentageOfAvailableSpace > 0 || Policy.MaxSizeBytes > 0); // Keep track of space std::set<std::pair<uint64_t, std::string>> FileSizes; @@ -216,14 +239,22 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) { } sys::fs::space_info SpaceInfo = ErrOrSpaceInfo.get(); auto AvailableSpace = TotalSize + SpaceInfo.free; - auto FileAndSize = FileSizes.rbegin(); + + if (Policy.MaxSizePercentageOfAvailableSpace == 0) + Policy.MaxSizePercentageOfAvailableSpace = 100; + if (Policy.MaxSizeBytes == 0) + Policy.MaxSizeBytes = AvailableSpace; + auto TotalSizeTarget = std::min<uint64_t>( + AvailableSpace * Policy.MaxSizePercentageOfAvailableSpace / 100ull, + Policy.MaxSizeBytes); + DEBUG(dbgs() << "Occupancy: " << ((100 * TotalSize) / AvailableSpace) - << "% target is: " << Policy.PercentageOfAvailableSpace - << "\n"); + << "% target is: " << Policy.MaxSizePercentageOfAvailableSpace + << "%, " << Policy.MaxSizeBytes << " bytes\n"); + + auto FileAndSize = FileSizes.rbegin(); // Remove the oldest accessed files first, till we get below the threshold - while (((100 * TotalSize) / AvailableSpace) > - Policy.PercentageOfAvailableSpace && - FileAndSize != FileSizes.rend()) { + while (TotalSize > TotalSizeTarget && FileAndSize != FileSizes.rend()) { // Remove the file. sys::fs::remove(FileAndSize->second); // Update size diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index de0ca940b405f..0345a5e3d2a1d 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" @@ -1522,13 +1523,9 @@ bool parser<unsigned long long>::parse(Option &O, StringRef ArgName, // parser<double>/parser<float> implementation // static bool parseDouble(Option &O, StringRef Arg, double &Value) { - SmallString<32> TmpStr(Arg.begin(), Arg.end()); - const char *ArgStart = TmpStr.c_str(); - char *End; - Value = strtod(ArgStart, &End); - if (*End != 0) - return O.error("'" + Arg + "' value invalid for floating point argument!"); - return false; + if (to_float(Arg, Value)) + return false; + return O.error("'" + Arg + "' value invalid for floating point argument!"); } bool parser<double>::parse(Option &O, StringRef ArgName, StringRef Arg, diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp index 53c10bcc562e3..0199b300ba72d 100644 --- a/lib/Support/DataExtractor.cpp +++ b/lib/Support/DataExtractor.cpp @@ -68,6 +68,13 @@ uint16_t *DataExtractor::getU16(uint32_t *offset_ptr, uint16_t *dst, Data.data()); } +uint32_t DataExtractor::getU24(uint32_t *offset_ptr) const { + uint24_t ExtractedVal = + getU<uint24_t>(offset_ptr, this, IsLittleEndian, Data.data()); + // The 3 bytes are in the correct byte order for the host. 
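The new getU24 above funnels through the generic getU<uint24_t> helper and then swaps the three bytes into host order. Stripped of those helpers, the read amounts to the following (illustrative standalone function, not the DataExtractor API):

  #include <cstdint>

  // Widen 3 bytes at 'p' to uint32_t in the requested byte order; the
  // top byte of the result is always zero.
  static uint32_t readU24(const uint8_t *p, bool littleEndian) {
    if (littleEndian)
      return uint32_t(p[0]) | uint32_t(p[1]) << 8 | uint32_t(p[2]) << 16;
    return uint32_t(p[0]) << 16 | uint32_t(p[1]) << 8 | uint32_t(p[2]);
  }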
+ return ExtractedVal.getAsUint32(sys::IsLittleEndianHost); +} + uint32_t DataExtractor::getU32(uint32_t *offset_ptr) const { return getU<uint32_t>(offset_ptr, this, IsLittleEndian, Data.data()); } diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index f70b77da8de47..e04bd8bb3b9a1 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -1,4 +1,4 @@ -//===-- GraphWriter.cpp - Implements GraphWriter support routines ---------===// +//===- GraphWriter.cpp - Implements GraphWriter support routines ----------===// // // The LLVM Compiler Infrastructure // @@ -12,10 +12,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/GraphWriter.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <system_error> +#include <string> +#include <vector> + using namespace llvm; static cl::opt<bool> ViewBackground("view-background", cl::Hidden, @@ -99,8 +111,10 @@ static bool ExecGraphViewer(StringRef ExecPath, std::vector<const char *> &args, } namespace { + struct GraphSession { std::string LogBuffer; + bool TryFindProgram(StringRef Names, std::string &ProgramPath) { raw_string_ostream Log(LogBuffer); SmallVector<StringRef, 8> parts; @@ -115,7 +129,8 @@ struct GraphSession { return false; } }; -} // namespace + +} // end anonymous namespace static const char *getProgramName(GraphProgram::Name program) { switch (program) { diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 320aede79fbb0..2687a67556d3e 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -174,6 +174,7 @@ StringRef Triple::getOSTypeName(OSType Kind) { switch (Kind) { case UnknownOS: return "unknown"; + case Ananas: return "ananas"; case CloudABI: return "cloudabi"; case Darwin: return "darwin"; case DragonFly: return "dragonfly"; @@ -455,6 +456,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) { static Triple::OSType parseOS(StringRef OSName) { return StringSwitch<Triple::OSType>(OSName) + .StartsWith("ananas", Triple::Ananas) .StartsWith("cloudabi", Triple::CloudABI) .StartsWith("darwin", Triple::Darwin) .StartsWith("dragonfly", Triple::DragonFly) diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc index 457217125a222..0ba6a25aa198d 100644 --- a/lib/Support/Unix/Host.inc +++ b/lib/Support/Unix/Host.inc @@ -45,5 +45,11 @@ std::string sys::getDefaultTargetTriple() { TargetTripleString += getOSVersion(); } + // Override the default target with an environment variable named by LLVM_TARGET_TRIPLE_ENV. 
+#if defined(LLVM_TARGET_TRIPLE_ENV) + if (const char *EnvTriple = std::getenv(LLVM_TARGET_TRIPLE_ENV)) + TargetTripleString = EnvTriple; +#endif + return Triple::normalize(TargetTripleString); } diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index edbc7938f0cbf..dd39ef935bf92 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -195,6 +195,10 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock, #if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__)) void *pa = ::mmap(start, PageSize*NumPages, PROT_READ|PROT_EXEC, flags, fd, 0); +#elif defined(__NetBSD__) && defined(PROT_MPROTECT) + void *pa = + ::mmap(start, PageSize * NumPages, + PROT_READ | PROT_WRITE | PROT_MPROTECT(PROT_EXEC), flags, fd, 0); #else void *pa = ::mmap(start, PageSize*NumPages, PROT_READ|PROT_WRITE|PROT_EXEC, flags, fd, 0); diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index 2df0eaff47e52..1704fa4799428 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -449,11 +449,22 @@ bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef<co size_t ArgLength = Program.size() + 1; for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end(); I != E; ++I) { - ArgLength += strlen(*I) + 1; + size_t length = strlen(*I); + + // Ensure that we do not exceed the MAX_ARG_STRLEN constant on Linux, which + // does not have a constant unlike what the man pages would have you + // believe. Since this limit is pretty high, perform the check + // unconditionally rather than trying to be aggressive and limiting it to + // Linux only. + if (length >= (32 * 4096)) + return false; + + ArgLength += length + 1; if (ArgLength > size_t(HalfArgMax)) { return false; } } + return true; } } diff --git a/lib/Support/Windows/Host.inc b/lib/Support/Windows/Host.inc index fe89fe0aad8c4..7e196cf0ce18a 100644 --- a/lib/Support/Windows/Host.inc +++ b/lib/Support/Windows/Host.inc @@ -18,5 +18,13 @@ using namespace llvm; std::string sys::getDefaultTargetTriple() { - return Triple::normalize(LLVM_DEFAULT_TARGET_TRIPLE); + const char *Triple = LLVM_DEFAULT_TARGET_TRIPLE; + + // Override the default target with an environment variable named by LLVM_TARGET_TRIPLE_ENV. 
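Looking back at the Unix/Program.inc hunk above: it layers a per-argument cap (Linux's MAX_ARG_STRLEN, 32 pages) onto the existing whole-command-line budget. The combined check, restated as a standalone sketch where halfArgMax stands in for the precomputed half of ARG_MAX:

  #include <cstddef>
  #include <cstring>

  // Reject the command line if any single argument hits MAX_ARG_STRLEN
  // or the running total (with NUL terminators) exceeds the budget.
  static bool fitsWithinLimits(const char *const *args, size_t numArgs,
                               size_t halfArgMax) {
    const size_t maxArgStrlen = 32 * 4096; // MAX_ARG_STRLEN on 4K pages
    size_t total = 0;
    for (size_t i = 0; i != numArgs; ++i) {
      size_t len = std::strlen(args[i]);
      if (len >= maxArgStrlen)
        return false;            // one argument alone is too long
      total += len + 1;
      if (total > halfArgMax)
        return false;
    }
    return true;
  }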
+#if defined(LLVM_TARGET_TRIPLE_ENV) + if (const char *EnvTriple = std::getenv(LLVM_TARGET_TRIPLE_ENV)) + Triple = EnvTriple; +#endif + + return Triple::normalize(Triple); } diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp index 01ae3214453dc..e2f21a56a810a 100644 --- a/lib/Support/YAMLParser.cpp +++ b/lib/Support/YAMLParser.cpp @@ -1,4 +1,4 @@ -//===--- YAMLParser.cpp - Simple YAML parser ------------------------------===// +//===- YAMLParser.cpp - Simple YAML parser --------------------------------===// // // The LLVM Compiler Infrastructure // @@ -13,15 +13,29 @@ #include "llvm/Support/YAMLParser.h" #include "llvm/ADT/AllocatorList.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <map> +#include <memory> +#include <string> +#include <system_error> +#include <utility> using namespace llvm; using namespace yaml; @@ -37,7 +51,7 @@ enum UnicodeEncodingForm { /// EncodingInfo - Holds the encoding type and length of the byte order mark if /// it exists. Length is in {0, 2, 3, 4}. -typedef std::pair<UnicodeEncodingForm, unsigned> EncodingInfo; +using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>; /// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode /// encoding form of \a Input. @@ -46,7 +60,7 @@ typedef std::pair<UnicodeEncodingForm, unsigned> EncodingInfo; /// @returns An EncodingInfo indicating the Unicode encoding form of the input /// and how long the byte order mark is if one exists. static EncodingInfo getUnicodeEncoding(StringRef Input) { - if (Input.size() == 0) + if (Input.empty()) return std::make_pair(UEF_Unknown, 0); switch (uint8_t(Input[0])) { @@ -95,8 +109,6 @@ static EncodingInfo getUnicodeEncoding(StringRef Input) { return std::make_pair(UEF_UTF8, 0); } -namespace llvm { -namespace yaml { /// Pin the vtables to this file. void Node::anchor() {} void NullNode::anchor() {} @@ -107,6 +119,9 @@ void MappingNode::anchor() {} void SequenceNode::anchor() {} void AliasNode::anchor() {} +namespace llvm { +namespace yaml { + /// Token - A single YAML token. struct Token { enum TokenKind { @@ -133,7 +148,7 @@ struct Token { TK_Alias, TK_Anchor, TK_Tag - } Kind; + } Kind = TK_Error; /// A string of length 0 or more whose begin() points to the logical location /// of the token in the input. @@ -142,14 +157,16 @@ struct Token { /// The value of a block scalar node. std::string Value; - Token() : Kind(TK_Error) {} + Token() = default; }; -} -} -typedef llvm::BumpPtrList<Token> TokenQueueT; +} // end namespace yaml +} // end namespace llvm + +using TokenQueueT = BumpPtrList<Token>; namespace { + /// @brief This struct is used to track simple keys. /// /// Simple keys are handled by creating an entry in SimpleKeys for each Token @@ -170,12 +187,13 @@ struct SimpleKey { return Tok == Other.Tok; } }; -} + +} // end anonymous namespace /// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit /// subsequence and the subsequence's length in code units (uint8_t). /// A length of 0 represents an error. 
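For context on the UTF8Decoded pair retyped just below: decoding one minimal well-formed subsequence walks a lead byte and up to three continuation bytes. A compact sketch of that shape (overlong and surrogate rejection omitted; the Scanner's real decodeUTF8 is stricter):

  #include <cstddef>
  #include <cstdint>
  #include <utility>

  using UTF8Decoded = std::pair<uint32_t, unsigned>;

  // Returns {code point, length in bytes}; a length of 0 reports an error.
  static UTF8Decoded decodeOne(const uint8_t *p, const uint8_t *end) {
    const UTF8Decoded error(0, 0);
    if (p == end)
      return error;
    unsigned len = *p < 0x80 ? 1
                 : (*p >> 5) == 0x6 ? 2       // 110xxxxx
                 : (*p >> 4) == 0xE ? 3       // 1110xxxx
                 : (*p >> 3) == 0x1E ? 4 : 0; // 11110xxx
    if (len == 0 || static_cast<size_t>(end - p) < len)
      return error;
    static const uint8_t leadMask[] = {0, 0x7F, 0x1F, 0x0F, 0x07};
    uint32_t cp = p[0] & leadMask[len];
    for (unsigned i = 1; i != len; ++i) {
      if ((p[i] & 0xC0) != 0x80) // not a continuation byte
        return error;
      cp = (cp << 6) | (p[i] & 0x3F);
    }
    return {cp, len};
  }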
-typedef std::pair<uint32_t, unsigned> UTF8Decoded; +using UTF8Decoded = std::pair<uint32_t, unsigned>; static UTF8Decoded decodeUTF8(StringRef Range) { StringRef::iterator Position= Range.begin(); @@ -229,6 +247,7 @@ static UTF8Decoded decodeUTF8(StringRef Range) { namespace llvm { namespace yaml { + /// @brief Scans YAML tokens from a MemoryBuffer. class Scanner { public: @@ -350,7 +369,8 @@ private: /// ns-char. StringRef::iterator skip_ns_char(StringRef::iterator Position); - typedef StringRef::iterator (Scanner::*SkipWhileFunc)(StringRef::iterator); + using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator); + /// @brief Skip minimal well-formed code unit subsequences until Func /// returns its input. /// @@ -655,10 +675,10 @@ bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) { } bool yaml::scanTokens(StringRef Input) { - llvm::SourceMgr SM; - llvm::yaml::Scanner scanner(Input, SM); - for (;;) { - llvm::yaml::Token T = scanner.getNext(); + SourceMgr SM; + Scanner scanner(Input, SM); + while (true) { + Token T = scanner.getNext(); if (T.Kind == Token::TK_StreamEnd) break; else if (T.Kind == Token::TK_Error) @@ -1744,7 +1764,7 @@ Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors, std::error_code *EC) : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {} -Stream::~Stream() {} +Stream::~Stream() = default; bool Stream::failed() { return scanner->failed(); } @@ -1851,8 +1871,6 @@ bool Node::failed() const { return Doc->failed(); } - - StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { // TODO: Handle newlines properly. We need to remove leading whitespace. if (Value[0] == '"') { // Double quoted. diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp index c410b1d560860..601084f9eae3c 100644 --- a/lib/Support/YAMLTraits.cpp +++ b/lib/Support/YAMLTraits.cpp @@ -8,17 +8,27 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/YAMLTraits.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/YAMLParser.h" #include "llvm/Support/raw_ostream.h" -#include <cctype> +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstdlib> #include <cstring> +#include <string> +#include <vector> + using namespace llvm; using namespace yaml; @@ -26,11 +36,9 @@ using namespace yaml; // IO //===----------------------------------------------------------------------===// -IO::IO(void *Context) : Ctxt(Context) { -} +IO::IO(void *Context) : Ctxt(Context) {} -IO::~IO() { -} +IO::~IO() = default; void *IO::getContext() { return Ctxt; @@ -46,15 +54,13 @@ void IO::setContext(void *Context) { Input::Input(StringRef InputContent, void *Ctxt, SourceMgr::DiagHandlerTy DiagHandler, void *DiagHandlerCtxt) - : IO(Ctxt), Strm(new Stream(InputContent, SrcMgr, false, &EC)), - CurrentNode(nullptr) { + : IO(Ctxt), Strm(new Stream(InputContent, SrcMgr, false, &EC)) { if (DiagHandler) SrcMgr.setDiagHandler(DiagHandler, DiagHandlerCtxt); DocIterator = Strm->begin(); } -Input::~Input() { -} +Input::~Input() = default; std::error_code Input::error() { return EC; } @@ -398,13 +404,9 @@ bool Input::canElideEmptySequence() 
{ //===----------------------------------------------------------------------===// Output::Output(raw_ostream &yout, void *context, int WrapColumn) - : IO(context), Out(yout), WrapColumn(WrapColumn), Column(0), - ColumnAtFlowStart(0), ColumnAtMapFlowStart(0), NeedBitValueComma(false), - NeedFlowSequenceComma(false), EnumerationMatchFound(false), - NeedsNewLine(false), WriteDefaultValues(false) {} + : IO(context), Out(yout), WrapColumn(WrapColumn) {} -Output::~Output() { -} +Output::~Output() = default; bool Output::outputting() { return true; @@ -911,12 +913,9 @@ void ScalarTraits<double>::output(const double &Val, void *, raw_ostream &Out) { } StringRef ScalarTraits<double>::input(StringRef Scalar, void *, double &Val) { - SmallString<32> buff(Scalar.begin(), Scalar.end()); - char *end; - Val = strtod(buff.c_str(), &end); - if (*end != '\0') - return "invalid floating point number"; - return StringRef(); + if (to_float(Scalar, Val)) + return StringRef(); + return "invalid floating point number"; } void ScalarTraits<float>::output(const float &Val, void *, raw_ostream &Out) { @@ -924,12 +923,9 @@ void ScalarTraits<float>::output(const float &Val, void *, raw_ostream &Out) { } StringRef ScalarTraits<float>::input(StringRef Scalar, void *, float &Val) { - SmallString<32> buff(Scalar.begin(), Scalar.end()); - char *end; - Val = strtod(buff.c_str(), &end); - if (*end != '\0') - return "invalid floating point number"; - return StringRef(); + if (to_float(Scalar, Val)) + return StringRef(); + return "invalid floating point number"; } void ScalarTraits<Hex8>::output(const Hex8 &Val, void *, raw_ostream &Out) { diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 1abc8ed8683d5..9480cd46d28fc 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -548,7 +548,11 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { pos += Size; #ifndef LLVM_ON_WIN32 +#if defined(__linux__) + bool ShouldWriteInChunks = true; +#else bool ShouldWriteInChunks = false; +#endif #else // Writing a large size of output to Windows console returns ENOMEM. 
It seems // that, prior to Windows 8, WriteFile() is redirecting to WriteConsole(), and diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h index 3e0e3978b90b5..37b9690d0434a 100644 --- a/lib/Target/AArch64/AArch64.h +++ b/lib/Target/AArch64/AArch64.h @@ -31,6 +31,7 @@ class MachineFunctionPass; FunctionPass *createAArch64DeadRegisterDefinitions(); FunctionPass *createAArch64RedundantCopyEliminationPass(); +FunctionPass *createAArch64CondBrTuning(); FunctionPass *createAArch64ConditionalCompares(); FunctionPass *createAArch64AdvSIMDScalar(); FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM, @@ -55,6 +56,7 @@ void initializeAArch64A53Fix835769Pass(PassRegistry&); void initializeAArch64A57FPLoadBalancingPass(PassRegistry&); void initializeAArch64AdvSIMDScalarPass(PassRegistry&); void initializeAArch64CollectLOHPass(PassRegistry&); +void initializeAArch64CondBrTuningPass(PassRegistry &); void initializeAArch64ConditionalComparesPass(PassRegistry&); void initializeAArch64ConditionOptimizerPass(PassRegistry&); void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&); diff --git a/lib/Target/AArch64/AArch64CondBrTuning.cpp b/lib/Target/AArch64/AArch64CondBrTuning.cpp new file mode 100644 index 0000000000000..f27bc97ec3f3e --- /dev/null +++ b/lib/Target/AArch64/AArch64CondBrTuning.cpp @@ -0,0 +1,336 @@ +//===-- AArch64CondBrTuning.cpp --- Conditional branch tuning for AArch64 -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file contains a pass that transforms CBZ/CBNZ/TBZ/TBNZ instructions +/// into a conditional branch (B.cond), when the NZCV flags can be set for +/// "free". This is preferred on targets that have more flexibility when +/// scheduling B.cond instructions as compared to CBZ/CBNZ/TBZ/TBNZ (assuming +/// all other variables are equal). This can also reduce register pressure. +/// +/// A few examples: +/// +/// 1) add w8, w0, w1 -> cmn w0, w1 ; CMN is an alias of ADDS. +/// cbz w8, .LBB_2 -> b.eq .LBB0_2 +/// +/// 2) add w8, w0, w1 -> adds w8, w0, w1 ; w8 has multiple uses. +/// cbz w8, .LBB1_2 -> b.eq .LBB1_2 +/// +/// 3) sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses. 
+/// tbz w8, #31, .LBB6_2 -> b.ge .LBB6_2 +/// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64Subtarget.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "aarch64-cond-br-tuning" +#define AARCH64_CONDBR_TUNING_NAME "AArch64 Conditional Branch Tuning" + +namespace { +class AArch64CondBrTuning : public MachineFunctionPass { + const AArch64InstrInfo *TII; + const TargetRegisterInfo *TRI; + + MachineRegisterInfo *MRI; + +public: + static char ID; + AArch64CondBrTuning() : MachineFunctionPass(ID) { + initializeAArch64CondBrTuningPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + StringRef getPassName() const override { return AARCH64_CONDBR_TUNING_NAME; } + +private: + MachineInstr *getOperandDef(const MachineOperand &MO); + MachineInstr *convertToFlagSetting(MachineInstr &MI, bool IsFlagSetting); + MachineInstr *convertToCondBr(MachineInstr &MI); + bool tryToTuneBranch(MachineInstr &MI, MachineInstr &DefMI); +}; +} // end anonymous namespace + +char AArch64CondBrTuning::ID = 0; + +INITIALIZE_PASS(AArch64CondBrTuning, "aarch64-cond-br-tuning", + AARCH64_CONDBR_TUNING_NAME, false, false) + +void AArch64CondBrTuning::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachineInstr *AArch64CondBrTuning::getOperandDef(const MachineOperand &MO) { + if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) + return nullptr; + return MRI->getUniqueVRegDef(MO.getReg()); +} + +MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI, + bool IsFlagSetting) { + // If this is already the flag setting version of the instruction (e.g., SUBS) + // just make sure the implicit-def of NZCV isn't marked dead. + if (IsFlagSetting) { + for (unsigned I = MI.getNumExplicitOperands(), E = MI.getNumOperands(); + I != E; ++I) { + MachineOperand &MO = MI.getOperand(I); + if (MO.isReg() && MO.isDead() && MO.getReg() == AArch64::NZCV) + MO.setIsDead(false); + } + return &MI; + } + bool Is64Bit; + unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode(), Is64Bit); + unsigned NewDestReg = MI.getOperand(0).getReg(); + if (MRI->hasOneNonDBGUse(MI.getOperand(0).getReg())) + NewDestReg = Is64Bit ? 
AArch64::XZR : AArch64::WZR; + + MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), + TII->get(NewOpc), NewDestReg); + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) + MIB.add(MI.getOperand(I)); + + return MIB; +} + +MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) { + AArch64CC::CondCode CC; + MachineBasicBlock *TargetMBB = TII->getBranchDestBlock(MI); + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode!"); + + case AArch64::CBZW: + case AArch64::CBZX: + CC = AArch64CC::EQ; + break; + case AArch64::CBNZW: + case AArch64::CBNZX: + CC = AArch64CC::NE; + break; + case AArch64::TBZW: + case AArch64::TBZX: + CC = AArch64CC::GE; + break; + case AArch64::TBNZW: + case AArch64::TBNZX: + CC = AArch64CC::LT; + break; + } + return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc)) + .addImm(CC) + .addMBB(TargetMBB); +} + +bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI, + MachineInstr &DefMI) { + // We don't want NZCV bits live across blocks. + if (MI.getParent() != DefMI.getParent()) + return false; + + bool IsFlagSetting = true; + unsigned MIOpc = MI.getOpcode(); + MachineInstr *NewCmp = nullptr, *NewBr = nullptr; + switch (DefMI.getOpcode()) { + default: + return false; + case AArch64::ADDWri: + case AArch64::ADDWrr: + case AArch64::ADDWrs: + case AArch64::ADDWrx: + case AArch64::ANDWri: + case AArch64::ANDWrr: + case AArch64::ANDWrs: + case AArch64::BICWrr: + case AArch64::BICWrs: + case AArch64::SUBWri: + case AArch64::SUBWrr: + case AArch64::SUBWrs: + case AArch64::SUBWrx: + IsFlagSetting = false; + case AArch64::ADDSWri: + case AArch64::ADDSWrr: + case AArch64::ADDSWrs: + case AArch64::ADDSWrx: + case AArch64::ANDSWri: + case AArch64::ANDSWrr: + case AArch64::ANDSWrs: + case AArch64::BICSWrr: + case AArch64::BICSWrs: + case AArch64::SUBSWri: + case AArch64::SUBSWrr: + case AArch64::SUBSWrs: + case AArch64::SUBSWrx: + switch (MIOpc) { + default: + llvm_unreachable("Unexpected opcode!"); + + case AArch64::CBZW: + case AArch64::CBNZW: + case AArch64::TBZW: + case AArch64::TBNZW: + // Check to see if the TBZ/TBNZ is checking the sign bit. + if ((MIOpc == AArch64::TBZW || MIOpc == AArch64::TBNZW) && + MI.getOperand(1).getImm() != 31) + return false; + + // There must not be any instruction between DefMI and MI that clobbers or + // reads NZCV. 
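The requirement just stated, which the loop that follows implements, is a straight linear scan between the defining instruction and the branch. Modeled standalone over simplified records (not the MachineInstr/TargetRegisterInfo API):

  #include <cstddef>
  #include <vector>

  struct Instr {
    bool readsNZCV;
    bool writesNZCV;
  };

  // True when everything strictly between Def and Use leaves NZCV alone,
  // so Def can safely become the flag-setting form feeding a B.cond.
  static bool flagsFreeBetween(const std::vector<Instr> &block,
                               size_t def, size_t use) {
    for (size_t i = def + 1; i < use; ++i)
      if (block[i].readsNZCV || block[i].writesNZCV)
        return false;
    return true;
  }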
+      MachineBasicBlock::iterator I(DefMI), E(MI);
+      for (I = std::next(I); I != E; ++I) {
+        if (I->modifiesRegister(AArch64::NZCV, TRI) ||
+            I->readsRegister(AArch64::NZCV, TRI))
+          return false;
+      }
+      DEBUG(dbgs() << "  Replacing instructions:\n    ");
+      DEBUG(DefMI.print(dbgs()));
+      DEBUG(dbgs() << "    ");
+      DEBUG(MI.print(dbgs()));
+
+      NewCmp = convertToFlagSetting(DefMI, IsFlagSetting);
+      NewBr = convertToCondBr(MI);
+      break;
+    }
+    break;
+
+  case AArch64::ADDXri:
+  case AArch64::ADDXrr:
+  case AArch64::ADDXrs:
+  case AArch64::ADDXrx:
+  case AArch64::ANDXri:
+  case AArch64::ANDXrr:
+  case AArch64::ANDXrs:
+  case AArch64::BICXrr:
+  case AArch64::BICXrs:
+  case AArch64::SUBXri:
+  case AArch64::SUBXrr:
+  case AArch64::SUBXrs:
+  case AArch64::SUBXrx:
+    IsFlagSetting = false;
+  case AArch64::ADDSXri:
+  case AArch64::ADDSXrr:
+  case AArch64::ADDSXrs:
+  case AArch64::ADDSXrx:
+  case AArch64::ANDSXri:
+  case AArch64::ANDSXrr:
+  case AArch64::ANDSXrs:
+  case AArch64::BICSXrr:
+  case AArch64::BICSXrs:
+  case AArch64::SUBSXri:
+  case AArch64::SUBSXrr:
+  case AArch64::SUBSXrs:
+  case AArch64::SUBSXrx:
+    switch (MIOpc) {
+    default:
+      llvm_unreachable("Unexpected opcode!");
+
+    case AArch64::CBZX:
+    case AArch64::CBNZX:
+    case AArch64::TBZX:
+    case AArch64::TBNZX: {
+      // Check to see if the TBZ/TBNZ is checking the sign bit.
+      if ((MIOpc == AArch64::TBZX || MIOpc == AArch64::TBNZX) &&
+          MI.getOperand(1).getImm() != 63)
+        return false;
+      // There must not be any instruction between DefMI and MI that clobbers or
+      // reads NZCV.
+      MachineBasicBlock::iterator I(DefMI), E(MI);
+      for (I = std::next(I); I != E; ++I) {
+        if (I->modifiesRegister(AArch64::NZCV, TRI) ||
+            I->readsRegister(AArch64::NZCV, TRI))
+          return false;
+      }
+      DEBUG(dbgs() << "  Replacing instructions:\n    ");
+      DEBUG(DefMI.print(dbgs()));
+      DEBUG(dbgs() << "    ");
+      DEBUG(MI.print(dbgs()));
+
+      NewCmp = convertToFlagSetting(DefMI, IsFlagSetting);
+      NewBr = convertToCondBr(MI);
+      break;
+    }
+    }
+    break;
+  }
+  assert(NewCmp && NewBr && "Expected new instructions.");
+
+  DEBUG(dbgs() << "    with instruction:\n    ");
+  DEBUG(NewCmp->print(dbgs()));
+  DEBUG(dbgs() << "    ");
+  DEBUG(NewBr->print(dbgs()));
+
+  // If this was a flag setting version of the instruction, we use the original
+  // instruction by just clearing the dead flag on the implicit-def of NZCV.
+  // Therefore, we should not erase this instruction.
+  if (!IsFlagSetting)
+    DefMI.eraseFromParent();
+  MI.eraseFromParent();
+  return true;
+}
+
+bool AArch64CondBrTuning::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(*MF.getFunction()))
+    return false;
+
+  DEBUG(dbgs() << "********** AArch64 Conditional Branch Tuning  **********\n"
+               << "********** Function: " << MF.getName() << '\n');
+
+  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+  TRI = MF.getSubtarget().getRegisterInfo();
+  MRI = &MF.getRegInfo();
+
+  bool Changed = false;
+  for (MachineBasicBlock &MBB : MF) {
+    bool LocalChange = false;
+    for (MachineBasicBlock::iterator I = MBB.getFirstTerminator(),
+                                     E = MBB.end();
+         I != E; ++I) {
+      MachineInstr &MI = *I;
+      switch (MI.getOpcode()) {
+      default:
+        break;
+      case AArch64::CBZW:
+      case AArch64::CBZX:
+      case AArch64::CBNZW:
+      case AArch64::CBNZX:
+      case AArch64::TBZW:
+      case AArch64::TBZX:
+      case AArch64::TBNZW:
+      case AArch64::TBNZX:
+        MachineInstr *DefMI = getOperandDef(MI.getOperand(0));
+        LocalChange = (DefMI && tryToTuneBranch(MI, *DefMI));
+        break;
+      }
+      // If the optimization was successful, we can't optimize any other
+      // branches because doing so would clobber the NZCV flags.
+      if (LocalChange) {
+        Changed = true;
+        break;
+      }
+    }
+  }
+  return Changed;
+}
+
+FunctionPass *llvm::createAArch64CondBrTuning() {
+  return new AArch64CondBrTuning();
+}
diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index 544f67433fd53..ee54550c9900b 100644
--- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -13,7 +13,9 @@
 #include "AArch64.h"
 #include "AArch64RegisterInfo.h"
+#include "AArch64Subtarget.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -84,6 +86,51 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
       DEBUG(dbgs() << "  Ignoring, XZR or WZR already used by the instruction\n");
       continue;
     }
+    if (MF.getSubtarget<AArch64Subtarget>().hasLSE()) {
+      // XZ/WZ for LSE can only be used when acquire semantics are not used;
+      // LDOPAL WZ is an invalid opcode.
+ switch (MI.getOpcode()) { + case AArch64::CASALb: + case AArch64::CASALh: + case AArch64::CASALs: + case AArch64::CASALd: + case AArch64::SWPALb: + case AArch64::SWPALh: + case AArch64::SWPALs: + case AArch64::SWPALd: + case AArch64::LDADDALb: + case AArch64::LDADDALh: + case AArch64::LDADDALs: + case AArch64::LDADDALd: + case AArch64::LDEORALb: + case AArch64::LDEORALh: + case AArch64::LDEORALs: + case AArch64::LDEORALd: + case AArch64::LDSETALb: + case AArch64::LDSETALh: + case AArch64::LDSETALs: + case AArch64::LDSETALd: + case AArch64::LDSMINALb: + case AArch64::LDSMINALh: + case AArch64::LDSMINALs: + case AArch64::LDSMINALd: + case AArch64::LDSMAXALb: + case AArch64::LDSMAXALh: + case AArch64::LDSMAXALs: + case AArch64::LDSMAXALd: + case AArch64::LDUMINALb: + case AArch64::LDUMINALh: + case AArch64::LDUMINALs: + case AArch64::LDUMINALd: + case AArch64::LDUMAXALb: + case AArch64::LDUMAXALh: + case AArch64::LDUMAXALs: + case AArch64::LDUMAXALd: + continue; + default: + break; + } + } const MCInstrDesc &Desc = MI.getDesc(); for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) { MachineOperand &MO = MI.getOperand(I); diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 8c2c0a564c302..04687847c1a30 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -201,7 +201,7 @@ private: bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width); - void SelectCMP_SWAP(SDNode *N); + bool SelectCMP_SWAP(SDNode *N); }; } // end anonymous namespace @@ -2609,9 +2609,13 @@ bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { } /// We've got special pseudo-instructions for these -void AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { +bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { unsigned Opcode; EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); + + // Leave IR for LSE if subtarget supports it. + if (Subtarget->hasLSE()) return false; + if (MemTy == MVT::i8) Opcode = AArch64::CMP_SWAP_8; else if (MemTy == MVT::i16) @@ -2637,6 +2641,8 @@ void AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); CurDAG->RemoveDeadNode(N); + + return true; } void AArch64DAGToDAGISel::Select(SDNode *Node) { @@ -2660,8 +2666,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { break; case ISD::ATOMIC_CMP_SWAP: - SelectCMP_SWAP(Node); - return; + if (SelectCMP_SWAP(Node)) + return; + break; case ISD::READ_REGISTER: if (tryReadRegister(Node)) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 083ca2156598f..2965106fd2708 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10563,11 +10563,20 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { TargetLowering::AtomicExpansionKind AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - return Size <= 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None; + if (Size > 128) return AtomicExpansionKind::None; + // Nand not supported in LSE. + if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC; + // Currently leaving And and Sub to LLSC + if ((AI->getOperation() == AtomicRMWInst::And) || (AI->getOperation() == AtomicRMWInst::Sub)) + return AtomicExpansionKind::LLSC; + // Leave 128 bits to LLSC. 
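This shouldExpandAtomicRMWInIR hunk (its final return continues just below) encodes a small decision table: oversize operations are left alone, Nand has no LSE encoding, And and Sub are deliberately kept on load/store-exclusive loops, and everything else goes native when LSE is present. Restated under simplified stand-in types:

  enum class RMWOp { Add, Sub, And, Nand, Or, Xor, Max, Min };
  enum class Expansion { None, LLSC };

  // Sketch of the selection policy; the real code inspects the
  // AtomicRMWInst and the subtarget.
  static Expansion expandAtomicRMW(bool hasLSE, RMWOp op, unsigned bits) {
    if (bits > 128)
      return Expansion::None;  // too wide to expand here
    if (op == RMWOp::Nand || op == RMWOp::And || op == RMWOp::Sub)
      return Expansion::LLSC;  // no LSE encoding / intentionally LLSC
    return (hasLSE && bits < 128) ? Expansion::None : Expansion::LLSC;
  }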
+ return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None : AtomicExpansionKind::LLSC; } bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR( AtomicCmpXchgInst *AI) const { + // If subtarget has LSE, leave cmpxchg intact for codegen. + if (Subtarget->hasLSE()) return false; // At -O0, fast-regalloc cannot cope with the live vregs necessary to // implement cmpxchg without spilling. If the address being exchanged is also // on the stack and close enough to the spill slot, this can lead to a diff --git a/lib/Target/AArch64/AArch64InstrAtomics.td b/lib/Target/AArch64/AArch64InstrAtomics.td index 71826bec6b11f..de283b70210fe 100644 --- a/lib/Target/AArch64/AArch64InstrAtomics.td +++ b/lib/Target/AArch64/AArch64InstrAtomics.td @@ -405,3 +405,49 @@ def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$scratch), (ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi, GPR64:$newLo, GPR64:$newHi), []>, Sched<[WriteAtomic]>; + +// v8.1 Atomic instructions: +def : Pat<(atomic_load_add_8 GPR64:$Rn, GPR32:$Rs), (LDADDALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_add_16 GPR64:$Rn, GPR32:$Rs), (LDADDALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_add_32 GPR64:$Rn, GPR32:$Rs), (LDADDALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_add_64 GPR64:$Rn, GPR64:$Rs), (LDADDALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_or_8 GPR64:$Rn, GPR32:$Rs), (LDSETALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_or_16 GPR64:$Rn, GPR32:$Rs), (LDSETALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_or_32 GPR64:$Rn, GPR32:$Rs), (LDSETALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_or_64 GPR64:$Rn, GPR64:$Rs), (LDSETALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_xor_8 GPR64:$Rn, GPR32:$Rs), (LDEORALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_xor_16 GPR64:$Rn, GPR32:$Rs), (LDEORALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_xor_32 GPR64:$Rn, GPR32:$Rs), (LDEORALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_xor_64 GPR64:$Rn, GPR64:$Rs), (LDEORALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_max_8 GPR64:$Rn, GPR32:$Rs), (LDSMAXALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_max_16 GPR64:$Rn, GPR32:$Rs), (LDSMAXALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_max_32 GPR64:$Rn, GPR32:$Rs), (LDSMAXALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_max_64 GPR64:$Rn, GPR64:$Rs), (LDSMAXALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_umax_8 GPR64:$Rn, GPR32:$Rs), (LDUMAXALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umax_16 GPR64:$Rn, GPR32:$Rs), (LDUMAXALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umax_32 GPR64:$Rn, GPR32:$Rs), (LDUMAXALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umax_64 GPR64:$Rn, GPR64:$Rs), (LDUMAXALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_min_8 GPR64:$Rn, GPR32:$Rs), (LDSMINALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_min_16 GPR64:$Rn, GPR32:$Rs), (LDSMINALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_min_32 GPR64:$Rn, GPR32:$Rs), (LDSMINALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_min_64 GPR64:$Rn, GPR64:$Rs), (LDSMINALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_umin_8 GPR64:$Rn, GPR32:$Rs), (LDUMINALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umin_16 GPR64:$Rn, GPR32:$Rs), (LDUMINALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umin_32 GPR64:$Rn, GPR32:$Rs), (LDUMINALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umin_64 GPR64:$Rn, GPR64:$Rs), (LDUMINALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : 
Pat<(atomic_cmp_swap_8 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALb GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>; +def : Pat<(atomic_cmp_swap_16 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALh GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>; +def : Pat<(atomic_cmp_swap_32 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALs GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>; +def : Pat<(atomic_cmp_swap_64 GPR64:$Rn, GPR64:$Rold, GPR64:$Rnew), (CASALd GPR64:$Rold, GPR64:$Rnew, GPR64sp:$Rn)>; + +def : Pat<(atomic_swap_8 GPR64:$Rn, GPR32:$Rs), (SWPALb GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_swap_16 GPR64:$Rn, GPR32:$Rs), (SWPALh GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_swap_32 GPR64:$Rn, GPR32:$Rs), (SWPALs GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_swap_64 GPR64:$Rn, GPR64:$Rs), (SWPALd GPR64:$Rs, GPR64sp:$Rn)>; diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index eea012382150c..314e89bbca863 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1036,7 +1036,7 @@ static bool UpdateOperandRegClass(MachineInstr &Instr) { /// \brief Return the opcode that does not set flags when possible - otherwise /// return the original opcode. The caller is responsible to do the actual /// substitution and legality checking. -static unsigned convertFlagSettingOpcode(const MachineInstr &MI) { +static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) { // Don't convert all compare instructions, because for some the zero register // encoding becomes the sp register. bool MIDefinesZeroReg = false; @@ -1145,7 +1145,7 @@ bool AArch64InstrInfo::optimizeCompareInstr( return true; } unsigned Opc = CmpInstr.getOpcode(); - unsigned NewOpc = convertFlagSettingOpcode(CmpInstr); + unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr); if (NewOpc == Opc) return false; const MCInstrDesc &MCID = get(NewOpc); @@ -3318,7 +3318,7 @@ static bool getMaddPatterns(MachineInstr &Root, // When NZCV is live bail out. if (Cmp_NZCV == -1) return false; - unsigned NewOpc = convertFlagSettingOpcode(Root); + unsigned NewOpc = convertToNonFlagSettingOpc(Root); // When opcode can't change bail out. // CHECKME: do we miss any cases for opcode conversion? if (NewOpc == Opc) diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index 59f3405fe439a..58e9ce583d44c 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -119,6 +119,44 @@ public: } } + /// \brief Return the opcode that set flags when possible. The caller is + /// responsible for ensuring the opc has a flag setting equivalent. 
+ static unsigned convertToFlagSettingOpc(unsigned Opc, bool &Is64Bit) { + switch (Opc) { + default: + llvm_unreachable("Opcode has no flag setting equivalent!"); + // 32-bit cases: + case AArch64::ADDWri: Is64Bit = false; return AArch64::ADDSWri; + case AArch64::ADDWrr: Is64Bit = false; return AArch64::ADDSWrr; + case AArch64::ADDWrs: Is64Bit = false; return AArch64::ADDSWrs; + case AArch64::ADDWrx: Is64Bit = false; return AArch64::ADDSWrx; + case AArch64::ANDWri: Is64Bit = false; return AArch64::ANDSWri; + case AArch64::ANDWrr: Is64Bit = false; return AArch64::ANDSWrr; + case AArch64::ANDWrs: Is64Bit = false; return AArch64::ANDSWrs; + case AArch64::BICWrr: Is64Bit = false; return AArch64::BICSWrr; + case AArch64::BICWrs: Is64Bit = false; return AArch64::BICSWrs; + case AArch64::SUBWri: Is64Bit = false; return AArch64::SUBSWri; + case AArch64::SUBWrr: Is64Bit = false; return AArch64::SUBSWrr; + case AArch64::SUBWrs: Is64Bit = false; return AArch64::SUBSWrs; + case AArch64::SUBWrx: Is64Bit = false; return AArch64::SUBSWrx; + // 64-bit cases: + case AArch64::ADDXri: Is64Bit = true; return AArch64::ADDSXri; + case AArch64::ADDXrr: Is64Bit = true; return AArch64::ADDSXrr; + case AArch64::ADDXrs: Is64Bit = true; return AArch64::ADDSXrs; + case AArch64::ADDXrx: Is64Bit = true; return AArch64::ADDSXrx; + case AArch64::ANDXri: Is64Bit = true; return AArch64::ANDSXri; + case AArch64::ANDXrr: Is64Bit = true; return AArch64::ANDSXrr; + case AArch64::ANDXrs: Is64Bit = true; return AArch64::ANDSXrs; + case AArch64::BICXrr: Is64Bit = true; return AArch64::BICSXrr; + case AArch64::BICXrs: Is64Bit = true; return AArch64::BICSXrs; + case AArch64::SUBXri: Is64Bit = true; return AArch64::SUBSXri; + case AArch64::SUBXrr: Is64Bit = true; return AArch64::SUBSXrr; + case AArch64::SUBXrs: Is64Bit = true; return AArch64::SUBSXrs; + case AArch64::SUBXrx: Is64Bit = true; return AArch64::SUBSXrx; + } + } + + /// Return true if this is a load/store that can be potentially paired/merged. bool isCandidateToMergeOrPair(MachineInstr &MI) const; diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 9243eb91cc1ac..005f2d51e4036 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -795,6 +795,7 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, int LoadSize = getMemScale(*LoadI); int StoreSize = getMemScale(*StoreI); unsigned LdRt = getLdStRegOp(*LoadI).getReg(); + const MachineOperand &StMO = getLdStRegOp(*StoreI); unsigned StRt = getLdStRegOp(*StoreI).getReg(); bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt); @@ -807,7 +808,13 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, // Remove the load, if the destination register of the loads is the same // register for stored value. if (StRt == LdRt && LoadSize == 8) { - StoreI->clearRegisterKills(StRt, TRI); + for (MachineInstr &MI : make_range(StoreI->getIterator(), + LoadI->getIterator())) { + if (MI.killsRegister(StRt, TRI)) { + MI.clearRegisterKills(StRt, TRI); + break; + } + } DEBUG(dbgs() << "Remove load instruction:\n "); DEBUG(LoadI->print(dbgs())); DEBUG(dbgs() << "\n"); @@ -819,7 +826,7 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt) .addReg(IsStoreXReg ? 
AArch64::XZR : AArch64::WZR) - .addReg(StRt) + .add(StMO) .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } else { // FIXME: Currently we disable this transformation in big-endian targets as @@ -860,14 +867,14 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri), DestReg) - .addReg(StRt) + .add(StMO) .addImm(AndMaskEncoded); } else { BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri), DestReg) - .addReg(StRt) + .add(StMO) .addImm(Immr) .addImm(Imms); } @@ -876,7 +883,10 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, // Clear kill flags between store and load. for (MachineInstr &MI : make_range(StoreI->getIterator(), BitExtMI->getIterator())) - MI.clearRegisterKills(StRt, TRI); + if (MI.killsRegister(StRt, TRI)) { + MI.clearRegisterKills(StRt, TRI); + break; + } DEBUG(dbgs() << "Promoting load by replacing :\n "); DEBUG(StoreI->print(dbgs())); diff --git a/lib/Target/AArch64/AArch64MacroFusion.cpp b/lib/Target/AArch64/AArch64MacroFusion.cpp index 3b71d529db59b..ccc9d2ad1b482 100644 --- a/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -7,37 +7,27 @@ // //===----------------------------------------------------------------------===// // -// \file This file contains the AArch64 implementation of the DAG scheduling mutation -// to pair instructions back to back. +/// \file This file contains the AArch64 implementation of the DAG scheduling +/// mutation to pair instructions back to back. // //===----------------------------------------------------------------------===// #include "AArch64MacroFusion.h" #include "AArch64Subtarget.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/CodeGen/MacroFusion.h" #include "llvm/Target/TargetInstrInfo.h" -#define DEBUG_TYPE "misched" - -STATISTIC(NumFused, "Number of instr pairs fused"); - using namespace llvm; -static cl::opt<bool> EnableMacroFusion("aarch64-misched-fusion", cl::Hidden, - cl::desc("Enable scheduling for macro fusion."), cl::init(true)); - namespace { -/// \brief Verify that the instr pair, FirstMI and SecondMI, should be fused -/// together. Given an anchor instr, when the other instr is unspecified, then -/// check if the anchor instr may be part of a fused pair at all. +/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused +/// together. Given SecondMI, when FirstMI is unspecified, then check if +/// SecondMI may be part of a fused pair at all. static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, - const MachineInstr *SecondMI) { - assert((FirstMI || SecondMI) && "At least one instr must be specified"); - + const MachineInstr &SecondMI) { const AArch64InstrInfo &II = static_cast<const AArch64InstrInfo&>(TII); const AArch64Subtarget &ST = static_cast<const AArch64Subtarget&>(TSI); @@ -45,9 +35,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, unsigned FirstOpcode = FirstMI ? FirstMI->getOpcode() : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END); - unsigned SecondOpcode = - SecondMI ? SecondMI->getOpcode() - : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END); + unsigned SecondOpcode = SecondMI.getOpcode(); if (ST.hasArithmeticBccFusion()) // Fuse CMN, CMP, TST followed by Bcc. 
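The rework in the next hunk re-keys shouldScheduleAdjacent on SecondMI, so the scheduler can ask whether an instruction could terminate a fused pair before any partner is known (FirstOpcode then degenerates to INSTRUCTION_LIST_END). The shape of that predicate for the MOVZ/MOVK literal pairing, with stand-in enumerators:

  enum Opcode { MOVZWi, MOVKWi, Other, ListEnd };

  // Keyed on the second instruction of the candidate pair: with
  // firstOp == ListEnd the query means "could secondOp end any pair?".
  static bool shouldFuseLiteral(Opcode firstOp, Opcode secondOp) {
    switch (secondOp) {
    case MOVKWi: // upper half of a 32-bit immediate
      return firstOp == MOVZWi || firstOp == ListEnd;
    default:
      return false;
    }
  }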
@@ -128,158 +116,49 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, if (ST.hasFuseAES()) // Fuse AES crypto operations. - switch(FirstOpcode) { + switch(SecondOpcode) { // AES encode. - case AArch64::AESErr: - return SecondOpcode == AArch64::AESMCrr || - SecondOpcode == AArch64::INSTRUCTION_LIST_END; + case AArch64::AESMCrr : + return FirstOpcode == AArch64::AESErr || + FirstOpcode == AArch64::INSTRUCTION_LIST_END; // AES decode. - case AArch64::AESDrr: - return SecondOpcode == AArch64::AESIMCrr || - SecondOpcode == AArch64::INSTRUCTION_LIST_END; + case AArch64::AESIMCrr: + return FirstOpcode == AArch64::AESDrr || + FirstOpcode == AArch64::INSTRUCTION_LIST_END; } if (ST.hasFuseLiterals()) // Fuse literal generation operations. - switch (FirstOpcode) { + switch (SecondOpcode) { // PC relative address. - case AArch64::ADRP: - return SecondOpcode == AArch64::ADDXri || - SecondOpcode == AArch64::INSTRUCTION_LIST_END; + case AArch64::ADDXri: + return FirstOpcode == AArch64::ADRP || + FirstOpcode == AArch64::INSTRUCTION_LIST_END; // 32 bit immediate. - case AArch64::MOVZWi: - return (SecondOpcode == AArch64::MOVKWi && - SecondMI->getOperand(3).getImm() == 16) || - SecondOpcode == AArch64::INSTRUCTION_LIST_END; - // Lower half of 64 bit immediate. - case AArch64::MOVZXi: - return (SecondOpcode == AArch64::MOVKXi && - SecondMI->getOperand(3).getImm() == 16) || - SecondOpcode == AArch64::INSTRUCTION_LIST_END; - // Upper half of 64 bit immediate. + case AArch64::MOVKWi: + return (FirstOpcode == AArch64::MOVZWi && + SecondMI.getOperand(3).getImm() == 16) || + FirstOpcode == AArch64::INSTRUCTION_LIST_END; + // Lower and upper half of 64 bit immediate. case AArch64::MOVKXi: - return FirstMI->getOperand(3).getImm() == 32 && - ((SecondOpcode == AArch64::MOVKXi && - SecondMI->getOperand(3).getImm() == 48) || - SecondOpcode == AArch64::INSTRUCTION_LIST_END); + return FirstOpcode == AArch64::INSTRUCTION_LIST_END || + (FirstOpcode == AArch64::MOVZXi && + SecondMI.getOperand(3).getImm() == 16) || + (FirstOpcode == AArch64::MOVKXi && + FirstMI->getOperand(3).getImm() == 32 && + SecondMI.getOperand(3).getImm() == 48); } return false; } -/// \brief Implement the fusion of instr pairs in the scheduling DAG, -/// anchored at the instr in AnchorSU.. -static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &AnchorSU) { - const MachineInstr *AnchorMI = AnchorSU.getInstr(); - if (!AnchorMI || AnchorMI->isPseudo() || AnchorMI->isTransient()) - return false; - - // If the anchor instr is the ExitSU, then consider its predecessors; - // otherwise, its successors. - bool Preds = (&AnchorSU == &DAG->ExitSU); - SmallVectorImpl<SDep> &AnchorDeps = Preds ? AnchorSU.Preds : AnchorSU.Succs; - - const MachineInstr *FirstMI = Preds ? nullptr : AnchorMI; - const MachineInstr *SecondMI = Preds ? AnchorMI : nullptr; - - // Check if the anchor instr may be fused. - if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(), - FirstMI, SecondMI)) - return false; - - // Explorer for fusion candidates among the dependencies of the anchor instr. - for (SDep &Dep : AnchorDeps) { - // Ignore dependencies that don't enforce ordering. - if (Dep.isWeak()) - continue; - - SUnit &DepSU = *Dep.getSUnit(); - // Ignore the ExitSU if the dependents are successors. - if (!Preds && &DepSU == &DAG->ExitSU) - continue; - - const MachineInstr *DepMI = DepSU.getInstr(); - if (!DepMI || DepMI->isPseudo() || DepMI->isTransient()) - continue; - - FirstMI = Preds ? DepMI : AnchorMI; - SecondMI = Preds ? 
AnchorMI : DepMI; - if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(), - FirstMI, SecondMI)) - continue; - - // Create a single weak edge between the adjacent instrs. The only effect is - // to cause bottom-up scheduling to heavily prioritize the clustered instrs. - SUnit &FirstSU = Preds ? DepSU : AnchorSU; - SUnit &SecondSU = Preds ? AnchorSU : DepSU; - DAG->addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)); - - // Adjust the latency between the anchor instr and its - // predecessors/successors. - for (SDep &IDep : AnchorDeps) - if (IDep.getSUnit() == &DepSU) - IDep.setLatency(0); - - // Adjust the latency between the dependent instr and its - // successors/predecessors. - for (SDep &IDep : Preds ? DepSU.Succs : DepSU.Preds) - if (IDep.getSUnit() == &AnchorSU) - IDep.setLatency(0); - - DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse "; - FirstSU.print(dbgs(), DAG); dbgs() << " - "; - SecondSU.print(dbgs(), DAG); dbgs() << " / "; - dbgs() << DAG->TII->getName(FirstMI->getOpcode()) << " - " << - DAG->TII->getName(SecondMI->getOpcode()) << '\n'; ); - - if (&SecondSU != &DAG->ExitSU) - // Make instructions dependent on FirstSU also dependent on SecondSU to - // prevent them from being scheduled between FirstSU and and SecondSU. - for (SUnit::const_succ_iterator - SI = FirstSU.Succs.begin(), SE = FirstSU.Succs.end(); - SI != SE; ++SI) { - if (!SI->getSUnit() || SI->getSUnit() == &SecondSU) - continue; - DEBUG(dbgs() << " Copy Succ "; - SI->getSUnit()->print(dbgs(), DAG); dbgs() << '\n';); - DAG->addEdge(SI->getSUnit(), SDep(&SecondSU, SDep::Artificial)); - } - - ++NumFused; - return true; - } - - return false; -} - -/// \brief Post-process the DAG to create cluster edges between instrs that may -/// be fused by the processor into a single operation. -class AArch64MacroFusion : public ScheduleDAGMutation { -public: - AArch64MacroFusion() {} - - void apply(ScheduleDAGInstrs *DAGInstrs) override; -}; - -void AArch64MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { - ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); - - // For each of the SUnits in the scheduling block, try to fuse the instr in it - // with one in its successors. - for (SUnit &ISU : DAG->SUnits) - scheduleAdjacentImpl(DAG, ISU); - - // Try to fuse the instr in the ExitSU with one in its predecessors. - scheduleAdjacentImpl(DAG, DAG->ExitSU); -} - } // end namespace namespace llvm { std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation () { - return EnableMacroFusion ? make_unique<AArch64MacroFusion>() : nullptr; + return createMacroFusionDAGMutation(shouldScheduleAdjacent); } } // end namespace llvm diff --git a/lib/Target/AArch64/AArch64MacroFusion.h b/lib/Target/AArch64/AArch64MacroFusion.h index e5efedd9fbfd9..32d90d4c40d6f 100644 --- a/lib/Target/AArch64/AArch64MacroFusion.h +++ b/lib/Target/AArch64/AArch64MacroFusion.h @@ -2,23 +2,18 @@ // // The LLVM Compiler Infrastructure // -// \fileThis file is distributed under the University of Illinois Open Source +// This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // -// This file contains the AArch64 definition of the DAG scheduling mutation -// to pair instructions back to back. +/// \file This file contains the AArch64 definition of the DAG scheduling +/// mutation to pair instructions back to back. 
// //===----------------------------------------------------------------------===// -#include "AArch64InstrInfo.h" #include "llvm/CodeGen/MachineScheduler.h" -//===----------------------------------------------------------------------===// -// AArch64MacroFusion - DAG post-processing to encourage fusion of macro ops. -//===----------------------------------------------------------------------===// - namespace llvm { /// Note that you have to add: diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp index 9b3899e0681cf..69124dbd0f838 100644 --- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -469,10 +469,6 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { getCopyMapping(DstRB.getID(), SrcRB.getID(), Size), /*NumOperands*/ 2); } - case TargetOpcode::G_SEQUENCE: - // FIXME: support this, but the generic code is really not going to do - // anything sane. - return getInvalidInstructionMapping(); default: break; } diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td index 303398ea0b7f3..5d1608ef04afa 100644 --- a/lib/Target/AArch64/AArch64SchedA57.td +++ b/lib/Target/AArch64/AArch64SchedA57.td @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// The Cortex-A57 is a traditional superscaler microprocessor with a +// The Cortex-A57 is a traditional superscalar microprocessor with a // conservative 3-wide in-order stage for decode and dispatch. Combined with the // much wider out-of-order issue stage, this produced a need to carefully // schedule micro-ops so that all three decoded each cycle are successfully diff --git a/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/lib/Target/AArch64/AArch64SchedFalkorDetails.td index 3d737402022d8..0aeb1f3e30584 100644 --- a/lib/Target/AArch64/AArch64SchedFalkorDetails.td +++ b/lib/Target/AArch64/AArch64SchedFalkorDetails.td @@ -32,8 +32,8 @@ //===----------------------------------------------------------------------===// // Define 0 micro-op types -def FalkorWr_none_1cyc : SchedWriteRes<[]> { - let Latency = 1; +def FalkorWr_LdStInc_none_3cyc : SchedWriteRes<[]> { + let Latency = 3; let NumMicroOps = 0; } def FalkorWr_none_3cyc : SchedWriteRes<[]> { @@ -505,7 +505,8 @@ def FalkorWr_4VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, let NumMicroOps = 12; } -// Forwarding logic is modeled for multiply add/accumulate. +// Forwarding logic is modeled for multiply add/accumulate and +// load/store base register increment. 
// ----------------------------------------------------------------------------- def FalkorReadIMA32 : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>; def FalkorReadIMA64 : SchedReadAdvance<4, [FalkorWr_IMUL64_1X_4cyc, FalkorWr_IMUL64_1X_5cyc]>; @@ -513,9 +514,13 @@ def FalkorReadVMA : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc, FalkorWr def FalkorReadFMA32 : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc, FalkorWr_FMUL32_2VXVY_5cyc]>; def FalkorReadFMA64 : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr_FMUL64_2VXVY_6cyc]>; +def FalkorReadIncLd : SchedReadAdvance<2, [FalkorWr_LdStInc_none_3cyc]>; +def FalkorReadIncSt : SchedReadAdvance<1, [FalkorWr_LdStInc_none_3cyc]>; + // SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast // ----------------------------------------------------------------------------- -def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).getImm() == 0}]>; +def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).isImm() && + MI->getOperand(1).getImm() == 0}]>; def FalkorOp1ZrReg : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR || MI->getOperand(1).getReg() == AArch64::XZR}]>; @@ -770,84 +775,113 @@ def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], // SIMD Load Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instrs LD2i64)>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], (instrs LD2i64_POST)>; - -def : InstRW<[FalkorWr_1LD_1VXVY_4cyc], (instregex "^LD1i(8|16|32)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1VXVY_4cyc], (instregex "^LD1i(8|16|32)_POST$")>; - -def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>; - -def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD3i64)>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instrs LD3i64_POST)>; -def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD4i64)>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instrs LD4i64_POST)>; - -def : InstRW<[FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_2VXVY_4cyc], (instregex 
"^LD2i(8|16|32)_POST$")>; - -def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>; - -def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_3LD_3cyc], (instrs LD3Threev2d)>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc], (instrs LD3Threev2d_POST)>; -def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[FalkorWr_1LD_3VXVY_4cyc], (instregex "^LD3i(8|16|32)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3VXVY_4cyc], (instregex "^LD3i(8|16|32)_POST$")>; - -def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>; - -def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_4LD_3cyc], (instrs LD4Fourv2d)>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc], (instrs LD4Fourv2d_POST)>; -def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)_POST$")>; - -def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc], (instregex "^LD3Threev(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_1none_4cyc], - (instregex "^LD3Threev(8b|4h|2s|1d)_POST$")>; - -def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc], (instregex "^LD4Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_2none_4cyc], - (instregex "^LD4Fourv(8b|4h|2s|1d)_POST$")>; - -def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD3Threev(16b|8h|4s)$")>; - -def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD4Fourv(16b|8h|4s)$")>; - -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc], - (instregex "^LD3Threev(16b|8h|4s)_POST$")>; - -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc], - (instregex "^LD4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instrs LD2i64)>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, 
FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instrs LD2i64_POST)>; + +def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd], (instregex "^LD1i(8|16|32)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD1i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(8b|4h|2s)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instrs LD3i64)>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instrs LD3i64_POST)>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instrs LD4i64)>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instrs LD4i64_POST)>; + +def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd], (instregex "^LD2i(8|16|32)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD2i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd], + (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instrs LD3Threev2d)>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd], + (instrs LD3Threev2d_POST)>; +def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd], + (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd], (instregex "^LD3i(8|16|32)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD3i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex 
"^LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], + (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], + (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd], + (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instrs LD4Fourv2d)>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd], + (instrs LD4Fourv2d_POST)>; +def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd], + (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd], (instregex "^LD4i(8|16|32)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD4i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd], + (instregex "^LD3Threev(8b|4h|2s)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd], + (instregex "^LD3Threev(8b|4h|2s)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd], + (instregex "^LD4Fourv(8b|4h|2s)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd], + (instregex "^LD4Fourv(8b|4h|2s)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD3Threev(16b|8h|4s)$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD4Fourv(16b|8h|4s)$")>; + +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD3Threev(16b|8h|4s)_POST$")>; + +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD4Fourv(16b|8h|4s)_POST$")>; // Arithmetic and Logical Instructions // ----------------------------------------------------------------------------- @@ -929,87 +963,105 @@ def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>; // SIMD Store Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STR(Q|D|S|H|B)ui$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc], +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STR(Q|D|S|H|B)ui$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], (instregex "^STR(Q|D|S|H|B)(post|pre)$")>; -def : InstRW<[FalkorWr_STRVro], (instregex "^STR(D|S|H|B)ro(W|X)$")>; -def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^STPQi$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2VSD_2ST_0cyc], +def : InstRW<[FalkorWr_STRVro, ReadDefault, FalkorReadIncSt], + (instregex "^STR(D|S|H|B)ro(W|X)$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STPQi$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], (instregex "^STPQ(post|pre)$")>; -def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex 
"^STP(D|S)(i)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc], +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STP(D|S)(i)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], (instregex "^STP(D|S)(post|pre)$")>; -def : InstRW<[FalkorWr_STRQro], (instregex "^STRQro(W|X)$")>; -def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STUR(Q|D|S|B|H)i$")>; -def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instrs STNPDi, STNPSi)>; -def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instrs STNPQi)>; - -def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc], +def : InstRW<[FalkorWr_STRQro, ReadDefault, FalkorReadIncSt], + (instregex "^STRQro(W|X)$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STUR(Q|D|S|B|H)i$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instrs STNPDi, STNPSi)>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instrs STNPQi)>; + +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], (instregex "^ST1(One(v8b|v4h|v2s|v1d)_POST|(i8|i16|i32|i64)_POST)$")>; -def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>; -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc], +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>; -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc], - (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>; - -def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>; -def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>; -def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST3(i8|i16|i32|i64)$")>; -def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST4(i8|i16|i32|i64)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))_POST$")>; + +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3(i8|i16|i32|i64)$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4(i8|i16|i32|i64)$")>; // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc], +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>; // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). 
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc], +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>; // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc], +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], (instregex "^ST3(i8|i16|i32|i64)_POST$")>; // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc], +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], (instregex "^ST4(i8|i16|i32|i64)_POST$")>; -def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc], - (instregex "^ST3Three(v8b|v4h|v2s|v1d)$")>; +def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3Three(v8b|v4h|v2s)$")>; // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc], - (instregex "^ST3Three(v8b|v4h|v2s|v1d)_POST$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3Three(v8b|v4h|v2s)_POST$")>; -def : InstRW<[FalkorWr_3VSD_3ST_0cyc], (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>; -def : InstRW<[FalkorWr_3VSD_3ST_0cyc], (instrs ST3Threev2d)>; +def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>; +def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], + (instrs ST3Threev2d)>; // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc], +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>; // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc], +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], (instrs ST3Threev2d_POST)>; -def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc], - (instregex "^ST4Four(v8b|v4h|v2s|v1d)$")>; +def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4Four(v8b|v4h|v2s)$")>; // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc], - (instregex "^ST4Four(v8b|v4h|v2s|v1d)_POST$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4Four(v8b|v4h|v2s)_POST$")>; -def : InstRW<[FalkorWr_4VSD_4ST_0cyc], (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>; -def : InstRW<[FalkorWr_4VSD_4ST_0cyc], (instrs ST4Fourv2d)>; +def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>; +def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], + (instrs ST4Fourv2d)>; // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc], +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>; // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). 
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc], +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], (instrs ST4Fourv2d_POST)>; -def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc], +def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], (instregex "^ST3Three(v16b|v8h|v4s)$")>; // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc], +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], (instregex "^ST3Three(v16b|v8h|v4s)_POST$")>; -def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc], +def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], (instregex "^ST4Four(v16b|v8h|v4s)$")>; // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). -def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc], +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], (instregex "^ST4Four(v16b|v8h|v4s)_POST$")>; // Branch Instructions @@ -1033,22 +1085,25 @@ def : InstRW<[FalkorWr_4VXVY_3cyc], (instrs SHA256SU1rrr)>; // FP Load Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(Q|D|S|H|B)i$")>; -def : InstRW<[FalkorWr_LDRro], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>; -def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc], +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDUR(Q|D|S|H|B)i$")>; +def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd], + (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], (instrs LDNPQi)>; -def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc], +def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], (instrs LDPQi)>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc], +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], (instregex "LDNP(D|S)i$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc], +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], (instregex "LDP(D|S)i$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc], +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], (instregex "LDP(D|S)(pre|post)$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc], +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], (instregex "^LDPQ(pre|post)$")>; // FP Data Processing Instructions @@ -1106,31 +1161,41 @@ def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v2f64|v // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>; def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFUMi)>; -def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc], +def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], (instregex "^LDNP(W|X)i$")>; -def : 
InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc], +def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], (instregex "^LDP(W|X)i$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc], +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], (instregex "^LDP(W|X)(post|pre)$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(BB|HH|W|X)ui$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDR(BB|HH|W|X)ui$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LDR(BB|HH|W|X)(post|pre)$")>; -def : InstRW<[FalkorWr_LDRro], (instregex "^LDR(BB|HH|W|X)ro(W|X)$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(W|X)l$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDTR(B|H|W|X)i$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(BB|HH|W|X)i$")>; +def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd], + (instregex "^LDR(BB|HH|W|X)ro(W|X)$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDR(W|X)l$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDTR(B|H|W|X)i$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDUR(BB|HH|W|X)i$")>; def : InstRW<[FalkorWr_PRFMro], (instregex "^PRFMro(W|X)$")>; -def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc], +def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd], (instrs LDPSWi)>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc], +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd], (instregex "^LDPSW(post|pre)$")>; -def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4cyc], +def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], + (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_4cyc, FalkorReadIncLd], (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>; -def : InstRW<[FalkorWr_LDRSro], (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>; -def : InstRW<[FalkorWr_1LD_4cyc], (instrs LDRSWl)>; -def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>; -def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>; +def : InstRW<[FalkorWr_LDRSro, FalkorReadIncLd], + (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], + (instrs LDRSWl)>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], + (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], + (instregex "^LDURS(BW|BX|HW|HX|W)i$")>; // Miscellaneous Data-Processing Instructions // ----------------------------------------------------------------------------- @@ -1178,32 +1243,46 @@ def : InstRW<[FalkorWr_1none_0cyc], (instrs BRK, DCPS1, DCPS2, DCPS3, HINT, HL def : InstRW<[FalkorWr_1ST_0cyc], (instrs SYSxt, SYSLxt)>; def : InstRW<[FalkorWr_1Z_0cyc], (instrs MSRpstateImm1, MSRpstateImm4)>; -def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^(LDAR(B|H|W|X)|LDAXP(W|X)|LDAXR(B|H|W|X)|LDXP(W|X)|LDXR(B|H|W|X))$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^(LDAR(B|H|W|X)|LDAXR(B|H|W|X)|LDXR(B|H|W|X))$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "^(LDAXP(W|X)|LDXP(W|X))$")>; def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS, MOVbaseTLS)>; def : InstRW<[FalkorWr_1LD_1Z_3cyc], (instrs DRPS)>; 
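The FalkorReadIncLd and FalkorReadIncSt operands threaded through the load/store entries above and below pair a SchedReadAdvance with the 3-cycle FalkorWr_LdStInc_none_3cyc write that models the base-register post-increment: a consumer reading the updated base through FalkorReadIncLd (advance 2) sees an effective latency of 3 - 2 = 1 cycle, and through FalkorReadIncSt (advance 1) sees 2 cycles. A minimal C++ sketch of that arithmetic, assuming a hypothetical helper name and a clamp to zero rather than LLVM's exact internals:

    #include <algorithm>

    // Latency observed by a consumer when a write with SchedWriteRes latency
    // WriteLatency is read through a SchedReadAdvance of AdvanceCycles.
    static int effectiveOperandLatency(int WriteLatency, int AdvanceCycles) {
      // The read-advance lets the consumer issue early; clamp so the model
      // never reports a negative latency.
      return std::max(WriteLatency - AdvanceCycles, 0);
    }

    // FalkorWr_LdStInc_none_3cyc (Latency = 3) read via:
    //   FalkorReadIncLd (advance 2) -> effectiveOperandLatency(3, 2) == 1
    //   FalkorReadIncSt (advance 1) -> effectiveOperandLatency(3, 1) == 2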
def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>; -def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs STNPWi, STNPXi)>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instrs STNPWi, STNPXi)>; def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>; def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>; -def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STLR(B|H|W|X)$")>; -def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXP(W|X)$")>; -def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXR(B|H|W|X)$")>; - -def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXP(W|X)$")>; -def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STLR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STXP(W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STXR(B|H|W|X)$")>; + +def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STLXP(W|X)$")>; +def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STLXR(B|H|W|X)$")>; // Store Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STP(W|X)i$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1SD_1ST_0cyc], +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STP(W|X)i$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], (instregex "^STP(W|X)(post|pre)$")>; -def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STR(BB|HH|W|X)ui$")>; -def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1SD_1ST_0cyc], +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STR(BB|HH|W|X)ui$")>; +def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], (instregex "^STR(BB|HH|W|X)(post|pre)$")>; -def : InstRW<[FalkorWr_STRro], (instregex "^STR(BB|HH|W|X)ro(W|X)$")>; -def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STTR(B|H|W|X)i$")>; -def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STUR(BB|HH|W|X)i$")>; +def : InstRW<[FalkorWr_STRro, ReadDefault, FalkorReadIncSt], + (instregex "^STR(BB|HH|W|X)ro(W|X)$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STTR(B|H|W|X)i$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STUR(BB|HH|W|X)i$")>; diff --git a/lib/Target/AArch64/AArch64SchedKryoDetails.td b/lib/Target/AArch64/AArch64SchedKryoDetails.td index 02cccccd3078c..cf4cdabb8cbfc 100644 --- a/lib/Target/AArch64/AArch64SchedKryoDetails.td +++ b/lib/Target/AArch64/AArch64SchedKryoDetails.td @@ -1374,7 +1374,9 @@ def KryoWrite_3cyc_LS_LS_400ln : let Latency = 3; let NumMicroOps = 2; } def : InstRW<[KryoWrite_3cyc_LS_LS_400ln], - (instregex "(LDAX?R(B|H|W|X)|LDAXP(W|X))")>; + (instregex "LDAX?R(B|H|W|X)")>; +def : InstRW<[KryoWrite_3cyc_LS_LS_400ln, WriteLDHi], + (instregex "LDAXP(W|X)")>; def KryoWrite_3cyc_LS_LS_401ln : SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { let Latency = 3; let NumMicroOps = 2; @@ -1565,7 +1567,7 @@ def KryoWrite_3cyc_LS_258ln : SchedWriteRes<[KryoUnitLS]> { let Latency = 3; let NumMicroOps = 1; } -def : InstRW<[KryoWrite_3cyc_LS_258ln], +def : 
InstRW<[KryoWrite_3cyc_LS_258ln, WriteLDHi], (instregex "LDXP(W|X)")>; def KryoWrite_3cyc_LS_258_1ln : SchedWriteRes<[KryoUnitLS]> { diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index d4a8cecdb29f1..6660f0babb8a6 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -47,6 +47,11 @@ static cl::opt<bool> EnableCCMP("aarch64-enable-ccmp", cl::desc("Enable the CCMP formation pass"), cl::init(true), cl::Hidden); +static cl::opt<bool> + EnableCondBrTuning("aarch64-enable-cond-br-tune", + cl::desc("Enable the conditional branch tuning pass"), + cl::init(true), cl::Hidden); + static cl::opt<bool> EnableMCR("aarch64-enable-mcr", cl::desc("Enable the machine combiner pass"), cl::init(true), cl::Hidden); @@ -429,6 +434,8 @@ bool AArch64PassConfig::addILPOpts() { addPass(createAArch64ConditionalCompares()); if (EnableMCR) addPass(&MachineCombinerID); + if (EnableCondBrTuning) + addPass(createAArch64CondBrTuning()); if (EnableEarlyIfConversion) addPass(&EarlyIfConverterID); if (EnableStPairSuppress) diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt index f0f50f29be0f3..02b12b5e90ca2 100644 --- a/lib/Target/AArch64/CMakeLists.txt +++ b/lib/Target/AArch64/CMakeLists.txt @@ -43,6 +43,7 @@ add_llvm_target(AArch64CodeGen AArch64AsmPrinter.cpp AArch64CleanupLocalDynamicTLSPass.cpp AArch64CollectLOH.cpp + AArch64CondBrTuning.cpp AArch64ConditionalCompares.cpp AArch64DeadRegisterDefinitionsPass.cpp AArch64ExpandPseudoInsts.cpp diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 43a6fa9ce0896..3d075018904c0 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -43,26 +43,25 @@ public: const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = { - // This table *must* be in the order that the fixup_* kinds are defined in - // AArch64FixupKinds.h. - // - // Name Offset (bits) Size (bits) Flags - { "fixup_aarch64_pcrel_adr_imm21", 0, 32, PCRelFlagVal }, - { "fixup_aarch64_pcrel_adrp_imm21", 0, 32, PCRelFlagVal }, - { "fixup_aarch64_add_imm12", 10, 12, 0 }, - { "fixup_aarch64_ldst_imm12_scale1", 10, 12, 0 }, - { "fixup_aarch64_ldst_imm12_scale2", 10, 12, 0 }, - { "fixup_aarch64_ldst_imm12_scale4", 10, 12, 0 }, - { "fixup_aarch64_ldst_imm12_scale8", 10, 12, 0 }, - { "fixup_aarch64_ldst_imm12_scale16", 10, 12, 0 }, - { "fixup_aarch64_ldr_pcrel_imm19", 5, 19, PCRelFlagVal }, - { "fixup_aarch64_movw", 5, 16, 0 }, - { "fixup_aarch64_pcrel_branch14", 5, 14, PCRelFlagVal }, - { "fixup_aarch64_pcrel_branch19", 5, 19, PCRelFlagVal }, - { "fixup_aarch64_pcrel_branch26", 0, 26, PCRelFlagVal }, - { "fixup_aarch64_pcrel_call26", 0, 26, PCRelFlagVal }, - { "fixup_aarch64_tlsdesc_call", 0, 0, 0 } - }; + // This table *must* be in the order that the fixup_* kinds are defined + // in AArch64FixupKinds.h. 
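 // As a rough walkthrough of how the three numeric columns are consumed
 // (inferred from applyFixup() below, so read it as a sketch rather than a
 // spec): "Offset" is the shift applied to the resolved value via
 // Value <<= Info.TargetOffset, "Size" is the width in bits of the patched
 // field, and PCRelFlagVal flags the PC-relative kinds. For instance,
 // fixup_aarch64_add_imm12 (Offset 10, Size 12) lands the 12-bit immediate
 // in bits [21:10] of the 32-bit instruction word.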
+ // + // Name Offset (bits) Size (bits) Flags + {"fixup_aarch64_pcrel_adr_imm21", 0, 32, PCRelFlagVal}, + {"fixup_aarch64_pcrel_adrp_imm21", 0, 32, PCRelFlagVal}, + {"fixup_aarch64_add_imm12", 10, 12, 0}, + {"fixup_aarch64_ldst_imm12_scale1", 10, 12, 0}, + {"fixup_aarch64_ldst_imm12_scale2", 10, 12, 0}, + {"fixup_aarch64_ldst_imm12_scale4", 10, 12, 0}, + {"fixup_aarch64_ldst_imm12_scale8", 10, 12, 0}, + {"fixup_aarch64_ldst_imm12_scale16", 10, 12, 0}, + {"fixup_aarch64_ldr_pcrel_imm19", 5, 19, PCRelFlagVal}, + {"fixup_aarch64_movw", 5, 16, 0}, + {"fixup_aarch64_pcrel_branch14", 5, 14, PCRelFlagVal}, + {"fixup_aarch64_pcrel_branch19", 5, 19, PCRelFlagVal}, + {"fixup_aarch64_pcrel_branch26", 0, 26, PCRelFlagVal}, + {"fixup_aarch64_pcrel_call26", 0, 26, PCRelFlagVal}, + {"fixup_aarch64_tlsdesc_call", 0, 0, 0}}; if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); @@ -72,8 +71,9 @@ public: return Infos[Kind - FirstTargetFixupKind]; } - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const override; bool mayNeedRelaxation(const MCInst &Inst) const override; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, @@ -261,13 +261,15 @@ unsigned AArch64AsmBackend::getFixupKindContainereSizeInBytes(unsigned Kind) con } } -void AArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel, MCContext &Ctx) const { +void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef<char> Data, uint64_t Value, + bool IsPCRel) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); if (!Value) return; // Doesn't change encoding. MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind()); + MCContext &Ctx = Asm.getContext(); // Apply any target-specific value adjustments. Value = adjustFixupValue(Fixup, Value, Ctx); @@ -275,7 +277,7 @@ void AArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, Value <<= Info.TargetOffset; unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); // Used to point to big endian bytes. 
unsigned FulleSizeInBytes = getFixupKindContainereSizeInBytes(Fixup.getKind()); @@ -289,7 +291,7 @@ void AArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, } } else { // Handle as big-endian - assert((Offset + FulleSizeInBytes) <= DataSize && "Invalid fixup size!"); + assert((Offset + FulleSizeInBytes) <= Data.size() && "Invalid fixup size!"); assert(NumBytes <= FulleSizeInBytes && "Invalid fixup size!"); for (unsigned i = 0; i != NumBytes; ++i) { unsigned Idx = FulleSizeInBytes - 1 - i; @@ -539,16 +541,14 @@ public: return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian, IsILP32); } - void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override; + void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, bool &IsResolved) override; }; -void ELFAArch64AsmBackend::processFixupValue( - const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFixup &Fixup, - const MCFragment *DF, const MCValue &Target, uint64_t &Value, - bool &IsResolved) { +void ELFAArch64AsmBackend::processFixupValue(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target, + bool &IsResolved) { // The ADRP instruction adds some multiple of 0x1000 to the current PC & // ~0xfff. This means that the required offset to reach a symbol can vary by // up to one step depending on where the ADRP is in memory. For example: diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 6ab2b9ef04598..7494e5decd6f6 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -238,6 +238,36 @@ def FeatureSDWA : SubtargetFeature<"sdwa", "Support SDWA (Sub-DWORD Addressing) extension" >; +def FeatureSDWAOmod : SubtargetFeature<"sdwa-omod", + "HasSDWAOmod", + "true", + "Support OMod with SDWA (Sub-DWORD Addressing) extension" +>; + +def FeatureSDWAScalar : SubtargetFeature<"sdwa-scalar", + "HasSDWAScalar", + "true", + "Support scalar register with SDWA (Sub-DWORD Addressing) extension" +>; + +def FeatureSDWASdst : SubtargetFeature<"sdwa-sdst", + "HasSDWASdst", + "true", + "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension" +>; + +def FeatureSDWAMac : SubtargetFeature<"sdwa-mav", + "HasSDWAMac", + "true", + "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension" +>; + +def FeatureSDWAClampVOPC : SubtargetFeature<"sdwa-clamp-vopc", + "HasSDWAClampVOPC", + "true", + "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension" +>; + def FeatureDPP : SubtargetFeature<"dpp", "HasDPP", "true", @@ -421,8 +451,8 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel, - FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA, - FeatureDPP + FeatureScalarStores, FeatureInv2PiInlineImm, + FeatureSDWA, FeatureSDWAClampVOPC, FeatureSDWAMac, FeatureDPP ] >; @@ -432,7 +462,8 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9", FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm, FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode, - FeatureFastFMAF32, FeatureSDWA, FeatureDPP, + FeatureFastFMAF32, FeatureDPP, + FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst, FeatureFlatInstOffsets, 
FeatureFlatGlobalInsts, FeatureFlatScratchInsts ] >; @@ -449,14 +480,14 @@ class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping, def FeatureISAVersion6_0_0 : SubtargetFeatureISAVersion <6,0,0, [FeatureSouthernIslands, - FeatureFastFMAF32, + FeatureFastFMAF32, HalfRate64Ops, FeatureLDSBankCount32]>; def FeatureISAVersion6_0_1 : SubtargetFeatureISAVersion <6,0,1, [FeatureSouthernIslands, FeatureLDSBankCount32]>; - + def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0, [FeatureSeaIslands, FeatureLDSBankCount32]>; @@ -644,7 +675,11 @@ def isCIVI : Predicate < "Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">, AssemblerPredicate<"FeatureCIInsts">; -def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">; +def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, + AssemblerPredicate<"FeatureFlatAddressSpace">; + +def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">, + AssemblerPredicate<"FeatureFlatGlobalInsts">; def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">, AssemblerPredicate<"Feature16BitInsts">; diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 5586b513b5fca..96f819fd0e684 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3527,18 +3527,25 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, //===----------------------------------------------------------------------===// SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, - const TargetRegisterClass *RC, - unsigned Reg, EVT VT) const { + const TargetRegisterClass *RC, + unsigned Reg, EVT VT, + const SDLoc &SL, + bool RawReg) const { MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned VirtualRegister; + unsigned VReg; + if (!MRI.isLiveIn(Reg)) { - VirtualRegister = MRI.createVirtualRegister(RC); - MRI.addLiveIn(Reg, VirtualRegister); + VReg = MRI.createVirtualRegister(RC); + MRI.addLiveIn(Reg, VReg); } else { - VirtualRegister = MRI.getLiveInVirtReg(Reg); + VReg = MRI.getLiveInVirtReg(Reg); } - return DAG.getRegister(VirtualRegister, VT); + + if (RawReg) + return DAG.getRegister(VReg, VT); + + return DAG.getCopyFromReg(DAG.getEntryNode(), SL, VReg, VT); } uint32_t AMDGPUTargetLowering::getImplicitParameterOffset( @@ -3657,6 +3664,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(STORE_MSKOR) NODE_NAME_CASE(LOAD_CONSTANT) NODE_NAME_CASE(TBUFFER_STORE_FORMAT) + NODE_NAME_CASE(TBUFFER_STORE_FORMAT_X3) + NODE_NAME_CASE(TBUFFER_LOAD_FORMAT) NODE_NAME_CASE(ATOMIC_CMP_SWAP) NODE_NAME_CASE(ATOMIC_INC) NODE_NAME_CASE(ATOMIC_DEC) diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index 0d066cdbdff4d..a45234e2b39f2 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -216,10 +216,25 @@ public: /// \brief Helper function that adds Reg to the LiveIn list of the DAG's /// MachineFunction. /// - /// \returns a RegisterSDNode representing Reg. - virtual SDValue CreateLiveInRegister(SelectionDAG &DAG, - const TargetRegisterClass *RC, - unsigned Reg, EVT VT) const; + /// \returns a RegisterSDNode representing Reg if \p RawReg is true, otherwise + /// a copy from the register. 
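  /// A hypothetical call site, to make the \p RawReg distinction concrete
  /// (SGPR_32RegClass and Reg are illustrative placeholders here, not code
  /// from this patch):
  ///   // Copy chained off the entry node at SL:
  ///   SDValue Copy = CreateLiveInRegister(DAG, &AMDGPU::SGPR_32RegClass,
  ///                                       Reg, MVT::i32, SL);
  ///   // Raw RegisterSDNode for the live-in vreg; no copy is emitted:
  ///   SDValue Raw = CreateLiveInRegisterRaw(DAG, &AMDGPU::SGPR_32RegClass,
  ///                                         Reg, MVT::i32);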
+ SDValue CreateLiveInRegister(SelectionDAG &DAG, + const TargetRegisterClass *RC, + unsigned Reg, EVT VT, + const SDLoc &SL, + bool RawReg = false) const; + SDValue CreateLiveInRegister(SelectionDAG &DAG, + const TargetRegisterClass *RC, + unsigned Reg, EVT VT) const { + return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode())); + } + + // Returns the raw live in register rather than a copy from it. + SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG, + const TargetRegisterClass *RC, + unsigned Reg, EVT VT) const { + return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()), true); + } enum ImplicitParameter { FIRST_IMPLICIT, @@ -388,6 +403,8 @@ enum NodeType : unsigned { STORE_MSKOR, LOAD_CONSTANT, TBUFFER_STORE_FORMAT, + TBUFFER_STORE_FORMAT_X3, + TBUFFER_LOAD_FORMAT, ATOMIC_CMP_SWAP, ATOMIC_INC, ATOMIC_DEC, diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp index a01f5d37c7c16..69dc529861729 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -66,7 +66,9 @@ int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const { // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td enum SIEncodingFamily { SI = 0, - VI = 1 + VI = 1, + SDWA = 2, + SDWA9 = 3 }; // Wrapper for Tablegen'd function. enum Subtarget is not defined in any @@ -101,7 +103,12 @@ static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) { } int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const { - int MCOp = AMDGPU::getMCOpcode(Opcode, subtargetEncodingFamily(ST)); + SIEncodingFamily Gen = subtargetEncodingFamily(ST); + if (get(Opcode).TSFlags & SIInstrFlags::SDWA) + Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9 + : SIEncodingFamily::SDWA; + + int MCOp = AMDGPU::getMCOpcode(Opcode, Gen); // -1 means that Opcode is already a native instruction. if (MCOp == -1) diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td index e286558ce60d7..bcf89bb78ad66 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -70,6 +70,10 @@ def AMDGPUElseBreakOp : SDTypeProfile<1, 2, [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, i64>] >; +def AMDGPUAddeSubeOp : SDTypeProfile<2, 3, + [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisVT<0, i32>, SDTCisVT<1, i1>, SDTCisVT<4, i1>] +>; + //===----------------------------------------------------------------------===// // AMDGPU DAG Nodes // @@ -179,6 +183,12 @@ def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>; // out = (src1 > src0) ? 1 : 0 def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>; +// TODO: remove AMDGPUadde/AMDGPUsube when ADDCARRY/SUBCARRY get their own +// nodes in TargetSelectionDAG.td. 
+def AMDGPUadde : SDNode<"ISD::ADDCARRY", AMDGPUAddeSubeOp, []>; + +def AMDGPUsube : SDNode<"ISD::SUBCARRY", AMDGPUAddeSubeOp, []>; + def AMDGPUSetCCOp : SDTypeProfile<1, 3, [ // setcc SDTCisVT<0, i64>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT> ]>; diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 790a69b843979..cc56216c355bf 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -29,12 +29,20 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo() { using namespace TargetOpcode; const LLT S1= LLT::scalar(1); + const LLT V2S16 = LLT::vector(2, 16); const LLT S32 = LLT::scalar(32); const LLT S64 = LLT::scalar(64); const LLT P1 = LLT::pointer(1, 64); const LLT P2 = LLT::pointer(2, 64); setAction({G_ADD, S32}, Legal); + setAction({G_AND, S32}, Legal); + + setAction({G_BITCAST, V2S16}, Legal); + setAction({G_BITCAST, 1, S32}, Legal); + + setAction({G_BITCAST, S32}, Legal); + setAction({G_BITCAST, 1, V2S16}, Legal); // FIXME: i1 operands to intrinsics should always be legal, but other i1 // values may not be legal. We need to figure out how to distinguish @@ -61,6 +69,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo() { setAction({G_SELECT, S32}, Legal); setAction({G_SELECT, 1, S1}, Legal); + setAction({G_SHL, S32}, Legal); + setAction({G_STORE, S32}, Legal); setAction({G_STORE, 1, P1}, Legal); diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 8d157e2f98f24..ab5abf2039a5b 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -124,6 +124,11 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, HasScalarStores(false), HasInv2PiInlineImm(false), HasSDWA(false), + HasSDWAOmod(false), + HasSDWAScalar(false), + HasSDWASdst(false), + HasSDWAMac(false), + HasSDWAClampVOPC(false), HasDPP(false), FlatAddressSpace(false), FlatInstOffsets(false), diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 5f4f20316a6ba..2b16289c723ef 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -149,6 +149,11 @@ protected: bool HasScalarStores; bool HasInv2PiInlineImm; bool HasSDWA; + bool HasSDWAOmod; + bool HasSDWAScalar; + bool HasSDWASdst; + bool HasSDWAMac; + bool HasSDWAClampVOPC; bool HasDPP; bool FlatAddressSpace; bool FlatInstOffsets; @@ -431,6 +436,26 @@ public: return HasSDWA; } + bool hasSDWAOmod() const { + return HasSDWAOmod; + } + + bool hasSDWAScalar() const { + return HasSDWAScalar; + } + + bool hasSDWASdst() const { + return HasSDWASdst; + } + + bool hasSDWAMac() const { + return HasSDWAMac; + } + + bool hasSDWAClampVOPC() const { + return HasSDWAClampVOPC; + } + /// \brief Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. 
unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const { diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index b644eba536fa4..04fe9f689806c 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -342,6 +342,14 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { PM.add(createAMDGPUExternalAAWrapperPass()); } }); + + Builder.addExtension( + PassManagerBuilder::EP_CGSCCOptimizerLate, + [](const PassManagerBuilder &, legacy::PassManagerBase &PM) { + // Add infer address spaces pass to the opt pipeline after inlining + // but before SROA to increase SROA opportunities. + PM.add(createInferAddressSpacesPass()); + }); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 0d6689bd04c4e..88245b01683a5 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -184,9 +184,9 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, } } -unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) { - if (Vec) - return 0; +unsigned AMDGPUTTIImpl::getHardwareNumberOfRegisters(bool Vec) const { + // The concept of vector registers doesn't really exist. Some packed vector + // operations operate on the normal 32-bit registers. // Number of VGPRs on SI. if (ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) @@ -195,8 +195,18 @@ unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) { return 4 * 128; // XXX - 4 channels. Should these count as vector instead? } +unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) const { + // This is really the number of registers to fill when vectorizing / + // interleaving loops, so we lie to avoid trying to use all registers. + return getHardwareNumberOfRegisters(Vec) >> 3; +} + unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) const { - return Vector ? 0 : 32; + return 32; +} + +unsigned AMDGPUTTIImpl::getMinVectorRegisterBitWidth() const { + return 32; } unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { @@ -247,11 +257,11 @@ bool AMDGPUTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) { // Disable unrolling if the loop is not vectorized. + // TODO: Enable this again. if (VF == 1) return 1; - // Semi-arbitrary large amount. 
- return 64; + return 8; } int AMDGPUTTIImpl::getArithmeticInstrCost( diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index a60b1bb1b59c7..485e20411ab49 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -75,8 +75,10 @@ public: return TTI::PSK_FastHardware; } - unsigned getNumberOfRegisters(bool Vector); - unsigned getRegisterBitWidth(bool Vector) const; + unsigned getHardwareNumberOfRegisters(bool Vector) const; + unsigned getNumberOfRegisters(bool Vector) const; + unsigned getRegisterBitWidth(bool Vector) const ; + unsigned getMinVectorRegisterBitWidth() const; unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 392e9d89bd9ba..7b8756050b752 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -152,6 +152,8 @@ public: ImmTyExpTgt, ImmTyExpCompr, ImmTyExpVM, + ImmTyDFMT, + ImmTyNFMT, ImmTyHwreg, ImmTyOff, ImmTySendMsg, @@ -260,6 +262,8 @@ public: return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID); } + bool isSDWARegKind() const; + bool isImmTy(ImmTy ImmT) const { return isImm() && Imm.Type == ImmT; } @@ -292,6 +296,8 @@ public: bool isGLC() const { return isImmTy(ImmTyGLC); } bool isSLC() const { return isImmTy(ImmTySLC); } bool isTFE() const { return isImmTy(ImmTyTFE); } + bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); } + bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); } bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } @@ -636,6 +642,8 @@ public: case ImmTyGLC: OS << "GLC"; break; case ImmTySLC: OS << "SLC"; break; case ImmTyTFE: OS << "TFE"; break; + case ImmTyDFMT: OS << "DFMT"; break; + case ImmTyNFMT: OS << "NFMT"; break; case ImmTyClampSI: OS << "ClampSI"; break; case ImmTyOModSI: OS << "OModSI"; break; case ImmTyDppCtrl: OS << "DppCtrl"; break; @@ -993,7 +1001,9 @@ private: void errorExpTgt(); OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); - bool validateOperandLimitations(const MCInst &Inst); + bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc); + bool validateConstantBusLimitations(const MCInst &Inst); + bool validateEarlyClobberLimitations(const MCInst &Inst); bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; @@ -1029,6 +1039,8 @@ public: void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } + void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); + AMDGPUOperand::Ptr defaultGLC() const; AMDGPUOperand::Ptr defaultSLC() const; AMDGPUOperand::Ptr defaultTFE() const; @@ -1042,6 +1054,7 @@ public: AMDGPUOperand::Ptr defaultSMRDOffset20() const; AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; AMDGPUOperand::Ptr defaultOffsetU12() const; + AMDGPUOperand::Ptr defaultOffsetS13() 
const; OperandMatchResultTy parseOModOperand(OperandVector &Operands); @@ -1243,6 +1256,15 @@ bool AMDGPUOperand::isRegClass(unsigned RCID) const { return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); } +bool AMDGPUOperand::isSDWARegKind() const { + if (AsmParser->isVI()) + return isVReg(); + else if (AsmParser->isGFX9()) + return isRegKind(); + else + return false; +} + uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const { assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); @@ -2083,7 +2105,7 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); } -bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) { +bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { const unsigned Opcode = Inst.getOpcode(); const MCInstrDesc &Desc = MII.get(Opcode); unsigned ConstantBusUseCount = 0; @@ -2137,6 +2159,60 @@ bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) { return ConstantBusUseCount <= 1; } +bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { + + const unsigned Opcode = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opcode); + + const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); + if (DstIdx == -1 || + Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { + return true; + } + + const MCRegisterInfo *TRI = getContext().getRegisterInfo(); + + const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); + const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); + const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); + + assert(DstIdx != -1); + const MCOperand &Dst = Inst.getOperand(DstIdx); + assert(Dst.isReg()); + const unsigned DstReg = mc2PseudoReg(Dst.getReg()); + + const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; + + for (int SrcIdx : SrcIndices) { + if (SrcIdx == -1) break; + const MCOperand &Src = Inst.getOperand(SrcIdx); + if (Src.isReg()) { + const unsigned SrcReg = mc2PseudoReg(Src.getReg()); + if (isRegIntersect(DstReg, SrcReg, TRI)) { + return false; + } + } + } + + return true; +} + +bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, + const SMLoc &IDLoc) { + if (!validateConstantBusLimitations(Inst)) { + Error(IDLoc, + "invalid operand (violates constant bus restrictions)"); + return false; + } + if (!validateEarlyClobberLimitations(Inst)) { + Error(IDLoc, + "destination must be different than all sources"); + return false; + } + + return true; +} + bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -2169,9 +2245,8 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, switch (Result) { default: break; case Match_Success: - if (!validateOperandLimitations(Inst)) { - return Error(IDLoc, - "invalid operand (violates constant bus restrictions)"); + if (!validateInstruction(Inst, IDLoc)) { + return true; } Inst.setLoc(IDLoc); Out.EmitInstruction(Inst, getSTI()); @@ -2554,11 +2629,21 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { return MatchOperand_ParseFail; Parser.Lex(); + + bool IsMinus = false; + if (getLexer().getKind() == AsmToken::Minus) { + Parser.Lex(); + IsMinus = true; + } + if (getLexer().isNot(AsmToken::Integer)) return MatchOperand_ParseFail; if (getParser().parseAbsoluteExpression(Int)) return 
MatchOperand_ParseFail; + + if (IsMinus) + Int = -Int; break; } } @@ -3743,6 +3828,44 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); } +void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { + OptionalImmIndexMap OptionalIdx; + + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + + // Add the register arguments + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + continue; + } + + // Handle the case where soffset is an immediate + if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { + Op.addImmOperands(Inst, 1); + continue; + } + + // Handle tokens like 'offen' which are sometimes hard-coded into the + // asm string. There are no MCInst operands for these. + if (Op.isToken()) { + continue; + } + assert(Op.isImm()); + + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = i; + } + + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyOffset); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); +} + //===----------------------------------------------------------------------===// // mimg //===----------------------------------------------------------------------===// @@ -3870,6 +3993,10 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); } +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); +} + //===----------------------------------------------------------------------===// // vop3 //===----------------------------------------------------------------------===// @@ -3919,6 +4046,8 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, + {"dfmt", AMDGPUOperand::ImmTyDFMT, false, nullptr}, + {"nfmt", AMDGPUOperand::ImmTyNFMT, false, nullptr}, {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, @@ -4475,12 +4604,11 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { - // V_NOP_sdwa_vi has no optional sdwa arguments + // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments switch (BasicInstType) { case SIInstrFlags::VOP1: addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); - if (isGFX9() && - AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); @@ -4490,8 +4618,7 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, case SIInstrFlags::VOP2:
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); - if (isGFX9() && - AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); @@ -4501,9 +4628,7 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, break; case SIInstrFlags::VOPC: - if (isVI()) { - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); - } + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); break; diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td index 2aca65ac84303..2e96c14eaa320 100644 --- a/lib/Target/AMDGPU/BUFInstructions.td +++ b/lib/Target/AMDGPU/BUFInstructions.td @@ -57,6 +57,11 @@ class MUBUFAddr64Table <bit is_addr64, string suffix = ""> { string OpName = NAME # suffix; } +class MTBUFAddr64Table <bit is_addr64, string suffix = ""> { + bit IsAddr64 = is_addr64; + string OpName = NAME # suffix; +} + //===----------------------------------------------------------------------===// // MTBUF classes //===----------------------------------------------------------------------===// @@ -78,14 +83,31 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins, let EXP_CNT = 1; let MTBUF = 1; let Uses = [EXEC]; - let hasSideEffects = 0; let SchedRW = [WriteVMEM]; + + let AsmMatchConverter = "cvtMtbuf"; + + bits<1> offen = 0; + bits<1> idxen = 0; + bits<1> addr64 = 0; + bits<1> has_vdata = 1; + bits<1> has_vaddr = 1; + bits<1> has_glc = 1; + bits<1> glc_value = 0; // the value for glc if no such operand + bits<4> dfmt_value = 1; // the value for dfmt if no such operand + bits<3> nfmt_value = 0; // the value for nfmt if no such operand + bits<1> has_srsrc = 1; + bits<1> has_soffset = 1; + bits<1> has_offset = 1; + bits<1> has_slc = 1; + bits<1> has_tfe = 1; + bits<1> has_dfmt = 1; + bits<1> has_nfmt = 1; } class MTBUF_Real <MTBUF_Pseudo ps> : - InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>, - Enc64 { + InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> { let isPseudo = 0; let isCodeGenOnly = 0; @@ -97,57 +119,168 @@ class MTBUF_Real <MTBUF_Pseudo ps> : let DisableEncoding = ps.DisableEncoding; let TSFlags = ps.TSFlags; - bits<8> vdata; bits<12> offset; - bits<1> offen; - bits<1> idxen; - bits<1> glc; - bits<1> addr64; - bits<4> dfmt; - bits<3> nfmt; - bits<8> vaddr; - bits<7> srsrc; - bits<1> slc; - bits<1> tfe; - bits<8> soffset; - - let Inst{11-0} = offset; - let Inst{12} = offen; - let Inst{13} = idxen; - let Inst{14} = glc; - let Inst{22-19} = dfmt; - let Inst{25-23} = nfmt; - let Inst{31-26} = 0x3a; //encoding - let Inst{39-32} = vaddr; - let Inst{47-40} = vdata; - let Inst{52-48} = srsrc{6-2}; - let Inst{54} = slc; - let Inst{55} = tfe; - let Inst{63-56} = soffset; + bits<1> glc; + bits<4> dfmt; + bits<3> nfmt; + bits<8> vaddr; + bits<8> vdata; + bits<7> srsrc; + bits<1> slc; + bits<1> tfe; + bits<8> soffset; +} + +class getMTBUFInsDA<list<RegisterClass> vdataList, + list<RegisterClass> vaddrList=[]> { + RegisterClass 
vdataClass = !if(!empty(vdataList), ?, !head(vdataList)); + RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList)); + dag InsNoData = !if(!empty(vaddrList), + (ins SReg_128:$srsrc, SCSrc_b32:$soffset, + offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, slc:$slc, tfe:$tfe), + (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, + offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, slc:$slc, tfe:$tfe) + ); + dag InsData = !if(!empty(vaddrList), + (ins vdataClass:$vdata, SReg_128:$srsrc, + SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, + slc:$slc, tfe:$tfe), + (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc, + SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, + slc:$slc, tfe:$tfe) + ); + dag ret = !if(!empty(vdataList), InsNoData, InsData); } -class MTBUF_Load_Pseudo <string opName, RegisterClass regClass> : MTBUF_Pseudo < - opName, (outs regClass:$dst), - (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, - i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr, SReg_128:$srsrc, - i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset), - " $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"# - " $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset"> { +class getMTBUFIns<int addrKind, list<RegisterClass> vdataList=[]> { + dag ret = + !if(!eq(addrKind, BUFAddrKind.Offset), getMTBUFInsDA<vdataList>.ret, + !if(!eq(addrKind, BUFAddrKind.OffEn), getMTBUFInsDA<vdataList, [VGPR_32]>.ret, + !if(!eq(addrKind, BUFAddrKind.IdxEn), getMTBUFInsDA<vdataList, [VGPR_32]>.ret, + !if(!eq(addrKind, BUFAddrKind.BothEn), getMTBUFInsDA<vdataList, [VReg_64]>.ret, + !if(!eq(addrKind, BUFAddrKind.Addr64), getMTBUFInsDA<vdataList, [VReg_64]>.ret, + (ins)))))); +} + +class getMTBUFAsmOps<int addrKind> { + string Pfx = + !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $dfmt, $nfmt, $soffset", + !if(!eq(addrKind, BUFAddrKind.OffEn), + "$vaddr, $srsrc, $dfmt, $nfmt, $soffset offen", + !if(!eq(addrKind, BUFAddrKind.IdxEn), + "$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen", + !if(!eq(addrKind, BUFAddrKind.BothEn), + "$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen offen", + !if(!eq(addrKind, BUFAddrKind.Addr64), + "$vaddr, $srsrc, $dfmt, $nfmt, $soffset addr64", + ""))))); + string ret = Pfx # "$offset"; +} + +class MTBUF_SetupAddr<int addrKind> { + bits<1> offen = !if(!eq(addrKind, BUFAddrKind.OffEn), 1, + !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0)); + + bits<1> idxen = !if(!eq(addrKind, BUFAddrKind.IdxEn), 1, + !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0)); + + bits<1> addr64 = !if(!eq(addrKind, BUFAddrKind.Addr64), 1, 0); + + bits<1> has_vaddr = !if(!eq(addrKind, BUFAddrKind.Offset), 0, 1); +} + +class MTBUF_Load_Pseudo <string opName, + int addrKind, + RegisterClass vdataClass, + list<dag> pattern=[], + // Workaround bug bz30254 + int addrKindCopy = addrKind> + : MTBUF_Pseudo<opName, + (outs vdataClass:$vdata), + getMTBUFIns<addrKindCopy>.ret, + " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe", + pattern>, + MTBUF_SetupAddr<addrKindCopy> { + let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret; let mayLoad = 1; let mayStore = 0; } -class MTBUF_Store_Pseudo <string opName, RegisterClass regClass> : MTBUF_Pseudo < - opName, (outs), - (ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, - i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr, - SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset), - " $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"# - " $nfmt, $vaddr, $srsrc, $slc, 
$tfe, $soffset"> { +multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass, + ValueType load_vt = i32, + SDPatternOperator ld = null_frag> { + + def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, + [(set load_vt:$vdata, + (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$dfmt, + i8:$nfmt, i1:$glc, i1:$slc, i1:$tfe)))]>, + MTBUFAddr64Table<0>; + + def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, + [(set load_vt:$vdata, + (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, + i8:$dfmt, i8:$nfmt, i1:$glc, i1:$slc, i1:$tfe)))]>, + MTBUFAddr64Table<1>; + + def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>; + def _IDXEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>; + def _BOTHEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>; + + let DisableWQM = 1 in { + def _OFFSET_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass>; + def _OFFEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>; + def _IDXEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>; + def _BOTHEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>; + } +} + +class MTBUF_Store_Pseudo <string opName, + int addrKind, + RegisterClass vdataClass, + list<dag> pattern=[], + // Workaround bug bz30254 + int addrKindCopy = addrKind, + RegisterClass vdataClassCopy = vdataClass> + : MTBUF_Pseudo<opName, + (outs), + getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret, + " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe", + pattern>, + MTBUF_SetupAddr<addrKindCopy> { + let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret; let mayLoad = 0; let mayStore = 1; } +multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass, + ValueType store_vt = i32, + SDPatternOperator st = null_frag> { + + def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, + [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, + i16:$offset, i8:$dfmt, i8:$nfmt, i1:$glc, + i1:$slc, i1:$tfe))]>, + MTBUFAddr64Table<0>; + + def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, + [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, + i16:$offset, i8:$dfmt, i8:$nfmt, i1:$glc, + i1:$slc, i1:$tfe))]>, + MTBUFAddr64Table<1>; + + def _OFFEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>; + def _IDXEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>; + def _BOTHEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>; + + let DisableWQM = 1 in { + def _OFFSET_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass>; + def _OFFEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>; + def _IDXEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>; + def _BOTHEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>; + } +} + + //===----------------------------------------------------------------------===// // MUBUF classes //===----------------------------------------------------------------------===// @@ -676,14 +809,14 @@ def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1", // MTBUF Instructions //===----------------------------------------------------------------------===// -//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0, "tbuffer_load_format_x", []>; -//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <1, "tbuffer_load_format_xy", []>; -//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <2, "tbuffer_load_format_xyz", []>; -def 
TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Pseudo <"tbuffer_load_format_xyzw", VReg_128>; -def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Pseudo <"tbuffer_store_format_x", VGPR_32>; -def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Pseudo <"tbuffer_store_format_xy", VReg_64>; -def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Pseudo <"tbuffer_store_format_xyz", VReg_128>; -def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Pseudo <"tbuffer_store_format_xyzw", VReg_128>; +defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32>; +defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_xy", VReg_64>; +defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_128>; +defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128>; +defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32>; +defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64>; +defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_128>; +defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>; } // End let SubtargetPredicate = isGCN @@ -1093,22 +1226,98 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OF // MTBUF Patterns //===----------------------------------------------------------------------===// -// TBUFFER_STORE_FORMAT_*, addr64=0 -class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF_Pseudo opcode> : Pat< - (SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr, - i32:$soffset, imm:$inst_offset, imm:$dfmt, - imm:$nfmt, imm:$offen, imm:$idxen, - imm:$glc, imm:$slc, imm:$tfe), - (opcode - $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen), - (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc, - (as_i1imm $slc), (as_i1imm $tfe), $soffset) ->; +//===----------------------------------------------------------------------===// +// tbuffer_load/store_format patterns +//===----------------------------------------------------------------------===// + +multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, + string opcode> { + def : Pat< + (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, + imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)), + (!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), + (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0) + >; + + def : Pat< + (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, + imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)), + (!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), + (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0) + >; + + def : Pat< + (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, + imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)), + (!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), + (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0) + >; -def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>; -def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>; -def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>; -def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>; + def : Pat< + (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, + imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)), + (!cast<MTBUF_Pseudo>(opcode # _BOTHEN) + (REG_SEQUENCE VReg_64, 
$vindex, sub0, $voffset, sub1), + $rsrc, $soffset, (as_i16imm $offset), + (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0) + >; +} + +defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, i32, "TBUFFER_LOAD_FORMAT_X">; +defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2i32, "TBUFFER_LOAD_FORMAT_XY">; +defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4i32, "TBUFFER_LOAD_FORMAT_XYZW">; +defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, f32, "TBUFFER_LOAD_FORMAT_X">; +defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2f32, "TBUFFER_LOAD_FORMAT_XY">; +defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4f32, "TBUFFER_LOAD_FORMAT_XYZW">; + +multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, + string opcode> { + def : Pat< + (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, + imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc), + (!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, + (as_i16imm $offset), (as_i8imm $dfmt), + (as_i8imm $nfmt), (as_i1imm $glc), + (as_i1imm $slc), 0) + >; + + def : Pat< + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, + imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc), + (!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, + (as_i16imm $offset), (as_i8imm $dfmt), + (as_i8imm $nfmt), (as_i1imm $glc), + (as_i1imm $slc), 0) + >; + + def : Pat< + (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, + imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc), + (!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, + (as_i16imm $offset), (as_i8imm $dfmt), + (as_i8imm $nfmt), (as_i1imm $glc), + (as_i1imm $slc), 0) + >; + + def : Pat< + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, + imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc), + (!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact) + $vdata, + (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), + $rsrc, $soffset, (as_i16imm $offset), + (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0) + >; +} + +defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, i32, "TBUFFER_STORE_FORMAT_X">; +defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2i32, "TBUFFER_STORE_FORMAT_XY">; +defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_x3, v4i32, "TBUFFER_STORE_FORMAT_XYZ">; +defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4i32, "TBUFFER_STORE_FORMAT_XYZW">; +defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, f32, "TBUFFER_STORE_FORMAT_X">; +defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2f32, "TBUFFER_STORE_FORMAT_XY">; +defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_x3, v4f32, "TBUFFER_STORE_FORMAT_XYZ">; +defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4f32, "TBUFFER_STORE_FORMAT_XYZW">; } // End let Predicates = [isGCN] @@ -1224,21 +1433,44 @@ def BUFFER_WBINVL1_si : MUBUF_Real_si <0x71, BUFFER_WBINVL1>; class MTBUF_Real_si <bits<3> op, MTBUF_Pseudo ps> : MTBUF_Real<ps>, + Enc64, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> { let AssemblerPredicate=isSICI; let DecoderNamespace="SICI"; - bits<1> addr64; - let Inst{15} = addr64; + let Inst{11-0} = !if(ps.has_offset, offset, ?); + let Inst{12} = ps.offen; + let Inst{13} = ps.idxen; + let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{15} = ps.addr64; let Inst{18-16} = op; + let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value); + let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value); + let Inst{31-26} = 0x3a; //encoding + let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); + let Inst{47-40} = !if(ps.has_vdata, vdata, ?); + 
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); + let Inst{54} = !if(ps.has_slc, slc, ?); + let Inst{55} = !if(ps.has_tfe, tfe, ?); + let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } -def TBUFFER_LOAD_FORMAT_XYZW_si : MTBUF_Real_si <3, TBUFFER_LOAD_FORMAT_XYZW>; -def TBUFFER_STORE_FORMAT_X_si : MTBUF_Real_si <4, TBUFFER_STORE_FORMAT_X>; -def TBUFFER_STORE_FORMAT_XY_si : MTBUF_Real_si <5, TBUFFER_STORE_FORMAT_XY>; -def TBUFFER_STORE_FORMAT_XYZ_si : MTBUF_Real_si <6, TBUFFER_STORE_FORMAT_XYZ>; -def TBUFFER_STORE_FORMAT_XYZW_si : MTBUF_Real_si <7, TBUFFER_STORE_FORMAT_XYZW>; +multiclass MTBUF_Real_AllAddr_si<bits<3> op> { + def _OFFSET_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>; + def _ADDR64_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_ADDR64")>; + def _OFFEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>; + def _IDXEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>; + def _BOTHEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>; +} +defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_si <0>; +defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_si <1>; +//defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_si <2>; +defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_si <3>; +defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_si <4>; +defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_si <5>; +defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_si <6>; +defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_si <7>; //===----------------------------------------------------------------------===// // CI @@ -1350,16 +1582,39 @@ def BUFFER_WBINVL1_VOL_vi : MUBUF_Real_vi <0x3f, BUFFER_WBINVL1_VOL>; class MTBUF_Real_vi <bits<4> op, MTBUF_Pseudo ps> : MTBUF_Real<ps>, + Enc64, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> { let AssemblerPredicate=isVI; let DecoderNamespace="VI"; + let Inst{11-0} = !if(ps.has_offset, offset, ?); + let Inst{12} = ps.offen; + let Inst{13} = ps.idxen; + let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); let Inst{18-15} = op; + let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value); + let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value); + let Inst{31-26} = 0x3a; //encoding + let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); + let Inst{47-40} = !if(ps.has_vdata, vdata, ?); + let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); + let Inst{54} = !if(ps.has_slc, slc, ?); + let Inst{55} = !if(ps.has_tfe, tfe, ?); + let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } -def TBUFFER_LOAD_FORMAT_XYZW_vi : MTBUF_Real_vi <3, TBUFFER_LOAD_FORMAT_XYZW>; -def TBUFFER_STORE_FORMAT_X_vi : MTBUF_Real_vi <4, TBUFFER_STORE_FORMAT_X>; -def TBUFFER_STORE_FORMAT_XY_vi : MTBUF_Real_vi <5, TBUFFER_STORE_FORMAT_XY>; -def TBUFFER_STORE_FORMAT_XYZ_vi : MTBUF_Real_vi <6, TBUFFER_STORE_FORMAT_XYZ>; -def TBUFFER_STORE_FORMAT_XYZW_vi : MTBUF_Real_vi <7, TBUFFER_STORE_FORMAT_XYZW>; +multiclass MTBUF_Real_AllAddr_vi<bits<4> op> { + def _OFFSET_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>; + def _OFFEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>; + def _IDXEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>; + def _BOTHEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>; +} +defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_vi <0>; +defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_vi <1>; +//defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <2>; +defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <3>; +defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_vi <4>; +defm TBUFFER_STORE_FORMAT_XY : 
MTBUF_Real_AllAddr_vi <5>; +defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <6>; +defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <7>; diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 88c92b9582fd0..04308fb3aaf64 100644 --- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -49,6 +49,17 @@ addOperand(MCInst &Inst, const MCOperand& Opnd) { MCDisassembler::SoftFail; } +static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, + uint16_t NameIdx) { + int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx); + if (OpIdx != -1) { + auto I = MI.begin(); + std::advance(I, OpIdx); + MI.insert(I, Op); + } + return OpIdx; +} + static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const void *Decoder) { auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); @@ -106,12 +117,12 @@ static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst, return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm)); } -#define DECODE_SDWA9(DecName) \ -DECODE_OPERAND(decodeSDWA9##DecName, decodeSDWA9##DecName) +#define DECODE_SDWA(DecName) \ +DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName) -DECODE_SDWA9(Src32) -DECODE_SDWA9(Src16) -DECODE_SDWA9(VopcDst) +DECODE_SDWA(Src32) +DECODE_SDWA(Src16) +DECODE_SDWA(VopcDst) #include "AMDGPUGenDisassemblerTables.inc" @@ -149,6 +160,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, raw_ostream &WS, raw_ostream &CS) const { CommentStream = &CS; + bool IsSDWA = false; // ToDo: AMDGPUDisassembler supports only VI ISA. if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding]) @@ -170,10 +182,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, if (Res) break; Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address); - if (Res) break; + if (Res) { IsSDWA = true; break; } Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address); - if (Res) break; + if (Res) { IsSDWA = true; break; } } // Reinitialize Bytes as DPP64 could have eaten too much @@ -200,17 +212,36 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, MI.getOpcode() == AMDGPU::V_MAC_F32_e64_si || MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi)) { // Insert dummy unused src2_modifiers. - int Src2ModIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), - AMDGPU::OpName::src2_modifiers); - auto I = MI.begin(); - std::advance(I, Src2ModIdx); - MI.insert(I, MCOperand::createImm(0)); + insertNamedMCOperand(MI, MCOperand::createImm(0), + AMDGPU::OpName::src2_modifiers); } + if (Res && IsSDWA) + Res = convertSDWAInst(MI); + Size = Res ? 
(MaxInstBytesNum - Bytes.size()) : 0; return Res; } +DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { + if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) { + if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1) + // VOPC - insert clamp + insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp); + } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) { + int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst); + if (SDst != -1) { + // VOPC - insert VCC register as sdst + insertNamedMCOperand(MI, MCOperand::createReg(AMDGPU::VCC), + AMDGPU::OpName::sdst); + } else { + // VOP1/2 - insert omod if present in instruction + insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod); + } + } + return MCDisassembler::Success; +} + const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const { return getContext().getRegisterInfo()-> getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]); @@ -524,8 +555,6 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c return createSRegOperand(getTtmpClassId(Width), Val - TTMP_MIN); } - assert(Width == OPW16 || Width == OPW32 || Width == OPW64); - if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) return decodeIntImmed(Val); @@ -592,36 +621,43 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const { return errOperand(Val, "unknown operand encoding " + Twine(Val)); } -MCOperand AMDGPUDisassembler::decodeSDWA9Src(const OpWidthTy Width, - unsigned Val) const { +MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, + unsigned Val) const { using namespace AMDGPU::SDWA; - if (SDWA9EncValues::SRC_VGPR_MIN <= Val && - Val <= SDWA9EncValues::SRC_VGPR_MAX) { - return createRegOperand(getVgprClassId(Width), - Val - SDWA9EncValues::SRC_VGPR_MIN); - } - if (SDWA9EncValues::SRC_SGPR_MIN <= Val && - Val <= SDWA9EncValues::SRC_SGPR_MAX) { - return createSRegOperand(getSgprClassId(Width), - Val - SDWA9EncValues::SRC_SGPR_MIN); - } + if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) { + if (SDWA9EncValues::SRC_VGPR_MIN <= Val && + Val <= SDWA9EncValues::SRC_VGPR_MAX) { + return createRegOperand(getVgprClassId(Width), + Val - SDWA9EncValues::SRC_VGPR_MIN); + } + if (SDWA9EncValues::SRC_SGPR_MIN <= Val && + Val <= SDWA9EncValues::SRC_SGPR_MAX) { + return createSRegOperand(getSgprClassId(Width), + Val - SDWA9EncValues::SRC_SGPR_MIN); + } - return decodeSpecialReg32(Val - SDWA9EncValues::SRC_SGPR_MIN); + return decodeSpecialReg32(Val - SDWA9EncValues::SRC_SGPR_MIN); + } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) { + return createRegOperand(getVgprClassId(Width), Val); + } + llvm_unreachable("unsupported target"); } -MCOperand AMDGPUDisassembler::decodeSDWA9Src16(unsigned Val) const { - return decodeSDWA9Src(OPW16, Val); +MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const { + return decodeSDWASrc(OPW16, Val); } -MCOperand AMDGPUDisassembler::decodeSDWA9Src32(unsigned Val) const { - return decodeSDWA9Src(OPW32, Val); +MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const { + return decodeSDWASrc(OPW32, Val); } -MCOperand AMDGPUDisassembler::decodeSDWA9VopcDst(unsigned Val) const { +MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const { using namespace AMDGPU::SDWA; + assert(STI.getFeatureBits()[AMDGPU::FeatureGFX9] && + "SDWAVopcDst should be present only on GFX9"); if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) { Val &= 
SDWA9EncValues::VOPC_DST_SGPR_MASK; if (Val > AMDGPU::EncValues::SGPR_MAX) { diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 5fa3cf1a223fa..3d71db909e20d 100644 --- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -65,6 +65,8 @@ public: uint64_t Inst, uint64_t Address) const; + DecodeStatus convertSDWAInst(MCInst &MI) const; + MCOperand decodeOperand_VGPR_32(unsigned Val) const; MCOperand decodeOperand_VS_32(unsigned Val) const; MCOperand decodeOperand_VS_64(unsigned Val) const; @@ -105,10 +107,10 @@ public: MCOperand decodeSpecialReg32(unsigned Val) const; MCOperand decodeSpecialReg64(unsigned Val) const; - MCOperand decodeSDWA9Src(const OpWidthTy Width, unsigned Val) const; - MCOperand decodeSDWA9Src16(unsigned Val) const; - MCOperand decodeSDWA9Src32(unsigned Val) const; - MCOperand decodeSDWA9VopcDst(unsigned Val) const; + MCOperand decodeSDWASrc(const OpWidthTy Width, unsigned Val) const; + MCOperand decodeSDWASrc16(unsigned Val) const; + MCOperand decodeSDWASrc32(unsigned Val) const; + MCOperand decodeSDWAVopcDst(unsigned Val) const; }; //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td index 98eda288bcacb..edca6fcd812c8 100644 --- a/lib/Target/AMDGPU/FLATInstructions.td +++ b/lib/Target/AMDGPU/FLATInstructions.td @@ -31,8 +31,6 @@ class FLAT_Pseudo<string opName, dag outs, dag ins, let VM_CNT = 1; let LGKM_CNT = 1; - let Uses = [EXEC, FLAT_SCR]; // M0 - let UseNamedOperandTable = 1; let hasSideEffects = 0; let SchedRW = [WriteVMEM]; @@ -40,10 +38,16 @@ class FLAT_Pseudo<string opName, dag outs, dag ins, string Mnemonic = opName; string AsmOperands = asmOps; + bits<1> is_flat_global = 0; + bits<1> is_flat_scratch = 0; + bits<1> has_vdst = 1; bits<1> has_data = 1; bits<1> has_glc = 1; bits<1> glcValue = 0; + + // TODO: M0 if it could possibly access LDS (before gfx9? only)? + let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]); } class FLAT_Real <bits<7> op, FLAT_Pseudo ps> : @@ -68,7 +72,10 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> : // Only valid on gfx9 bits<1> lds = 0; // XXX - What does this actually do? - bits<2> seg; // Segment, 00=flat, 01=scratch, 10=global, 11=reserved + + // Segment, 00=flat, 01=scratch, 10=global, 11=reserved + bits<2> seg = !if(ps.is_flat_global, 0b10, + !if(ps.is_flat_scratch, 0b01, 0)); // Signed offset. Highest bit ignored for flat and treated as 12-bit // unsigned for flat accesses.
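A minimal standalone sketch of the segment selection the new seg field encodes, mirroring the TableGen !if chain above (hypothetical C++ names, not LLVM code):

enum class FlatSeg : unsigned { Flat = 0b00, Scratch = 0b01, Global = 0b10 }; // 0b11 reserved

static FlatSeg selectSeg(bool IsFlatGlobal, bool IsFlatScratch) {
  // Mirrors the nested !if: global takes precedence, then scratch, else flat.
  if (IsFlatGlobal)
    return FlatSeg::Global;
  if (IsFlatScratch)
    return FlatSeg::Scratch;
  return FlatSeg::Flat;
}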
@@ -81,7 +88,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> : // Only valid on GFX9+ let Inst{12-0} = offset; let Inst{13} = lds; - let Inst{15-14} = 0; + let Inst{15-14} = seg; let Inst{16} = !if(ps.has_glc, glc, ps.glcValue); let Inst{17} = slc; @@ -106,6 +113,16 @@ class FLAT_Load_Pseudo <string opName, RegisterClass regClass, let mayLoad = 1; } +class FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass> : + FLAT_Load_Pseudo<opName, regClass, 1> { + let is_flat_global = 1; +} + +class FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> : + FLAT_Load_Pseudo<opName, regClass, 1> { + let is_flat_scratch = 1; +} + class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass, bit HasSignedOffset = 0> : FLAT_Pseudo< opName, @@ -119,6 +136,16 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass, let has_vdst = 0; } +class FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> : + FLAT_Store_Pseudo<opName, regClass, 1> { + let is_flat_global = 1; +} + +class FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> : + FLAT_Store_Pseudo<opName, regClass, 1> { + let is_flat_scratch = 1; +} + multiclass FLAT_Atomic_Pseudo< string opName, RegisterClass vdst_rc, @@ -306,6 +333,26 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2", } // End SubtargetPredicate = isCI +let SubtargetPredicate = HasFlatGlobalInsts in { +def GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>; +def GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>; +def GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>; +def GLOBAL_LOAD_SSHORT : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>; +def GLOBAL_LOAD_DWORD : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>; +def GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>; +def GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>; +def GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>; + +def GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>; +def GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>; +def GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>; +def GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>; +def GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>; +def GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>; + +} // End SubtargetPredicate = HasFlatGlobalInsts + + //===----------------------------------------------------------------------===// // Flat Patterns //===----------------------------------------------------------------------===// @@ -557,3 +604,18 @@ defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>; defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>; defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>; +def GLOBAL_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, GLOBAL_LOAD_UBYTE>; +def GLOBAL_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, GLOBAL_LOAD_SBYTE>; +def GLOBAL_LOAD_USHORT_vi : FLAT_Real_vi <0x12, GLOBAL_LOAD_USHORT>; +def GLOBAL_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, GLOBAL_LOAD_SSHORT>; +def GLOBAL_LOAD_DWORD_vi : FLAT_Real_vi <0x14, GLOBAL_LOAD_DWORD>; +def GLOBAL_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, GLOBAL_LOAD_DWORDX2>; +def GLOBAL_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, 
GLOBAL_LOAD_DWORDX4>; +def GLOBAL_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, GLOBAL_LOAD_DWORDX3>; + +def GLOBAL_STORE_BYTE_vi : FLAT_Real_vi <0x18, GLOBAL_STORE_BYTE>; +def GLOBAL_STORE_SHORT_vi : FLAT_Real_vi <0x1a, GLOBAL_STORE_SHORT>; +def GLOBAL_STORE_DWORD_vi : FLAT_Real_vi <0x1c, GLOBAL_STORE_DWORD>; +def GLOBAL_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, GLOBAL_STORE_DWORDX2>; +def GLOBAL_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, GLOBAL_STORE_DWORDX4>; +def GLOBAL_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, GLOBAL_STORE_DWORDX3>; diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index b84640230eeeb..7c31c8e397ba7 100644 --- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -72,6 +72,11 @@ void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff); } +void AMDGPUInstPrinter::printS16ImmDecOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << formatDec(static_cast<int16_t>(MI->getOperand(OpNo).getImm())); +} + void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -118,6 +123,16 @@ void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo, } } +void AMDGPUInstPrinter::printOffsetS13(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + uint16_t Imm = MI->getOperand(OpNo).getImm(); + if (Imm != 0) { + O << ((OpNo == 0)? "offset:" : " offset:"); + printS16ImmDecOperand(MI, OpNo, O); + } +} + void AMDGPUInstPrinter::printOffset0(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -216,6 +231,24 @@ void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo, O << " vm"; } +void AMDGPUInstPrinter::printDFMT(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) { + O << " dfmt:"; + printU8ImmDecOperand(MI, OpNo, O); + } +} + +void AMDGPUInstPrinter::printNFMT(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) { + O << " nfmt:"; + printU8ImmDecOperand(MI, OpNo, O); + } +} + void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI) { switch (RegNo) { @@ -379,7 +412,6 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI, raw_ostream &O) { uint16_t Lo16 = static_cast<uint16_t>(Imm); - assert(Lo16 == static_cast<uint16_t>(Imm >> 16)); printImmediate16(Lo16, STI, O); } diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h index c8094c4b840a1..7bbf99a85f409 100644 --- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -42,6 +42,7 @@ private: void printU4ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU32ImmOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O, @@ -52,6 +53,9 @@ private: void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O); void 
printOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printOffsetS13(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); + void printOffset0(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printOffset1(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, @@ -84,6 +88,10 @@ private: const MCSubtargetInfo &STI, raw_ostream &O); void printExpVM(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printDFMT(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printNFMT(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printRegOperand(unsigned RegNo, raw_ostream &O); void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 0a9c2b94c1eee..2b408ff10caae 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -30,14 +30,9 @@ public: unsigned getNumFixupKinds() const override { return AMDGPU::NumTargetFixupKinds; }; - void processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override; - - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const override; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override { @@ -102,36 +97,11 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, } } -void AMDGPUAsmBackend::processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) { - MCValue Res; - - // When we have complex expressions like: BB0_1 + (BB0_2 - 4), which are - // used for long branches, this function will be called with - // IsResolved = false and Value set to some pre-computed value. In - // the example above, the value would be: - // (BB0_1 + (BB0_2 - 4)) - CurrentOffsetFromStartOfFunction. - // This is not what we want. We just want the expression computation - // only. The reason the MC layer subtracts the current offset from the - // expression is because the fixup is of kind FK_PCRel_4. - // For these scenarios, evaluateAsValue gives us the computation that we - // want. - if (!IsResolved && Fixup.getValue()->evaluateAsValue(Res, Layout) && - Res.isAbsolute()) { - Value = Res.getConstant(); - IsResolved = true; - - } - if (IsResolved) - Value = adjustFixupValue(Fixup, Value, &Asm.getContext()); -} - -void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel, MCContext &Ctx) const { +void AMDGPUAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef<char> Data, uint64_t Value, + bool IsPCRel) const { + Value = adjustFixupValue(Fixup, Value, &Asm.getContext()); if (!Value) return; // Doesn't change encoding. 
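The byte loop in the hunk below amounts to OR-ing the already-adjusted fixup value into the bytes the fixup touches; a minimal standalone sketch under that assumption (hypothetical helper, little-endian, not the backend's actual code):

#include <cstddef>
#include <cstdint>

static void patchBytes(uint8_t *Data, size_t Offset, unsigned NumBytes,
                       uint64_t Value) {
  // Mask each byte of the fixup value into the fragment, least significant
  // byte first.
  for (unsigned I = 0; I != NumBytes; ++I)
    Data[Offset + I] |= uint8_t((Value >> (I * 8)) & 0xff);
}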
@@ -142,7 +112,7 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); uint32_t Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); // For each byte of the fragment that the fixup touches, mask in the bits from // the fixup value. diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h index a856b17a228f0..1b062064ace1c 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h @@ -52,15 +52,15 @@ public: return 0; } - virtual unsigned getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { + virtual unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { return 0; } - virtual unsigned getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { + virtual unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { return 0; } diff --git a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index e02acf516c0db..376c9bfe5ccf2 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -70,13 +70,13 @@ public: SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const override; - unsigned getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const override; + unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; - unsigned getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const override; + unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; }; } // end anonymous namespace @@ -252,9 +252,7 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO, case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { uint16_t Lo16 = static_cast<uint16_t>(Imm); - assert(Lo16 == static_cast<uint16_t>(Imm >> 16)); uint32_t Encoding = getLit16Encoding(Lo16, STI); - assert(Encoding != 255 && "packed constants can only be inline immediates"); return Encoding; } default: @@ -328,11 +326,11 @@ unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, } unsigned -SIMCCodeEmitter::getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { +SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { using namespace AMDGPU::SDWA; - + uint64_t RegEnc = 0; const MCOperand &MO = MI.getOperand(OpNo); @@ -347,9 +345,9 @@ SIMCCodeEmitter::getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo, } unsigned -SIMCCodeEmitter::getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { 
+SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { using namespace AMDGPU::SDWA; uint64_t RegEnc = 0; @@ -365,6 +363,25 @@ SIMCCodeEmitter::getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo, return RegEnc; } +static bool needsPCRel(const MCExpr *Expr) { + switch (Expr->getKind()) { + case MCExpr::SymbolRef: + return true; + case MCExpr::Binary: { + auto *BE = cast<MCBinaryExpr>(Expr); + if (BE->getOpcode() == MCBinaryExpr::Sub) + return false; + return needsPCRel(BE->getLHS()) || needsPCRel(BE->getRHS()); + } + case MCExpr::Unary: + return needsPCRel(cast<MCUnaryExpr>(Expr)->getSubExpr()); + case MCExpr::Target: + case MCExpr::Constant: + return false; + } + llvm_unreachable("invalid kind"); +} + uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups, @@ -373,12 +390,21 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, return MRI.getEncodingValue(MO.getReg()); if (MO.isExpr() && MO.getExpr()->getKind() != MCExpr::Constant) { - const auto *Expr = dyn_cast<MCSymbolRefExpr>(MO.getExpr()); + // FIXME: Whether this expression is PCRel or not should not depend on what + // the expression looks like. Given that this is just a general expression, + // it should probably be FK_Data_4 and whatever is producing + // + // s_add_u32 s2, s2, (extern_const_addrspace+16 + // + // And expecting a PCRel should instead produce + // + // .Ltmp1: + // s_add_u32 s2, s2, (extern_const_addrspace+16)-.Ltmp1 MCFixupKind Kind; - if (Expr && Expr->getSymbol().isExternal()) - Kind = FK_Data_4; - else + if (needsPCRel(MO.getExpr())) Kind = FK_PCRel_4; + else + Kind = FK_Data_4; Fixups.push_back(MCFixup::create(4, MO.getExpr(), Kind, MI.getLoc())); } diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td index f6f2582aa11b3..d30d1d382588c 100644 --- a/lib/Target/AMDGPU/Processors.td +++ b/lib/Target/AMDGPU/Processors.td @@ -80,7 +80,7 @@ def : Proc<"cayman", R600_VLIW4_Itin, // Southern Islands //===----------------------------------------------------------------------===// -def : ProcessorModel<"gfx600", SIFullSpeedModel, +def : ProcessorModel<"gfx600", SIFullSpeedModel, [FeatureISAVersion6_0_0]>; def : ProcessorModel<"SI", SIFullSpeedModel, @@ -95,7 +95,7 @@ def : ProcessorModel<"gfx601", SIQuarterSpeedModel, [FeatureISAVersion6_0_1] >; -def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, +def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureISAVersion6_0_1]>; def : ProcessorModel<"verde", SIQuarterSpeedModel, diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index c55878f8bff0f..215791f4f92dd 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -584,23 +584,23 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const return LowerImplicitParameter(DAG, VT, DL, 8); case Intrinsic::r600_read_tgid_x: - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T1_X, VT); + return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T1_X, VT); case Intrinsic::r600_read_tgid_y: - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T1_Y, VT); + return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T1_Y, VT); case Intrinsic::r600_read_tgid_z: - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T1_Z, VT); +
return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T1_Z, VT); case Intrinsic::r600_read_tidig_x: - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T0_X, VT); + return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T0_X, VT); case Intrinsic::r600_read_tidig_y: - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T0_Y, VT); + return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T0_Y, VT); case Intrinsic::r600_read_tidig_z: - return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, - AMDGPU::T0_Z, VT); + return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T0_Z, VT); case Intrinsic::r600_recipsqrt_ieee: return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1)); diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h index 5cd90323ff67b..3915c0e5bdbed 100644 --- a/lib/Target/AMDGPU/SIDefines.h +++ b/lib/Target/AMDGPU/SIDefines.h @@ -118,9 +118,9 @@ namespace AMDGPU { // Operand for source modifiers for VOP instructions OPERAND_INPUT_MODS, - // Operand for GFX9 SDWA instructions - OPERAND_SDWA9_SRC, - OPERAND_SDWA9_VOPC_DST, + // Operand for SDWA instructions + OPERAND_SDWA_SRC, + OPERAND_SDWA_VOPC_DST, /// Operand with 32-bit immediate that uses the constant bus. OPERAND_KIMM32, diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 5f5f25103c027..0a795c99f94e5 100644 --- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -174,6 +174,31 @@ static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC, return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC); } +static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI, + const SIRegisterInfo *TRI, + const SIInstrInfo *TII) { + MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + auto &Src = MI.getOperand(1); + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcReg = Src.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || + !TargetRegisterInfo::isVirtualRegister(DstReg)) + return false; + + for (const auto &MO : MRI.reg_nodbg_operands(DstReg)) { + const auto *UseMI = MO.getParent(); + if (UseMI == &MI) + continue; + if (MO.isDef() || UseMI->getParent() != MI.getParent() || + UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END || + !TII->isOperandLegal(*UseMI, UseMI->getOperandNo(&MO), &Src)) + return false; + } + // Change VGPR to SGPR destination. + MRI.setRegClass(DstReg, TRI->getEquivalentSGPRClass(MRI.getRegClass(DstReg))); + return true; +} + // Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE. // // SGPRx = ... @@ -214,6 +239,9 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) return false; + if (tryChangeVGPRtoSGPRinCopy(CopyUse, TRI, TII)) + return true; + // TODO: Could have multiple extracts? 
unsigned SubReg = CopyUse.getOperand(1).getSubReg(); if (SubReg != AMDGPU::NoSubRegister) @@ -563,6 +591,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { break; } TII->moveToVALU(MI); + } else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) { + tryChangeVGPRtoSGPRinCopy(MI, TRI, TII); } break; diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index e10f1ed3762e8..f391f67a241f1 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -13,6 +13,7 @@ #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -166,6 +167,8 @@ static bool updateOperand(FoldCandidate &Fold, if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) && TargetRegisterInfo::isVirtualRegister(New->getReg())) { Old.substVirtReg(New->getReg(), New->getSubReg(), TRI); + + Old.setIsUndef(New->isUndef()); return true; } @@ -470,7 +473,7 @@ static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI, return &Op; MachineInstr *Def = MRI.getVRegDef(Op.getReg()); - if (Def->isMoveImmediate()) { + if (Def && Def->isMoveImmediate()) { MachineOperand &ImmSrc = Def->getOperand(1); if (ImmSrc.isImm()) return &ImmSrc; @@ -921,12 +924,9 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { // level. bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath(); - for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); - BI != BE; ++BI) { - - MachineBasicBlock &MBB = *BI; + for (MachineBasicBlock *MBB : depth_first(&MF)) { MachineBasicBlock::iterator I, Next; - for (I = MBB.begin(); I != MBB.end(); I = Next) { + for (I = MBB->begin(); I != MBB->end(); I = Next) { Next = std::next(I); MachineInstr &MI = *I; diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index b1bd14e421f02..08a64de385018 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -284,7 +284,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister; - if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) { + if (ST.isAmdCodeObjectV2(MF)) { PreloadedPrivateBufferReg = TRI->getPreloadedValue( MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); } @@ -363,14 +363,14 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, // Use relocations to get the pointer, and setup the other bits manually. 
uint64_t Rsrc23 = TII->getScratchRsrcWords23(); - if (MFI->hasPrivateMemoryInputPtr()) { + if (MFI->hasImplicitBufferPtr()) { unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) { const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64); BuildMI(MBB, I, DL, Mov64, Rsrc01) - .addReg(PreloadedPrivateBufferReg) + .addReg(MFI->getImplicitBufferPtrUserSGPR()) .addReg(ScratchRsrcReg, RegState::ImplicitDefine); } else { const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); @@ -385,7 +385,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, MachineMemOperand::MODereferenceable, 0, 0); BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01) - .addReg(PreloadedPrivateBufferReg) + .addReg(MFI->getImplicitBufferPtrUserSGPR()) .addImm(0) // offset .addImm(0) // glc .addMemOperand(MMO) @@ -417,14 +417,69 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, void SIFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { - const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - if (MFI->isEntryFunction()) + const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); + if (FuncInfo->isEntryFunction()) { emitEntryFunctionPrologue(MF, MBB); + return; + } + + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const SIInstrInfo *TII = ST.getInstrInfo(); + + unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg(); + unsigned FramePtrReg = FuncInfo->getFrameOffsetReg(); + + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc DL; + + bool NeedFP = hasFP(MF); + if (NeedFP) { + // If we need a base pointer, set it up here. It's whatever the value of + // the stack pointer is at this point. Any variable size objects will be + // allocated after this, so we can still use the base pointer to reference + // locals. + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) + .addReg(StackPtrReg) + .setMIFlag(MachineInstr::FrameSetup); + } + + uint32_t NumBytes = MFI.getStackSize(); + if (NumBytes != 0 && hasSP(MF)) { + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg) + .addReg(StackPtrReg) + .addImm(NumBytes * ST.getWavefrontSize()) + .setMIFlag(MachineInstr::FrameSetup); + } } void SIFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { + const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); + if (FuncInfo->isEntryFunction()) + return; + unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg(); + if (StackPtrReg == AMDGPU::NoRegister) + return; + + const MachineFrameInfo &MFI = MF.getFrameInfo(); + uint32_t NumBytes = MFI.getStackSize(); + + const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const SIInstrInfo *TII = ST.getInstrInfo(); + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + DebugLoc DL; + + // FIXME: Clarify the distinction between having no SP set and having one. + // For callee functions, it's really whether we need SP to be accurate or not. 
+ + if (NumBytes != 0 && hasSP(MF)) { + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg) + .addReg(StackPtrReg) + .addImm(NumBytes * ST.getWavefrontSize()) + .setMIFlag(MachineInstr::FrameDestroy); + } } static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) { @@ -557,3 +612,19 @@ void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF, WorkItemIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI); } } + +bool SIFrameLowering::hasFP(const MachineFunction &MF) const { + // All stack operations are relative to the frame offset SGPR. + // TODO: Still want to eliminate sometimes. + const MachineFrameInfo &MFI = MF.getFrameInfo(); + + // XXX - Is this only called after frame is finalized? Should be able to check + // frame size. + return MFI.hasStackObjects() && !allStackObjectsAreDead(MFI); +} + +bool SIFrameLowering::hasSP(const MachineFunction &MF) const { + // All stack operations are relative to the frame offset SGPR. + const MachineFrameInfo &MFI = MF.getFrameInfo(); + return MFI.hasCalls() || MFI.hasVarSizedObjects(); +} diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h index e17adbe273614..d4dfa1c7eaa86 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.h +++ b/lib/Target/AMDGPU/SIFrameLowering.h @@ -60,6 +60,10 @@ private: /// \brief Emits debugger prologue. void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const; + +public: + bool hasFP(const MachineFunction &MF) const override; + bool hasSP(const MachineFunction &MF) const; }; } // end namespace llvm diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 441f1ef4bd04c..d0f4e00994de1 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -211,6 +211,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::UADDO, MVT::i32, Legal); setOperationAction(ISD::USUBO, MVT::i32, Legal); + setOperationAction(ISD::ADDCARRY, MVT::i32, Legal); + setOperationAction(ISD::SUBCARRY, MVT::i32, Legal); + // We only support LOAD/STORE and vector manipulation ops for vectors // with > 4 elements. for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, @@ -471,6 +474,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT, VT, Custom); } + setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::ADDCARRY); + setTargetDAGCombine(ISD::SUB); + setTargetDAGCombine(ISD::SUBCARRY); setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::FSUB); setTargetDAGCombine(ISD::FMINNUM); @@ -1061,10 +1068,10 @@ static void allocateHSAUserSGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) { - if (Info.hasPrivateMemoryInputPtr()) { - unsigned PrivateMemoryPtrReg = Info.addPrivateMemoryPtr(TRI); - MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SGPR_64RegClass); - CCInfo.AllocateReg(PrivateMemoryPtrReg); + if (Info.hasImplicitBufferPtr()) { + unsigned ImplicitBufferPtrReg = Info.addImplicitBufferPtr(TRI); + MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass); + CCInfo.AllocateReg(ImplicitBufferPtrReg); } // FIXME: How should these inputs interact with inreg / custom SGPR inputs? 
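The non-entry prologue and epilogue above adjust the stack pointer by NumBytes * ST.getWavefrontSize() rather than by the raw frame size: scratch is allocated per lane but stepped through a single per-wave offset, so each frame contributes once per lane in the wave. A standalone sketch of that arithmetic (illustrative only; the 64-lane example is an assumption about the subtarget, not something the patch states):

    #include <cstdint>

    // A wave entering a frame advances SP by the per-lane frame size times
    // the lane count, matching the S_ADD_U32/S_SUB_U32 immediates above.
    uint32_t scaledStackAdjustment(uint32_t FrameBytes, uint32_t WavefrontSize) {
      return FrameBytes * WavefrontSize;
    }

    // Example: a 16-byte frame on a 64-lane wave moves SP by 1024 bytes.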
@@ -1227,7 +1234,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM, } } - if (NeedSP){ + if (NeedSP) { unsigned ReservedStackPtrOffsetReg = TRI.reservedStackPtrOffsetReg(MF); Info.setStackPtrOffsetReg(ReservedStackPtrOffsetReg); @@ -2998,7 +3005,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, switch (IntrinsicID) { case Intrinsic::amdgcn_implicit_buffer_ptr: { - unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); + if (getSubtarget()->isAmdCodeObjectV2(MF)) + return emitNonHSAIntrinsicError(DAG, DL, VT); + + unsigned Reg = TRI->getPreloadedValue(MF, + SIRegisterInfo::IMPLICIT_BUFFER_PTR); return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT); } case Intrinsic::amdgcn_dispatch_ptr: @@ -3288,6 +3299,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); SDLoc DL(Op); + MachineFunction &MF = DAG.getMachineFunction(); + switch (IntrID) { case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_dec: { @@ -3313,7 +3326,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(5), // glc Op.getOperand(6) // slc }; - MachineFunction &MF = DAG.getMachineFunction(); SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ? @@ -3328,6 +3340,29 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, MMO); } + case Intrinsic::amdgcn_tbuffer_load: { + SDValue Ops[] = { + Op.getOperand(0), // Chain + Op.getOperand(2), // rsrc + Op.getOperand(3), // vindex + Op.getOperand(4), // voffset + Op.getOperand(5), // soffset + Op.getOperand(6), // offset + Op.getOperand(7), // dfmt + Op.getOperand(8), // nfmt + Op.getOperand(9), // glc + Op.getOperand(10) // slc + }; + + EVT VT = Op.getOperand(2).getValueType(); + + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(), + MachineMemOperand::MOLoad, + VT.getStoreSize(), VT.getStoreSize()); + return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL, + Op->getVTList(), Ops, VT, MMO); + } // Basic sample. 
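The amdgcn_tbuffer_load lowering just above follows the standard memory-intrinsic recipe: gather the operands in the order the target node expects, then attach a MachineMemOperand so later passes can see the size and kind of the access even though the address is opaque. A condensed sketch of that recipe, with illustrative names (not code from the patch):

    #include "llvm/CodeGen/SelectionDAG.h"

    // Wrap a target memory intrinsic in a MemIntrinsicNode whose MMO records
    // only the access kind and width (the pointer itself is unknown here).
    static SDValue buildMemIntrinsic(SelectionDAG &DAG, const SDLoc &DL,
                                     unsigned Opc, SDVTList VTs,
                                     ArrayRef<SDValue> Ops, EVT MemVT,
                                     MachineFunction &MF) {
      MachineMemOperand *MMO = MF.getMachineMemOperand(
          MachinePointerInfo(), MachineMemOperand::MOLoad,
          MemVT.getStoreSize(), MemVT.getStoreSize());
      return DAG.getMemIntrinsicNode(Opc, DL, VTs, Ops, MemVT, MMO);
    }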
case Intrinsic::amdgcn_image_sample: case Intrinsic::amdgcn_image_sample_cl: @@ -3393,10 +3428,10 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const { - MachineFunction &MF = DAG.getMachineFunction(); SDLoc DL(Op); SDValue Chain = Op.getOperand(0); unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + MachineFunction &MF = DAG.getMachineFunction(); switch (IntrinsicID) { case Intrinsic::amdgcn_exp: { @@ -3463,33 +3498,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, return DAG.getNode(AMDGPUISD::INIT_EXEC_FROM_INPUT, DL, MVT::Other, Chain, Op.getOperand(2), Op.getOperand(3)); } - case AMDGPUIntrinsic::SI_tbuffer_store: { - SDValue Ops[] = { - Chain, - Op.getOperand(2), - Op.getOperand(3), - Op.getOperand(4), - Op.getOperand(5), - Op.getOperand(6), - Op.getOperand(7), - Op.getOperand(8), - Op.getOperand(9), - Op.getOperand(10), - Op.getOperand(11), - Op.getOperand(12), - Op.getOperand(13), - Op.getOperand(14) - }; - - EVT VT = Op.getOperand(3).getValueType(); - - MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo(), - MachineMemOperand::MOStore, - VT.getStoreSize(), 4); - return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL, - Op->getVTList(), Ops, VT, MMO); - } case AMDGPUIntrinsic::AMDGPU_kill: { SDValue Src = Op.getOperand(2); if (const ConstantFPSDNode *K = dyn_cast<ConstantFPSDNode>(Src)) { @@ -3505,7 +3513,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, } case Intrinsic::amdgcn_s_barrier: { if (getTargetMachine().getOptLevel() > CodeGenOpt::None) { - const MachineFunction &MF = DAG.getMachineFunction(); const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); unsigned WGSize = ST.getFlatWorkGroupSizes(*MF.getFunction()).second; if (WGSize <= ST.getWavefrontSize()) @@ -3514,6 +3521,75 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, } return SDValue(); }; + case AMDGPUIntrinsic::SI_tbuffer_store: { + + // Extract vindex and voffset from vaddr as appropriate + const ConstantSDNode *OffEn = cast<ConstantSDNode>(Op.getOperand(10)); + const ConstantSDNode *IdxEn = cast<ConstantSDNode>(Op.getOperand(11)); + SDValue VAddr = Op.getOperand(5); + + SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32); + + assert(!(OffEn->isOne() && IdxEn->isOne()) && + "Legacy intrinsic doesn't support both offset and index - use new version"); + + SDValue VIndex = IdxEn->isOne() ? VAddr : Zero; + SDValue VOffset = OffEn->isOne() ? VAddr : Zero; + + // Deal with the vec-3 case + const ConstantSDNode *NumChannels = cast<ConstantSDNode>(Op.getOperand(4)); + auto Opcode = NumChannels->getZExtValue() == 3 ? 
+ AMDGPUISD::TBUFFER_STORE_FORMAT_X3 : AMDGPUISD::TBUFFER_STORE_FORMAT; + + SDValue Ops[] = { + Chain, + Op.getOperand(3), // vdata + Op.getOperand(2), // rsrc + VIndex, + VOffset, + Op.getOperand(6), // soffset + Op.getOperand(7), // inst_offset + Op.getOperand(8), // dfmt + Op.getOperand(9), // nfmt + Op.getOperand(12), // glc + Op.getOperand(13), // slc + }; + + assert((cast<ConstantSDNode>(Op.getOperand(14)))->getZExtValue() == 0 && + "Value of tfe other than zero is unsupported"); + + EVT VT = Op.getOperand(3).getValueType(); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(), + MachineMemOperand::MOStore, + VT.getStoreSize(), 4); + return DAG.getMemIntrinsicNode(Opcode, DL, + Op->getVTList(), Ops, VT, MMO); + } + + case Intrinsic::amdgcn_tbuffer_store: { + SDValue Ops[] = { + Chain, + Op.getOperand(2), // vdata + Op.getOperand(3), // rsrc + Op.getOperand(4), // vindex + Op.getOperand(5), // voffset + Op.getOperand(6), // soffset + Op.getOperand(7), // offset + Op.getOperand(8), // dfmt + Op.getOperand(9), // nfmt + Op.getOperand(10), // glc + Op.getOperand(11) // slc + }; + EVT VT = Op.getOperand(3).getValueType(); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(), + MachineMemOperand::MOStore, + VT.getStoreSize(), 4); + return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL, + Op->getVTList(), Ops, VT, MMO); + } + default: return Op; } @@ -4839,6 +4915,103 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG, return 0; } +SDValue SITargetLowering::performAddCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); + + if (VT != MVT::i32) + return SDValue(); + + SDLoc SL(N); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + // add x, zext (setcc) => addcarry x, 0, setcc + // add x, sext (setcc) => subcarry x, 0, setcc + unsigned Opc = LHS.getOpcode(); + if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND || + Opc == ISD::ANY_EXTEND || Opc == ISD::ADDCARRY) + std::swap(RHS, LHS); + + Opc = RHS.getOpcode(); + switch (Opc) { + default: break; + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ANY_EXTEND: { + auto Cond = RHS.getOperand(0); + if (Cond.getOpcode() != ISD::SETCC && + Cond.getOpcode() != AMDGPUISD::FP_CLASS) + break; + SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1); + SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond }; + Opc = (Opc == ISD::SIGN_EXTEND) ? 
ISD::SUBCARRY : ISD::ADDCARRY; + return DAG.getNode(Opc, SL, VTList, Args); + } + case ISD::ADDCARRY: { + // add x, (addcarry y, 0, cc) => addcarry x, y, cc + auto C = dyn_cast<ConstantSDNode>(RHS.getOperand(1)); + if (!C || C->getZExtValue() != 0) break; + SDValue Args[] = { LHS, RHS.getOperand(0), RHS.getOperand(2) }; + return DAG.getNode(ISD::ADDCARRY, SDLoc(N), RHS->getVTList(), Args); + } + } + return SDValue(); +} + +SDValue SITargetLowering::performSubCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); + + if (VT != MVT::i32) + return SDValue(); + + SDLoc SL(N); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + unsigned Opc = LHS.getOpcode(); + if (Opc != ISD::SUBCARRY) + std::swap(RHS, LHS); + + if (LHS.getOpcode() == ISD::SUBCARRY) { + // sub (subcarry x, 0, cc), y => subcarry x, y, cc + auto C = dyn_cast<ConstantSDNode>(LHS.getOperand(1)); + if (!C || C->getZExtValue() != 0) + return SDValue(); + SDValue Args[] = { LHS.getOperand(0), RHS, LHS.getOperand(2) }; + return DAG.getNode(ISD::SUBCARRY, SDLoc(N), LHS->getVTList(), Args); + } + return SDValue(); +} + +SDValue SITargetLowering::performAddCarrySubCarryCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + + if (N->getValueType(0) != MVT::i32) + return SDValue(); + + auto C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!C || C->getZExtValue() != 0) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDValue LHS = N->getOperand(0); + + // addcarry (add x, y), 0, cc => addcarry x, y, cc + // subcarry (sub x, y), 0, cc => subcarry x, y, cc + unsigned LHSOpc = LHS.getOpcode(); + unsigned Opc = N->getOpcode(); + if ((LHSOpc == ISD::ADD && Opc == ISD::ADDCARRY) || + (LHSOpc == ISD::SUB && Opc == ISD::SUBCARRY)) { + SDValue Args[] = { LHS.getOperand(0), LHS.getOperand(1), N->getOperand(2) }; + return DAG.getNode(Opc, SDLoc(N), N->getVTList(), Args); + } + return SDValue(); +} + SDValue SITargetLowering::performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const { if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) @@ -5009,6 +5182,13 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, switch (N->getOpcode()) { default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); + case ISD::ADD: + return performAddCombine(N, DCI); + case ISD::SUB: + return performSubCombine(N, DCI); + case ISD::ADDCARRY: + case ISD::SUBCARRY: + return performAddCarrySubCarryCombine(N, DCI); case ISD::FADD: return performFAddCombine(N, DCI); case ISD::FSUB: @@ -5425,15 +5605,6 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG, const SDLoc &DL, return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops); } -SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG, - const TargetRegisterClass *RC, - unsigned Reg, EVT VT) const { - SDValue VReg = AMDGPUTargetLowering::CreateLiveInRegister(DAG, RC, Reg, VT); - - return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()), - cast<RegisterSDNode>(VReg)->getReg(), VT); -} - //===----------------------------------------------------------------------===// // SI Inline Assembly Support //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h index 8e2ec40b224cd..24f88e632d38e 100644 --- a/lib/Target/AMDGPU/SIISelLowering.h +++ b/lib/Target/AMDGPU/SIISelLowering.h @@ -108,6 +108,9 @@ class SITargetLowering final : public AMDGPUTargetLowering { unsigned getFusedOpcode(const 
SelectionDAG &DAG, const SDNode *N0, const SDNode *N1) const; + SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performAddCarrySubCarryCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const; @@ -216,8 +219,6 @@ public: void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override; - SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, - unsigned Reg, EVT VT) const override; SDNode *legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const; MachineSDNode *wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL, diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 1097814e99ce2..c9b48fea7225e 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2108,7 +2108,9 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const { - if (!MO.isImm() || OperandType < MCOI::OPERAND_FIRST_TARGET) + if (!MO.isImm() || + OperandType < AMDGPU::OPERAND_SRC_FIRST || + OperandType > AMDGPU::OPERAND_SRC_LAST) return false; // MachineOperand provides no way to tell the true operand size, since it only @@ -2433,8 +2435,73 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } } + // Verify SDWA + if (isSDWA(MI)) { + + if (!ST.hasSDWA()) { + ErrInfo = "SDWA is not supported on this target"; + return false; + } + + int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); + if (DstIdx == -1) + DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::sdst); + + const int OpIndices[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx }; + + for (int OpIdx : OpIndices) { + if (OpIdx == -1) + continue; + const MachineOperand &MO = MI.getOperand(OpIdx); + + if (!ST.hasSDWAScalar()) { + // Only VGPRs on VI + if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) { + ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI"; + return false; + } + } else { + // No immediates on GFX9 + if (!MO.isReg()) { + ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9"; + return false; + } + } + } + + if (!ST.hasSDWAOmod()) { + // No omod allowed on VI + const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod); + if (OMod != nullptr && + (!OMod->isImm() || OMod->getImm() != 0)) { + ErrInfo = "OMod not allowed in SDWA instructions on VI"; + return false; + } + } + + uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode); + if (isVOPC(BasicOpcode)) { + if (!ST.hasSDWASdst() && DstIdx != -1) { + // Only vcc allowed as dst on VI for VOPC + const MachineOperand &Dst = MI.getOperand(DstIdx); + if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) { + ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI"; + return false; + } + } else if (!ST.hasSDWAClampVOPC()) { + // No clamp allowed on GFX9 for VOPC + const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp); + if (Clamp != nullptr && + (!Clamp->isImm() || Clamp->getImm() != 0)) { + ErrInfo = "Clamp not allowed in VOPC SDWA instructions on GFX9"; + return false; + } + } + } + } + // Verify VOP* - if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI)) { + if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) 
|| isVOPC(MI) || isSDWA(MI)) { // Only look at the true operands. Only a real operand can use the constant // bus, and we don't want to check pseudo-operands like the source modifier // flags. diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index f6e5e8883f63c..74b48c7618087 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -814,6 +814,9 @@ namespace AMDGPU { int getSDWAOp(uint16_t Opcode); LLVM_READONLY + int getBasicFromSDWAOp(uint16_t Opcode); + + LLVM_READONLY int getCommuteRev(uint16_t Opcode); LLVM_READONLY diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index 470a47b024433..3b4a8b5d1e817 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -20,6 +20,8 @@ def SIEncodingFamily { int NONE = -1; int SI = 0; int VI = 1; + int SDWA = 2; + int SDWA9 = 3; } //===----------------------------------------------------------------------===// @@ -39,25 +41,41 @@ def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2, [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain] >; -def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", - SDTypeProfile<0, 13, - [SDTCisVT<0, v4i32>, // rsrc(SGPR) - SDTCisVT<1, iAny>, // vdata(VGPR) - SDTCisVT<2, i32>, // num_channels(imm) - SDTCisVT<3, i32>, // vaddr(VGPR) +def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", + SDTypeProfile<1, 9, + [ // vdata + SDTCisVT<1, v4i32>, // rsrc + SDTCisVT<2, i32>, // vindex(VGPR) + SDTCisVT<3, i32>, // voffset(VGPR) SDTCisVT<4, i32>, // soffset(SGPR) - SDTCisVT<5, i32>, // inst_offset(imm) + SDTCisVT<5, i32>, // offset(imm) SDTCisVT<6, i32>, // dfmt(imm) SDTCisVT<7, i32>, // nfmt(imm) - SDTCisVT<8, i32>, // offen(imm) - SDTCisVT<9, i32>, // idxen(imm) - SDTCisVT<10, i32>, // glc(imm) - SDTCisVT<11, i32>, // slc(imm) - SDTCisVT<12, i32> // tfe(imm) + SDTCisVT<8, i32>, // glc(imm) + SDTCisVT<9, i32> // slc(imm) ]>, - [SDNPMayStore, SDNPMemOperand, SDNPHasChain] + [SDNPMayLoad, SDNPMemOperand, SDNPHasChain] >; +def SDTtbuffer_store : SDTypeProfile<0, 10, + [ // vdata + SDTCisVT<1, v4i32>, // rsrc + SDTCisVT<2, i32>, // vindex(VGPR) + SDTCisVT<3, i32>, // voffset(VGPR) + SDTCisVT<4, i32>, // soffset(SGPR) + SDTCisVT<5, i32>, // offset(imm) + SDTCisVT<6, i32>, // dfmt(imm) + SDTCisVT<7, i32>, // nfmt(imm) + SDTCisVT<8, i32>, // glc(imm) + SDTCisVT<9, i32> // slc(imm) + ]>; + +def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store, + [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; +def SItbuffer_store_x3 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_X3", + SDTtbuffer_store, + [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; + def SDTBufferLoad : SDTypeProfile<1, 5, [ // vdata SDTCisVT<1, v4i32>, // rsrc @@ -452,25 +470,25 @@ def ExpSrc3 : RegisterOperand<VGPR_32> { let ParserMatchClass = VReg32OrOffClass; } -class SDWA9Src : RegisterOperand<VS_32> { +class SDWASrc : RegisterOperand<VS_32> { let OperandNamespace = "AMDGPU"; - let OperandType = "OPERAND_SDWA9_SRC"; - let EncoderMethod = "getSDWA9SrcEncoding"; + let OperandType = "OPERAND_SDWA_SRC"; + let EncoderMethod = "getSDWASrcEncoding"; } -def SDWA9Src32 : SDWA9Src { - let DecoderMethod = "decodeSDWA9Src32"; +def SDWASrc32 : SDWASrc { + let DecoderMethod = "decodeSDWASrc32"; } -def SDWA9Src16 : SDWA9Src { - let DecoderMethod = "decodeSDWA9Src16"; +def SDWASrc16 : SDWASrc { + let DecoderMethod = "decodeSDWASrc16"; } -def SDWA9VopcDst : VOPDstOperand<SReg_64> { +def SDWAVopcDst : VOPDstOperand<SReg_64> 
{ let OperandNamespace = "AMDGPU"; - let OperandType = "OPERAND_SDWA9_VOPC_DST"; - let EncoderMethod = "getSDWA9VopcDstEncoding"; - let DecoderMethod = "decodeSDWA9VopcDst"; + let OperandType = "OPERAND_SDWA_VOPC_DST"; + let EncoderMethod = "getSDWAVopcDstEncoding"; + let DecoderMethod = "decodeSDWAVopcDst"; } class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass { @@ -525,7 +543,7 @@ def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>; def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>; def offset_u12 : NamedOperandU12<"Offset", NamedMatchClass<"OffsetU12">>; -def offset_s13 : NamedOperandS13<"Offset", NamedMatchClass<"OffsetS13">>; +def offset_s13 : NamedOperandS13<"OffsetS13", NamedMatchClass<"OffsetS13">>; def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>; def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>; def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>; @@ -545,6 +563,9 @@ def lwe : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>; def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>; def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>; +def DFMT : NamedOperandU8<"DFMT", NamedMatchClass<"DFMT">>; +def NFMT : NamedOperandU8<"NFMT", NamedMatchClass<"NFMT">>; + def dmask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>; def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>; @@ -634,13 +655,13 @@ class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> def Int32InputMods : IntInputMods<Int32InputModsMatchClass>; def Int64InputMods : IntInputMods<Int64InputModsMatchClass>; -def FPRegInputModsMatchClass : AsmOperandClass { - let Name = "RegWithFPInputMods"; +def FPRegSDWAInputModsMatchClass : AsmOperandClass { + let Name = "SDWARegWithFPInputMods"; let ParserMethod = "parseRegWithFPInputMods"; - let PredicateMethod = "isRegKind"; + let PredicateMethod = "isSDWARegKind"; } -def FPRegInputMods : InputMods <FPRegInputModsMatchClass> { +def FPRegSDWAInputMods : InputMods <FPRegSDWAInputModsMatchClass> { let PrintMethod = "printOperandAndFPInputMods"; } @@ -655,13 +676,13 @@ def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> { } -def IntRegInputModsMatchClass : AsmOperandClass { - let Name = "RegWithIntInputMods"; +def IntRegSDWAInputModsMatchClass : AsmOperandClass { + let Name = "SDWARegWithIntInputMods"; let ParserMethod = "parseRegWithIntInputMods"; - let PredicateMethod = "isRegKind"; + let PredicateMethod = "isSDWARegKind"; } -def IntRegInputMods : InputMods <IntRegInputModsMatchClass> { +def IntRegSDWAInputMods : InputMods <IntRegSDWAInputModsMatchClass> { let PrintMethod = "printOperandAndIntInputMods"; } @@ -851,10 +872,10 @@ class getVALUDstForVT<ValueType VT> { } // Returns the register class to use for the destination of VOP[12C] -// instructions with GFX9 SDWA extension -class getSDWA9DstForVT<ValueType VT> { +// instructions with SDWA extension +class getSDWADstForVT<ValueType VT> { RegisterOperand ret = !if(!eq(VT.Size, 1), - SDWA9VopcDst, // VOPC + SDWAVopcDst, // VOPC VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst } @@ -898,8 +919,8 @@ class getVregSrcForVT<ValueType VT> { !if(!eq(VT.Size, 64), VReg_64, VGPR_32)); } -class getSDWA9SrcForVT <ValueType VT> { - RegisterOperand ret = !if(!eq(VT.Size, 16), SDWA9Src16, SDWA9Src32); +class getSDWASrcForVT <ValueType VT> { + RegisterOperand ret = !if(!eq(VT.Size, 16), SDWASrc16, SDWASrc32); } // Returns the register class to use for sources of VOP3 
instructions for the @@ -995,7 +1016,7 @@ class getSrcMod <ValueType VT> { ); } -// Return type of input modifiers operand specified input operand for SDWA/DPP +// Return type of input modifiers operand specified input operand for DPP class getSrcModExt <ValueType VT> { bit isFP = !if(!eq(VT.Value, f16.Value), 1, !if(!eq(VT.Value, f32.Value), 1, @@ -1004,13 +1025,13 @@ class getSrcModExt <ValueType VT> { Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods); } -// Return type of input modifiers operand specified input operand for SDWA 9 -class getSrcModSDWA9 <ValueType VT> { +// Return type of input modifiers operand specified input operand for SDWA +class getSrcModSDWA <ValueType VT> { bit isFP = !if(!eq(VT.Value, f16.Value), 1, !if(!eq(VT.Value, f32.Value), 1, !if(!eq(VT.Value, f64.Value), 1, 0))); - Operand ret = !if(isFP, FPRegInputMods, IntRegInputMods); + Operand ret = !if(isFP, FPRegSDWAInputMods, IntRegSDWAInputMods); } // Returns the input arguments for VOP[12C] instructions for the given SrcVT. @@ -1141,36 +1162,12 @@ class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs, /* endif */))); } -class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs, - bit HasFloatModifiers, Operand Src0Mod, Operand Src1Mod, - ValueType DstVT> { - dag ret = !if(!eq(NumSrcArgs, 0), - // VOP1 without input operands (V_NOP) - (ins), - !if(!eq(NumSrcArgs, 1), - // VOP1_SDWA - (ins Src0Mod:$src0_modifiers, Src0RC:$src0, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel), - !if(!eq(NumSrcArgs, 2), - !if(!eq(DstVT.Size, 1), - // VOPC_SDWA with modifiers - (ins Src0Mod:$src0_modifiers, Src0RC:$src0, - Src1Mod:$src1_modifiers, Src1RC:$src1, - clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel), - // VOP2_SDWA with modifiers - (ins Src0Mod:$src0_modifiers, Src0RC:$src0, - Src1Mod:$src1_modifiers, Src1RC:$src1, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel, src1_sel:$src1_sel)), - (ins)/* endif */))); -} -// Ins for GFX9 SDWA -class getInsSDWA9 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs, - bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod, - ValueType DstVT> { +// Ins for SDWA +class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs, + bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod, + ValueType DstVT> { dag ret = !if(!eq(NumSrcArgs, 0), // VOP1 without input operands (V_NOP) @@ -1178,31 +1175,31 @@ class getInsSDWA9 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArg !if(!eq(NumSrcArgs, 1), // VOP1 !if(!eq(HasSDWAOMod, 0), - // VOP1_SDWA9 without omod + // VOP1_SDWA without omod (ins Src0Mod:$src0_modifiers, Src0RC:$src0, clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel), - // VOP1_SDWA9 with omod + // VOP1_SDWA with omod (ins Src0Mod:$src0_modifiers, Src0RC:$src0, clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel)), !if(!eq(NumSrcArgs, 2), !if(!eq(DstVT.Size, 1), - // VOPC_SDWA9 + // VOPC_SDWA (ins Src0Mod:$src0_modifiers, Src0RC:$src0, Src1Mod:$src1_modifiers, Src1RC:$src1, - src0_sel:$src0_sel, src1_sel:$src1_sel), - // VOP2_SDWA9 + clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel), + // VOP2_SDWA !if(!eq(HasSDWAOMod, 0), - // VOP2_SDWA9 without omod + // VOP2_SDWA without omod (ins Src0Mod:$src0_modifiers, Src0RC:$src0, Src1Mod:$src1_modifiers, Src1RC:$src1, clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, 
src1_sel:$src1_sel), - // VOP1_SDWA9 with omod + // VOP2_SDWA with omod (ins Src0Mod:$src0_modifiers, Src0RC:$src0, Src1Mod:$src1_modifiers, Src1RC:$src1, clampmod:$clamp, omod:$omod, @@ -1220,12 +1217,12 @@ class getOutsExt <bit HasDst, ValueType DstVT, RegisterOperand DstRCExt> { (outs)); // V_NOP } -// Outs for GFX9 SDWA -class getOutsSDWA9 <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA9> { +// Outs for SDWA +class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> { dag ret = !if(HasDst, !if(!eq(DstVT.Size, 1), - (outs DstRCSDWA9:$sdst), - (outs DstRCSDWA9:$vdst)), + (outs DstRCSDWA:$sdst), + (outs DstRCSDWA:$vdst)), (outs)); // V_NOP } @@ -1387,8 +1384,7 @@ class VOPProfile <list<ValueType> _ArgVT> { field ValueType Src2VT = ArgVT[3]; field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret; field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret; - field RegisterOperand DstRCSDWA = getVALUDstForVT<DstVT>.ret; - field RegisterOperand DstRCSDWA9 = getSDWA9DstForVT<DstVT>.ret; + field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret; field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret; field RegisterClass Src1RC32 = getVregSrcForVT<Src1VT>.ret; field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret; @@ -1396,19 +1392,15 @@ class VOPProfile <list<ValueType> _ArgVT> { field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret; field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret; field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret; - field RegisterClass Src0SDWA = getVregSrcForVT<Src0VT>.ret; - field RegisterClass Src1SDWA = getVregSrcForVT<Src1VT>.ret; - field RegisterOperand Src0SDWA9 = getSDWA9SrcForVT<Src0VT>.ret; - field RegisterOperand Src1SDWA9 = getSDWA9SrcForVT<Src0VT>.ret; + field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret; + field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret; field Operand Src0Mod = getSrcMod<Src0VT>.ret; field Operand Src1Mod = getSrcMod<Src1VT>.ret; field Operand Src2Mod = getSrcMod<Src2VT>.ret; field Operand Src0ModDPP = getSrcModExt<Src0VT>.ret; field Operand Src1ModDPP = getSrcModExt<Src1VT>.ret; - field Operand Src0ModSDWA = getSrcModExt<Src0VT>.ret; - field Operand Src1ModSDWA = getSrcModExt<Src1VT>.ret; - field Operand Src0ModSDWA9 = getSrcModSDWA9<Src0VT>.ret; - field Operand Src1ModSDWA9 = getSrcModSDWA9<Src1VT>.ret; + field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret; + field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret; field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1); @@ -1457,8 +1449,7 @@ class VOPProfile <list<ValueType> _ArgVT> { field dag Outs32 = Outs; field dag Outs64 = Outs; field dag OutsDPP = getOutsExt<HasDst, DstVT, DstRCDPP>.ret; - field dag OutsSDWA = getOutsExt<HasDst, DstVT, DstRCSDWA>.ret; - field dag OutsSDWA9 = getOutsSDWA9<HasDst, DstVT, DstRCSDWA9>.ret; + field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret; field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret; field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, @@ -1471,11 +1462,9 @@ class VOPProfile <list<ValueType> _ArgVT> { field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP>.ret; field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs, - HasModifiers, Src0ModSDWA, Src1ModSDWA, + HasSDWAOMod, Src0ModSDWA, Src1ModSDWA, DstVT>.ret; - field dag InsSDWA9 = getInsSDWA9<Src0SDWA9, Src1SDWA9, NumSrcArgs, - HasSDWAOMod, Src0ModSDWA9, Src1ModSDWA9, - DstVT>.ret; + field string Asm32 = 
getAsm32<HasDst, NumSrcArgs, DstVT>.ret; field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, HasOMod, DstVT>.ret; @@ -1628,13 +1617,13 @@ def getSDWAOp : InstrMapping { let ValueCols = [["SDWA"]]; } -// Maps ordinary instructions to their SDWA GFX9 counterparts -def getSDWA9Op : InstrMapping { +// Maps SDWA instructions to their ordinary counterparts +def getBasicFromSDWAOp : InstrMapping { let FilterClass = "VOP"; let RowFields = ["OpName"]; let ColFields = ["AsmVariantName"]; - let KeyCol = ["Default"]; - let ValueCols = [["SDWA9"]]; + let KeyCol = ["SDWA"]; + let ValueCols = [["Default"]]; } def getMaskedMIMGOp : InstrMapping { @@ -1669,7 +1658,9 @@ def getMCOpcodeGen : InstrMapping { let ColFields = ["Subtarget"]; let KeyCol = [!cast<string>(SIEncodingFamily.NONE)]; let ValueCols = [[!cast<string>(SIEncodingFamily.SI)], - [!cast<string>(SIEncodingFamily.VI)]]; + [!cast<string>(SIEncodingFamily.VI)], + [!cast<string>(SIEncodingFamily.SDWA)], + [!cast<string>(SIEncodingFamily.SDWA9)]]; } // Get equivalent SOPK instruction. diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 18b197ddb7ae7..3203c38dae344 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -74,7 +74,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkItemIDX(false), WorkItemIDY(false), WorkItemIDZ(false), - PrivateMemoryInputPtr(false) { + ImplicitBufferPtr(false) { const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); const Function *F = MF.getFunction(); FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F); @@ -86,6 +86,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3; ScratchWaveOffsetReg = AMDGPU::SGPR4; FrameOffsetReg = AMDGPU::SGPR5; + StackPtrOffsetReg = AMDGPU::SGPR32; return; } @@ -150,7 +151,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) DispatchID = true; } else if (ST.isMesaGfxShader(MF)) { if (HasStackObjects || MaySpill) - PrivateMemoryInputPtr = true; + ImplicitBufferPtr = true; } // We don't need to worry about accessing spills with flat instructions. @@ -203,11 +204,11 @@ unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) { return FlatScratchInitUserSGPR; } -unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) { - PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg( +unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) { + ImplicitBufferPtrUserSGPR = TRI.getMatchingSuperReg( getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); NumUserSGPRs += 2; - return PrivateMemoryPtrUserSGPR; + return ImplicitBufferPtrUserSGPR; } /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI. diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 9fdb8caac6f21..05aa249584bf1 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -97,7 +97,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { unsigned StackPtrOffsetReg; // Input registers for non-HSA ABI - unsigned PrivateMemoryPtrUserSGPR; + unsigned ImplicitBufferPtrUserSGPR; // Input registers setup for the HSA ABI. // User SGPRs in allocation order. 
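The inverted getBasicFromSDWAOp InstrMapping above is the table behind the declaration added to SIInstrInfo.h earlier in this patch: TableGen emits the lookup, and callers use it to reason about an SDWA instruction through its plain VOP opcode, as the new SDWA verifier does. A hedged usage sketch (the helper name is illustrative; assumes AMDGPU's target-internal SIInstrInfo.h):

    // True if an SDWA opcode wraps a VOPC compare; getBasicFromSDWAOp returns
    // -1 when the opcode has no basic (non-SDWA) counterpart in the mapping.
    static bool isSDWACompare(const SIInstrInfo &TII, uint16_t SDWAOpc) {
      int Basic = AMDGPU::getBasicFromSDWAOp(SDWAOpc);
      return Basic != -1 && TII.isVOPC(static_cast<uint16_t>(Basic));
    }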
@@ -179,7 +179,7 @@ private: // Private memory buffer // Compute directly in sgpr[0:1] // Other shaders indirect 64-bits at sgpr[0:1] - bool PrivateMemoryInputPtr : 1; + bool ImplicitBufferPtr : 1; MCPhysReg getNextUserSGPR() const { assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); @@ -236,7 +236,7 @@ public: unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); unsigned addDispatchID(const SIRegisterInfo &TRI); unsigned addFlatScratchInit(const SIRegisterInfo &TRI); - unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI); + unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI); // Add system SGPRs. unsigned addWorkGroupIDX() { @@ -341,8 +341,8 @@ public: return WorkItemIDZ; } - bool hasPrivateMemoryInputPtr() const { - return PrivateMemoryInputPtr; + bool hasImplicitBufferPtr() const { + return ImplicitBufferPtr; } unsigned getNumUserSGPRs() const { @@ -396,8 +396,8 @@ public: return QueuePtrUserSGPR; } - unsigned getPrivateMemoryPtrUserSGPR() const { - return PrivateMemoryPtrUserSGPR; + unsigned getImplicitBufferPtrUserSGPR() const { + return ImplicitBufferPtrUserSGPR; } bool hasSpilledSGPRs() const { diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index f4ddf1891683b..4ac23ef03cb32 100644 --- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -67,9 +67,9 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; void matchSDWAOperands(MachineFunction &MF); - bool isConvertibleToSDWA(const MachineInstr &MI) const; + bool isConvertibleToSDWA(const MachineInstr &MI, const SISubtarget &ST) const; bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands); - void legalizeScalarOperands(MachineInstr &MI) const; + void legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const; StringRef getPassName() const override { return "SI Peephole SDWA"; } @@ -224,7 +224,7 @@ static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) { static bool isSubregOf(const MachineOperand &SubReg, const MachineOperand &SuperReg, const TargetRegisterInfo *TRI) { - + if (!SuperReg.isReg() || !SubReg.isReg()) return false; @@ -557,7 +557,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - + if (TRI->isPhysicalRegister(Src0->getReg()) || TRI->isPhysicalRegister(Dst->getReg())) break; @@ -590,7 +590,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { break; MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - + if (TRI->isPhysicalRegister(Src1->getReg()) || TRI->isPhysicalRegister(Dst->getReg())) break; @@ -607,16 +607,38 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { } } -bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI) const { +bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI, + const SISubtarget &ST) const { // Check if this instruction has opcode that supports SDWA - unsigned Opc = MI.getOpcode(); - if (AMDGPU::getSDWAOp(Opc) != -1) - return true; - int Opc32 = AMDGPU::getVOPe32(Opc); - if (Opc32 != -1 && AMDGPU::getSDWAOp(Opc32) != -1) - return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) && - !TII->getNamedOperand(MI, AMDGPU::OpName::sdst); - return false; + int Opc = MI.getOpcode(); + if (AMDGPU::getSDWAOp(Opc) == -1) + Opc = AMDGPU::getVOPe32(Opc); + + if (Opc == -1 || AMDGPU::getSDWAOp(Opc) == -1) + 
return false; + + if (!ST.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod)) + return false; + + if (TII->isVOPC(Opc)) { + if (!ST.hasSDWASdst()) { + const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst); + if (SDst && SDst->getReg() != AMDGPU::VCC) + return false; + } + + if (!ST.hasSDWAClampVOPC() && TII->hasModifiersSet(MI, AMDGPU::OpName::clamp)) + return false; + + } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) { + return false; + } + + if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_MAC_F16_e32 || + Opc == AMDGPU::V_MAC_F32_e32)) + return false; + + return true; } bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, @@ -641,6 +663,11 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, if (Dst) { assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1); SDWAInst.add(*Dst); + } else { + Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst); + assert(Dst && + AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1); + SDWAInst.add(*Dst); + } // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and @@ -677,9 +704,23 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, SDWAInst.add(*Src2); } - // Initialize clamp. + // Copy clamp if present, initialize it otherwise. assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1); - SDWAInst.addImm(0); + MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp); + if (Clamp) { + SDWAInst.add(*Clamp); + } else { + SDWAInst.addImm(0); + } + + // Copy omod if present, initialize it otherwise if needed. + MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod); + if (OMod) { + assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1); + SDWAInst.add(*OMod); + } else if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) { + SDWAInst.addImm(0); + } // Initialize dst_sel and dst_unused if present if (Dst) { @@ -733,16 +774,25 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, } // If an instruction was converted to SDWA it should not have immediates or SGPR -// operands. Copy its scalar operands into VGPRs. +// operands (one SGPR is allowed on GFX9). Copy its scalar operands into VGPRs. 
+void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const { const MCInstrDesc &Desc = TII->get(MI.getOpcode()); - for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) { - MachineOperand &Op = MI.getOperand(I); + unsigned ConstantBusCount = 0; + for (MachineOperand &Op: MI.explicit_uses()) { if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg()))) continue; + + unsigned I = MI.getOperandNo(&Op); if (Desc.OpInfo[I].RegClass == -1 || !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass))) continue; + + if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() && + TRI->isSGPRReg(*MRI, Op.getReg())) { + ++ConstantBusCount; + continue; + } + unsigned VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), VGPR); @@ -758,22 +808,20 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI) const { bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); - if (!ST.hasSDWA() || - !AMDGPU::isVI(ST)) { // TODO: Add support for SDWA on gfx9 + if (!ST.hasSDWA()) return false; - } MRI = &MF.getRegInfo(); TRI = ST.getRegisterInfo(); TII = ST.getInstrInfo(); - + // Find all SDWA operands in MF. matchSDWAOperands(MF); for (const auto &OperandPair : SDWAOperands) { const auto &Operand = OperandPair.second; MachineInstr *PotentialMI = Operand->potentialToConvert(TII); - if (PotentialMI && isConvertibleToSDWA(*PotentialMI)) { + if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST)) { PotentialMatches[PotentialMI].push_back(Operand.get()); } } @@ -788,7 +836,7 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { bool Ret = !ConvertedInstructions.empty(); while (!ConvertedInstructions.empty()) - legalizeScalarOperands(*ConvertedInstructions.pop_back_val()); + legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST); return Ret; } diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index b611f28fcabdf..ef6ad4ad0c8f3 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1044,18 +1044,29 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, unsigned CarryOut = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); unsigned ScaledReg - = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - // XXX - Should this use a vector shift? - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg) - .addReg(DiffReg, RegState::Kill) - .addImm(Log2_32(ST.getWavefrontSize())); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg) + .addImm(Log2_32(ST.getWavefrontSize())) + .addReg(DiffReg, RegState::Kill); // TODO: Fold if use instruction is another add of a constant. 
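In the eliminateFrameIndex hunk above, the shift becomes a V_LSHRREV_B32 into a VGPR instead of an S_LSHR_B32 into an SGPR, most likely so the V_ADD_I32 that follows stays legal once its other source is a literal or an S_MOV_B32-materialized constant (VALU instructions may read at most one scalar source). The address math itself is unchanged; a small sketch of it (illustrative, using LLVM's Log2_32):

    #include "llvm/Support/MathExtras.h" // Log2_32
    #include <cstdint>

    // The wave-relative byte delta is divided by the wavefront size (a right
    // shift, since the size is a power of two), then the frame object's
    // constant offset is added on top.
    uint32_t frameIndexOffset(uint32_t WaveByteDelta, uint32_t WavefrontSize,
                              uint32_t ConstOffset) {
      return (WaveByteDelta >> llvm::Log2_32(WavefrontSize)) + ConstOffset;
    }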
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg) - .addReg(CarryOut, RegState::Define | RegState::Dead) - .addImm(Offset) - .addReg(ScaledReg, RegState::Kill); + if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) { + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg) + .addReg(CarryOut, RegState::Define | RegState::Dead) + .addImm(Offset) + .addReg(ScaledReg, RegState::Kill); + } else { + unsigned ConstOffsetReg + = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg) + .addImm(Offset); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg) + .addReg(CarryOut, RegState::Define | RegState::Dead) + .addReg(ConstOffsetReg, RegState::Kill) + .addReg(ScaledReg, RegState::Kill); + } MRI.setRegAllocationHint(CarryOut, 0, AMDGPU::VCC); } @@ -1341,12 +1352,11 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF, case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET: return MFI->PrivateSegmentWaveByteOffsetSystemSGPR; case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER: - if (ST.isAmdCodeObjectV2(MF)) { - assert(MFI->hasPrivateSegmentBuffer()); - return MFI->PrivateSegmentBufferUserSGPR; - } - assert(MFI->hasPrivateMemoryInputPtr()); - return MFI->PrivateMemoryPtrUserSGPR; + assert(MFI->hasPrivateSegmentBuffer()); + return MFI->PrivateSegmentBufferUserSGPR; + case SIRegisterInfo::IMPLICIT_BUFFER_PTR: + assert(MFI->hasImplicitBufferPtr()); + return MFI->ImplicitBufferPtrUserSGPR; case SIRegisterInfo::KERNARG_SEGMENT_PTR: assert(MFI->hasKernargSegmentPtr()); return MFI->KernargSegmentPtrUserSGPR; diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h index 8fed6d5f9710f..600cc886cb595 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/lib/Target/AMDGPU/SIRegisterInfo.h @@ -197,12 +197,13 @@ public: WORKGROUP_ID_Y = 11, WORKGROUP_ID_Z = 12, PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14, + IMPLICIT_BUFFER_PTR = 15, // VGPRS: - FIRST_VGPR_VALUE = 15, + FIRST_VGPR_VALUE = 16, WORKITEM_ID_X = FIRST_VGPR_VALUE, - WORKITEM_ID_Y = 16, - WORKITEM_ID_Z = 17 + WORKITEM_ID_Y = 17, + WORKITEM_ID_Z = 18 }; /// \brief Returns the physical register that \p Value is stored in. diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp index c5f121757e623..96a18544f02ac 100644 --- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -92,6 +92,8 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII, case AMDGPU::V_ADDC_U32_e64: case AMDGPU::V_SUBB_U32_e64: + if (TII->getNamedOperand(MI, AMDGPU::OpName::src1)->isImm()) + return false; // Additional verification is needed for sdst/src2. 
return true; diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index f581e69980c79..26515b27bb77d 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -538,6 +538,27 @@ bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) { Reg == AMDGPU::SCC; } +bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) { + + if (Reg0 == Reg1) { + return true; + } + + unsigned SubReg0 = TRI->getSubReg(Reg0, 1); + if (SubReg0 == 0) { + return TRI->getSubRegIndex(Reg1, Reg0) > 0; + } + + for (unsigned Idx = 2; SubReg0 > 0; ++Idx) { + if (isRegIntersect(Reg1, SubReg0, TRI)) { + return true; + } + SubReg0 = TRI->getSubReg(Reg0, Idx); + } + + return false; +} + unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { switch(Reg) { diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index eff0230d21f57..936e4921a7097 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -271,6 +271,9 @@ bool isGFX9(const MCSubtargetInfo &STI); /// \brief Is Reg - scalar register bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI); +/// \brief Is there any intersection between registers +bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI); + /// If \p Reg is a pseudo reg, return the correct hardware register given /// \p STI otherwise return \p Reg. unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI); diff --git a/lib/Target/AMDGPU/VOP1Instructions.td b/lib/Target/AMDGPU/VOP1Instructions.td index 95b5ef0a49dba..96b33c373f052 100644 --- a/lib/Target/AMDGPU/VOP1Instructions.td +++ b/lib/Target/AMDGPU/VOP1Instructions.td @@ -93,11 +93,6 @@ class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : let AsmMatchConverter = "cvtSdwaVOP1"; } -class VOP1_SDWA9_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : - VOP_SDWA9_Pseudo <OpName, P, pattern> { - let AsmMatchConverter = "cvtSdwaVOP1"; -} - class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies { list<dag> ret = !if(P.HasModifiers, @@ -117,7 +112,6 @@ multiclass VOP1Inst <string opName, VOPProfile P, def _e32 : VOP1_Pseudo <opName, P>; def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>; def _sdwa : VOP1_SDWA_Pseudo <opName, P>; - def _sdwa9 : VOP1_SDWA9_Pseudo <opName, P>; } // Special profile for instructions which have clamp @@ -274,12 +268,10 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> { let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0); let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel); - let InsSDWA9 = (ins Src0RC32:$vdst, Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, - clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel); let Asm32 = getAsm32<1, 1>.ret; let Asm64 = getAsm64<1, 1, 0, 1>.ret; @@ -545,8 +537,8 @@ multiclass VOP1_Real_vi <bits<10> op> { VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; def _sdwa_gfx9 : - VOP_SDWA9_Real <!cast<VOP1_SDWA9_Pseudo>(NAME#"_sdwa9")>, - VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA9_Pseudo>(NAME#"_sdwa9").Pfl>; + VOP_SDWA9_Real 
<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, + VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; // For now left dpp only for asm/dasm // TODO: add corresponding pseudo diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td index 657cacaa792ca..7b9bc71ad4c77 100644 --- a/lib/Target/AMDGPU/VOP2Instructions.td +++ b/lib/Target/AMDGPU/VOP2Instructions.td @@ -114,11 +114,6 @@ class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : let AsmMatchConverter = "cvtSdwaVOP2"; } -class VOP2_SDWA9_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : - VOP_SDWA9_Pseudo <OpName, P, pattern> { - let AsmMatchConverter = "cvtSdwaVOP2"; -} - class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies { list<dag> ret = !if(P.HasModifiers, [(set P.DstVT:$vdst, @@ -139,7 +134,6 @@ multiclass VOP2Inst <string opName, Commutable_REV<revOp#"_e64", !eq(revOp, opName)>; def _sdwa : VOP2_SDWA_Pseudo <opName, P>; - def _sdwa9 : VOP2_SDWA9_Pseudo <opName, P>; } multiclass VOP2bInst <string opName, @@ -156,10 +150,6 @@ multiclass VOP2bInst <string opName, def _sdwa : VOP2_SDWA_Pseudo <opName, P> { let AsmMatchConverter = "cvtSdwaVOP2b"; } - - def _sdwa9 : VOP2_SDWA9_Pseudo <opName, P> { - let AsmMatchConverter = "cvtSdwaVOP2b"; - } } def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>, @@ -221,17 +211,13 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> { VGPR_32:$src2, // stub argument dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, VGPR_32:$src2, // stub argument - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); - let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, - Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1, - VGPR_32:$src2, // stub argument - clampmod:$clamp, omod:$omod, - dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel, src1_sel:$src1_sel); let Asm32 = getAsm32<1, 2, vt>.ret; let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, vt>.ret; let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret; @@ -289,15 +275,10 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); - let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, - Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1, - clampmod:$clamp, omod:$omod, - dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel, src1_sel:$src1_sel); - let InsDPP = (ins Src0Mod:$src0_modifiers, Src0DPP:$src0, Src1Mod:$src1_modifiers, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, @@ -326,6 +307,8 @@ def VOP_READLANE : VOPProfile<[i32, i32, i32]> { let Ins64 = Ins32; let Asm32 = " $vdst, $src0, $src1"; let Asm64 = Asm32; + let HasExt = 0; + let HasSDWA9 = 0; } def VOP_WRITELANE : VOPProfile<[i32, i32, i32]> { @@ -335,6 +318,8 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32]> { let Ins64 = Ins32; let Asm32 = " $vdst, $src0, $src1"; let Asm64 = Asm32; + let HasExt = 0; + let HasSDWA9 = 0; } 
//===----------------------------------------------------------------------===// @@ -397,20 +382,29 @@ def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE, def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, [], "">; } // End isConvergent = 1 -defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>; -defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32>; -defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>; -defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>; -defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>; -defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_I32_F32_I32>; // TODO: set "Uses = dst" -defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_I32_F32_F32>; -defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_I32_F32_F32>; -defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_I32_F32_F32, AMDGPUpkrtz_f16_f32>; -defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_I32_I32_I32>; -defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_I32_I32_I32>; +defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_NO_EXT<VOP_I32_I32_I32>>; +defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>>; +defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_lo>; +defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>; +defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>; +defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst" +defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_I32_F32_F32>>; +defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_I32_F32_F32>>; +defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpkrtz_f16_f32>; +defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_I32_I32_I32>>; +defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_I32_I32_I32>>; } // End SubtargetPredicate = isGCN +def : Pat< + (AMDGPUadde i32:$src0, i32:$src1, i1:$src2), + (V_ADDC_U32_e64 $src0, $src1, $src2) +>; + +def : Pat< + (AMDGPUsube i32:$src0, i32:$src1, i1:$src2), + (V_SUBB_U32_e64 $src0, $src1, $src2) +>; // These instructions only exist on SI and CI let SubtargetPredicate = isSICI in { @@ -728,8 +722,8 @@ multiclass VOP2_SDWA_Real <bits<6> op> { multiclass VOP2_SDWA9_Real <bits<6> op> { def _sdwa_gfx9 : - VOP_SDWA9_Real <!cast<VOP2_SDWA9_Pseudo>(NAME#"_sdwa9")>, - VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA9_Pseudo>(NAME#"_sdwa9").Pfl>; + VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>, + VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; } multiclass VOP2be_Real_e32e64_vi <bits<6> op> : diff --git a/lib/Target/AMDGPU/VOPCInstructions.td b/lib/Target/AMDGPU/VOPCInstructions.td index cd347b86d3050..f3482a22d5dcd 100644 --- a/lib/Target/AMDGPU/VOPCInstructions.td +++ b/lib/Target/AMDGPU/VOPCInstructions.td @@ -113,11 +113,6 @@ class VOPC_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : let AsmMatchConverter = "cvtSdwaVOPC"; } -class VOPC_SDWA9_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : - VOP_SDWA9_Pseudo <OpName, P, pattern> { - let AsmMatchConverter = "cvtSdwaVOPC"; -} - // This class is used only with VOPC 
instructions. Use $sdst for out operand class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst, VOPProfile p = ps.Pfl> : InstAlias <ps.OpName#" "#p.Asm32, (inst)>, PredicateControl { @@ -189,13 +184,6 @@ multiclass VOPC_Pseudos <string opName, let isConvergent = DefExec; let isCompare = 1; } - - def _sdwa9 : VOPC_SDWA9_Pseudo <opName, P> { - let Defs = !if(DefExec, [VCC, EXEC], [VCC]); - let SchedRW = P.Schedule; - let isConvergent = DefExec; - let isCompare = 1; - } } def VOPC_I1_F16_F16 : VOPC_Profile<[Write32Bit], f16>; @@ -540,14 +528,12 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType vt> : VOPC_Profile<sched, vt, i32> { let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1); let Asm64 = "$sdst, $src0_modifiers, $src1"; + let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel); - let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, - Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1, - src0_sel:$src0_sel, src1_sel:$src1_sel); + let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel"; - //let AsmSDWA9 = " $sdst, $src0_modifiers, $src1_modifiers $src0_sel $src1_sel"; let HasSrc1Mods = 0; let HasClamp = 0; let HasOMod = 0; @@ -580,12 +566,6 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec> { let SchedRW = p.Schedule; let isConvergent = DefExec; } - - def _sdwa9 : VOPC_SDWA9_Pseudo <opName, p> { - let Defs = !if(DefExec, [VCC, EXEC], [VCC]); - let SchedRW = p.Schedule; - let isConvergent = DefExec; - } } def VOPC_I1_F16_I32 : VOPC_Class_Profile<[Write32Bit], f16>; @@ -954,8 +934,8 @@ multiclass VOPC_Real_vi <bits<10> op> { VOPC_SDWAe <op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; def _sdwa_gfx9 : - VOP_SDWA9_Real <!cast<VOPC_SDWA9_Pseudo>(NAME#"_sdwa9")>, - VOPC_SDWA9e <op{7-0}, !cast<VOPC_SDWA9_Pseudo>(NAME#"_sdwa9").Pfl>; + VOP_SDWA9_Real <!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>, + VOPC_SDWA9e <op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"), !cast<Instruction>(NAME#"_e32_vi")> { diff --git a/lib/Target/AMDGPU/VOPInstructions.td b/lib/Target/AMDGPU/VOPInstructions.td index 4da654f84f9d1..e386f21c2ba49 100644 --- a/lib/Target/AMDGPU/VOPInstructions.td +++ b/lib/Target/AMDGPU/VOPInstructions.td @@ -232,11 +232,11 @@ class VOP3Pe <bits<10> op, VOPProfile P> : Enc64 { let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1 let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2 - let Inst{11} = !if(P.HasOpSel, src0_modifiers{2}, 0); // op_sel(0) - let Inst{12} = !if(P.HasOpSel, src1_modifiers{2}, 0); // op_sel(1) - let Inst{13} = !if(P.HasOpSel, src2_modifiers{2}, 0); // op_sel(2) + let Inst{11} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{2}, 0); // op_sel(0) + let Inst{12} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{2}, 0); // op_sel(1) + let Inst{13} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{2}, 0); // op_sel(2) - let Inst{14} = !if(P.HasOpSel, src2_modifiers{3}, 0); // op_sel_hi(2) + let Inst{14} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{3}, 0); // op_sel_hi(2) let Inst{15} = !if(P.HasClamp, clamp{0}, 0); @@ -245,8 +245,8 @@ class VOP3Pe <bits<10> op, VOPProfile P> : Enc64 { let Inst{40-32} = !if(P.HasSrc0, src0, 0); let Inst{49-41} = !if(P.HasSrc1, src1, 0); let Inst{58-50} = !if(P.HasSrc2, src2, 0); - let Inst{59} = !if(P.HasOpSel, src0_modifiers{3}, 0); // 
op_sel_hi(0) - let Inst{60} = !if(P.HasOpSel, src1_modifiers{3}, 0); // op_sel_hi(1) + let Inst{59} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{3}, 0); // op_sel_hi(0) + let Inst{60} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{3}, 0); // op_sel_hi(1) let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // neg (lo) let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // neg (lo) let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo) @@ -300,6 +300,19 @@ class VOP_SDWAe<VOPProfile P> : Enc64 { let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0); } +// GFX9 adds two features to SDWA: +// 1. Add 3 fields to the SDWA microcode word: S0, S1 and OMOD. +// a. S0 and S1 indicate that source 0 and 1 respectively are SGPRs rather +// than VGPRs (at most 1 can be an SGPR); +// b. OMOD is the standard output modifier (result *2, *4, /2) +// 2. Add a new version of the SDWA microcode word for VOPC: SDWAB. This +// replaces OMOD and the dest fields with SD and SDST (SGPR destination) +// field. +// a. When SD=1, the SDST is used as the destination for the compare result; +// b. When SD=0, VCC is used. +// +// In GFX9, V_MAC_F16, V_MAC_F32 opcodes cannot be used with SDWA + // gfx9 SDWA basic encoding class VOP_SDWA9e<VOPProfile P> : Enc64 { bits<9> src0; // {src0_sgpr{0}, src0{7-0}} @@ -353,6 +366,7 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> : string Mnemonic = opName; string AsmOperands = P.AsmSDWA; + string AsmOperands9 = P.AsmSDWA9; let Size = 8; let mayLoad = 0; @@ -372,53 +386,9 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> : VOPProfile Pfl = P; } -// GFX9 adds two features to SDWA: -// 1. Add 3 fields to the SDWA microcode word: S0, S1 and OMOD. -// a. S0 and S1 indicate that source 0 and 1 respectively are SGPRs rather -// than VGPRs (at most 1 can be an SGPR); -// b. OMOD is the standard output modifier (result *2, *4, /2) -// 2. Add a new version of the SDWA microcode word for VOPC: SDWAB. This -// replaces OMOD and the dest fields with SD and SDST (SGPR destination) -// field. -// a. When SD=1, the SDST is used as the destination for the compare result; -// b.when SD=0, VCC is used. 
-// -// In GFX9, V_MAC_F16, V_MAC_F32 opcodes cannot be used with SDWA - -class VOP_SDWA9_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> : - InstSI <P.OutsSDWA9, P.InsSDWA9, "", pattern>, - VOP <opName>, - SIMCInstr <opName#"_sdwa9", SIEncodingFamily.NONE>, - MnemonicAlias <opName#"_sdwa9", opName> { - - let isPseudo = 1; - let isCodeGenOnly = 1; - let UseNamedOperandTable = 1; - - string Mnemonic = opName; - string AsmOperands = P.AsmSDWA9; - - let Size = 8; - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - - let VALU = 1; - let SDWA = 1; - let Uses = [EXEC]; - - let SubtargetPredicate = !if(P.HasSDWA9, HasSDWA9, DisableInst); - let AssemblerPredicate = !if(P.HasSDWA9, HasSDWA9, DisableInst); - let AsmVariantName = !if(P.HasSDWA9, AMDGPUAsmVariants.SDWA9, - AMDGPUAsmVariants.Disable); - let DecoderNamespace = "SDWA9"; - - VOPProfile Pfl = P; -} - class VOP_SDWA_Real <VOP_SDWA_Pseudo ps> : InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>, - SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> { + SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SDWA> { let isPseudo = 0; let isCodeGenOnly = 0; @@ -443,9 +413,9 @@ class VOP_SDWA_Real <VOP_SDWA_Pseudo ps> : let TSFlags = ps.TSFlags; } -class VOP_SDWA9_Real <VOP_SDWA9_Pseudo ps> : - InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>, - SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> { +class VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> : + InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands9, []>, + SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SDWA9> { let isPseudo = 0; let isCodeGenOnly = 0; @@ -458,13 +428,15 @@ class VOP_SDWA9_Real <VOP_SDWA9_Pseudo ps> : let Constraints = ps.Constraints; let DisableEncoding = ps.DisableEncoding; + let SubtargetPredicate = !if(ps.Pfl.HasSDWA9, HasSDWA9, DisableInst); + let AssemblerPredicate = !if(ps.Pfl.HasSDWA9, HasSDWA9, DisableInst); + let AsmVariantName = !if(ps.Pfl.HasSDWA9, AMDGPUAsmVariants.SDWA9, + AMDGPUAsmVariants.Disable); + let DecoderNamespace = "SDWA9"; + // Copy relevant pseudo op flags - let SubtargetPredicate = ps.SubtargetPredicate; - let AssemblerPredicate = ps.AssemblerPredicate; let AsmMatchConverter = ps.AsmMatchConverter; - let AsmVariantName = ps.AsmVariantName; let UseNamedOperandTable = ps.UseNamedOperandTable; - let DecoderNamespace = ps.DecoderNamespace; let Constraints = ps.Constraints; let DisableEncoding = ps.DisableEncoding; let TSFlags = ps.TSFlags; diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index ca68f5d42c32c..6f67183df6a18 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -100,7 +100,8 @@ def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", "Enable Reliability, Availability and Serviceability extensions">; def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", "Enable fast computation of positive address offsets">; - +def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true", + "CPU fuses AES crypto operations">; // Cyclone has preferred instructions for zeroing VFP registers, which can // execute in 0 cycles. 
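FeatureFuseAES follows the usual SubtargetFeature pattern: the TableGen record names the backing boolean ("HasFuseAES", declared in the ARMSubtarget.h hunk further down), and the generated feature-string parser flips that bit when "+fuse-aes" appears. A stripped-down model of that plumbing — hypothetical names, standing in for the generated ParseSubtargetFeatures:

#include <cstddef>
#include <string>

struct SubtargetModel {
  bool HasFuseAES = false;

  // Mimics what the generated parser does with a comma-separated
  // feature string such as "+fuse-aes,-hwdiv".
  void parseFeatures(const std::string &FS) {
    for (std::size_t Pos = 0; Pos < FS.size();) {
      std::size_t End = FS.find(',', Pos);
      if (End == std::string::npos)
        End = FS.size();
      const std::string F = FS.substr(Pos, End - Pos);
      if (F == "+fuse-aes")
        HasFuseAES = true;
      else if (F == "-fuse-aes")
        HasFuseAES = false;
      Pos = End + 1;
    }
  }
};

In practice the bit is toggled with the standard attribute syntax, e.g. llc -mattr=+fuse-aes, or by a CPU definition that lists the feature; the scheduler consults it through the hasFuseAES()/hasFusion() accessors added later in this patch.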
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index f9da036c7e468..90f635c812542 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1504,6 +1504,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case ARM::CONSTPOOL_ENTRY: { + if (Subtarget->genExecuteOnly()) + llvm_unreachable("execute-only should not generate constant pools"); + /// CONSTPOOL_ENTRY - This instruction represents a floating constant pool /// in the function. The first operand is the ID# for this instruction, the /// second is the index into the MachineConstantPool that this is, the third diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 8715657ad5e25..e0810c358f2da 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -665,12 +665,14 @@ bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const { const ARMFunctionInfo *AFI = MI.getParent()->getParent()->getInfo<ARMFunctionInfo>(); + // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM. + // In their ARM encoding, they can't be encoded in a conditional form. + if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) + return false; + if (AFI->isThumb2Function()) { if (getSubtarget().restrictIT()) return isV8EligibleForIT(&MI); - } else { // non-Thumb - if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) - return false; } return true; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 5b2d093e8f0da..2bcc707e9fc3c 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2669,12 +2669,35 @@ static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) { // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only // be used to form addressing mode. These wrapped nodes will be selected // into MOVi. -static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerConstantPool(SDValue Op, + SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); // FIXME there is no actual debug info here SDLoc dl(Op); ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); SDValue Res; + + // When generating execute-only code Constant Pools must be promoted to the + // global data section. It's a bit ugly that we can't share them across basic + // blocks, but this way we guarantee that execute-only behaves correct with + // position-independent addressing modes. + if (Subtarget->genExecuteOnly()) { + auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); + auto T = const_cast<Type*>(CP->getType()); + auto C = const_cast<Constant*>(CP->getConstVal()); + auto M = const_cast<Module*>(DAG.getMachineFunction(). 
+ getFunction()->getParent()); + auto GV = new GlobalVariable( + *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C, + Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" + + Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" + + Twine(AFI->createPICLabelUId()) + ); + SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV), + dl, PtrVT); + return LowerGlobalAddress(GA, DAG); + } + if (CP->isMachineConstantPoolEntry()) Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlignment()); @@ -3118,6 +3141,19 @@ static bool isReadOnly(const GlobalValue *GV) { isa<Function>(GV); } +SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { + switch (Subtarget->getTargetTriple().getObjectFormat()) { + default: llvm_unreachable("unknown object format"); + case Triple::COFF: + return LowerGlobalAddressWindows(Op, DAG); + case Triple::ELF: + return LowerGlobalAddressELF(Op, DAG); + case Triple::MachO: + return LowerGlobalAddressDarwin(Op, DAG); + } +} + SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -7634,21 +7670,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); - case ISD::ConstantPool: - if (Subtarget->genExecuteOnly()) - llvm_unreachable("execute-only should not generate constant pools"); - return LowerConstantPool(Op, DAG); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); - case ISD::GlobalAddress: - switch (Subtarget->getTargetTriple().getObjectFormat()) { - default: llvm_unreachable("unknown object format"); - case Triple::COFF: - return LowerGlobalAddressWindows(Op, DAG); - case Triple::ELF: - return LowerGlobalAddressELF(Op, DAG); - case Triple::MachO: - return LowerGlobalAddressDarwin(Op, DAG); - } + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 26da528c19e6d..5044134f5b1e2 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -601,6 +601,8 @@ class InstrItineraryData; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 45471a4e95b39..53db5acbe805c 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -4756,6 +4756,16 @@ def t2MOVsr: t2AsmPseudo<"mov${p} $Rd, $shift", def t2MOVSsr: t2AsmPseudo<"movs${p} $Rd, $shift", (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>; +// Aliases for the above with the .w qualifier +def : t2InstAlias<"mov${p}.w $Rd, $shift", + (t2MOVsi rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; 
+def : t2InstAlias<"movs${p}.w $Rd, $shift", + (t2MOVSsi rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; +def : t2InstAlias<"mov${p}.w $Rd, $shift", + (t2MOVsr rGPR:$Rd, so_reg_reg:$shift, pred:$p)>; +def : t2InstAlias<"movs${p}.w $Rd, $shift", + (t2MOVSsr rGPR:$Rd, so_reg_reg:$shift, pred:$p)>; + // ADR w/o the .w suffix def : t2InstAlias<"adr${p} $Rd, $addr", (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp index 2ae3bad4076b0..4cb0eca5ee5f8 100644 --- a/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/lib/Target/ARM/ARMInstructionSelector.cpp @@ -42,6 +42,10 @@ public: private: bool selectImpl(MachineInstr &I) const; + bool selectICmp(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) const; + const ARMBaseInstrInfo &TII; const ARMBaseRegisterInfo &TRI; const ARMBaseTargetMachine &TM; @@ -243,6 +247,105 @@ static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank, return Opc; } +static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) { + switch (Pred) { + // Needs two compares... + case CmpInst::FCMP_ONE: + case CmpInst::FCMP_UEQ: + default: + // AL is our "false" for now. The other two need more compares. + return ARMCC::AL; + case CmpInst::ICMP_EQ: + case CmpInst::FCMP_OEQ: + return ARMCC::EQ; + case CmpInst::ICMP_SGT: + case CmpInst::FCMP_OGT: + return ARMCC::GT; + case CmpInst::ICMP_SGE: + case CmpInst::FCMP_OGE: + return ARMCC::GE; + case CmpInst::ICMP_UGT: + case CmpInst::FCMP_UGT: + return ARMCC::HI; + case CmpInst::FCMP_OLT: + return ARMCC::MI; + case CmpInst::ICMP_ULE: + case CmpInst::FCMP_OLE: + return ARMCC::LS; + case CmpInst::FCMP_ORD: + return ARMCC::VC; + case CmpInst::FCMP_UNO: + return ARMCC::VS; + case CmpInst::FCMP_UGE: + return ARMCC::PL; + case CmpInst::ICMP_SLT: + case CmpInst::FCMP_ULT: + return ARMCC::LT; + case CmpInst::ICMP_SLE: + case CmpInst::FCMP_ULE: + return ARMCC::LE; + case CmpInst::FCMP_UNE: + case CmpInst::ICMP_NE: + return ARMCC::NE; + case CmpInst::ICMP_UGE: + return ARMCC::HS; + case CmpInst::ICMP_ULT: + return ARMCC::LO; + } +} + +bool ARMInstructionSelector::selectICmp(MachineInstrBuilder &MIB, + const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) const { + auto &MBB = *MIB->getParent(); + auto InsertBefore = std::next(MIB->getIterator()); + auto &DebugLoc = MIB->getDebugLoc(); + + // Move 0 into the result register. + auto Mov0I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVi)) + .addDef(MRI.createVirtualRegister(&ARM::GPRRegClass)) + .addImm(0) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + if (!constrainSelectedInstRegOperands(*Mov0I, TII, TRI, RBI)) + return false; + + // Perform the comparison. + auto LHSReg = MIB->getOperand(2).getReg(); + auto RHSReg = MIB->getOperand(3).getReg(); + assert(MRI.getType(LHSReg) == MRI.getType(RHSReg) && + MRI.getType(LHSReg).getSizeInBits() == 32 && + MRI.getType(RHSReg).getSizeInBits() == 32 && + "Unsupported types for comparison operation"); + auto CmpI = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::CMPrr)) + .addUse(LHSReg) + .addUse(RHSReg) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI)) + return false; + + // Move 1 into the result register if the flags say so. 
+ auto ResReg = MIB->getOperand(0).getReg(); + auto Cond = + static_cast<CmpInst::Predicate>(MIB->getOperand(1).getPredicate()); + auto ARMCond = getComparePred(Cond); + if (ARMCond == ARMCC::AL) + return false; + + auto Mov1I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVCCi)) + .addDef(ResReg) + .addUse(Mov0I->getOperand(0).getReg()) + .addImm(1) + .add(predOps(ARMCond, ARM::CPSR)); + if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI)) + return false; + + MIB->eraseFromParent(); + return true; +} + bool ARMInstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -343,6 +446,8 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { I.setDesc(TII.get(COPY)); return selectCopy(I, TII, MRI, TRI, RBI); } + case G_ICMP: + return selectICmp(MIB, TII, MRI, TRI, RBI); case G_GEP: I.setDesc(TII.get(ARM::ADDrr)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index a706079d98662..5873c7fb38729 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -86,6 +86,12 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({G_CONSTANT, s32}, Legal); + setAction({G_ICMP, s1}, Legal); + for (auto Ty : {s8, s16}) + setAction({G_ICMP, 1, Ty}, WidenScalar); + for (auto Ty : {s32, p0}) + setAction({G_ICMP, 1, Ty}, Legal); + if (!ST.useSoftFloat() && ST.hasVFP2()) { setAction({G_FADD, s32}, Legal); setAction({G_FADD, s64}, Legal); diff --git a/lib/Target/ARM/ARMMacroFusion.cpp b/lib/Target/ARM/ARMMacroFusion.cpp new file mode 100644 index 0000000000000..1b6e97c28d453 --- /dev/null +++ b/lib/Target/ARM/ARMMacroFusion.cpp @@ -0,0 +1,57 @@ +//===- ARMMacroFusion.cpp - ARM Macro Fusion ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file contains the ARM implementation of the DAG scheduling +/// mutation to pair instructions back to back. +// +//===----------------------------------------------------------------------===// + +#include "ARMMacroFusion.h" +#include "ARMSubtarget.h" +#include "llvm/CodeGen/MacroFusion.h" +#include "llvm/Target/TargetInstrInfo.h" + +namespace llvm { + +/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused +/// together. Given SecondMI, when FirstMI is unspecified, then check if +/// SecondMI may be part of a fused pair at all. +static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, + const TargetSubtargetInfo &TSI, + const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(TSI); + + // Assume wildcards for unspecified instrs. + unsigned FirstOpcode = + FirstMI ? FirstMI->getOpcode() + : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END); + unsigned SecondOpcode = SecondMI.getOpcode(); + + if (ST.hasFuseAES()) + // Fuse AES crypto operations. + switch(SecondOpcode) { + // AES encode. + case ARM::AESMC : + return FirstOpcode == ARM::AESE || + FirstOpcode == ARM::INSTRUCTION_LIST_END; + // AES decode. 
+ case ARM::AESIMC: + return FirstOpcode == ARM::AESD || + FirstOpcode == ARM::INSTRUCTION_LIST_END; + } + + return false; +} + +std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation () { + return createMacroFusionDAGMutation(shouldScheduleAdjacent); +} + +} // end namespace llvm diff --git a/lib/Target/ARM/ARMMacroFusion.h b/lib/Target/ARM/ARMMacroFusion.h new file mode 100644 index 0000000000000..1e4fc6687eae8 --- /dev/null +++ b/lib/Target/ARM/ARMMacroFusion.h @@ -0,0 +1,24 @@ +//===- ARMMacroFusion.h - ARM Macro Fusion ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file contains the ARM definition of the DAG scheduling mutation +/// to pair instructions back to back. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineScheduler.h" + +namespace llvm { + +/// Note that you have to add: +/// DAG.addMutation(createARMMacroFusionDAGMutation()); +/// to ARMPassConfig::createMachineScheduler() to have an effect. +std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation(); + +} // llvm diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp index f59b075e6dd9a..2350d0c6ef69e 100644 --- a/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -255,6 +255,16 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); break; + case G_ICMP: { + LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); + (void)Ty2; + assert(Ty2.getSizeInBits() == 32 && "Unsupported size for G_ICMP"); + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr, + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx]}); + break; + } case G_MERGE_VALUES: { // We only support G_MERGE_VALUES for creating a double precision floating // point value out of two GPRs. diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index af682dd8321cf..d890d0fa777e8 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -285,6 +285,10 @@ protected: /// HasFPAO - if true, processor does positive address offset computation faster bool HasFPAO = false; + /// HasFuseAES - if true, processor executes back to back AES instruction + /// pairs faster. + bool HasFuseAES = false; + /// If true, if conversion may decide to leave some instructions unpredicated. bool IsProfitableToUnpredicate = false; @@ -561,6 +565,10 @@ public: bool hasD16() const { return HasD16; } bool hasFullFP16() const { return HasFullFP16; } + bool hasFuseAES() const { return HasFuseAES; } + /// \brief Return true if the CPU supports any kind of instruction fusion. 
+ bool hasFusion() const { return hasFuseAES(); } + const Triple &getTargetTriple() const { return TargetTriple; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index c0506cfda6129..eb71e557ec917 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -17,6 +17,7 @@ #include "ARMRegisterBankInfo.h" #endif #include "ARMSubtarget.h" +#include "ARMMacroFusion.h" #include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" #include "ARMTargetTransformInfo.h" @@ -394,6 +395,9 @@ public: createMachineScheduler(MachineSchedContext *C) const override { ScheduleDAGMILive *DAG = createGenericSchedLive(C); // add DAG Mutations here. + const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>(); + if (ST.hasFusion()) + DAG->addMutation(createARMMacroFusionDAGMutation()); return DAG; } @@ -401,6 +405,9 @@ public: createPostMachineScheduler(MachineSchedContext *C) const override { ScheduleDAGMI *DAG = createGenericSchedPostRA(C); // add DAG Mutations here. + const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>(); + if (ST.hasFusion()) + DAG->addMutation(createARMMacroFusionDAGMutation()); return DAG; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 19fba3033bb2b..891b5c60e1fd6 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -6860,6 +6860,17 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { bool ARMAsmParser::processInstruction(MCInst &Inst, const OperandVector &Operands, MCStreamer &Out) { + // Check if we have the wide qualifier, because if it's present we + // must avoid selecting a 16-bit thumb instruction. + bool HasWideQualifier = false; + for (auto &Op : Operands) { + ARMOperand &ARMOp = static_cast<ARMOperand&>(*Op); + if (ARMOp.isToken() && ARMOp.getToken() == ".w") { + HasWideQualifier = true; + break; + } + } + switch (Inst.getOpcode()) { // Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction. case ARM::LDRT_POST: @@ -6939,8 +6950,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // Select the narrow version if the immediate will fit. if (Inst.getOperand(1).getImm() > 0 && Inst.getOperand(1).getImm() <= 0xff && - !(static_cast<ARMOperand &>(*Operands[2]).isToken() && - static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w")) + !HasWideQualifier) Inst.setOpcode(ARM::tLDRpci); else Inst.setOpcode(ARM::t2LDRpci); @@ -6971,10 +6981,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, else if (Inst.getOpcode() == ARM::t2LDRConstPool) TmpInst.setOpcode(ARM::t2LDRpci); const ARMOperand &PoolOperand = - (static_cast<ARMOperand &>(*Operands[2]).isToken() && - static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w") ? - static_cast<ARMOperand &>(*Operands[4]) : - static_cast<ARMOperand &>(*Operands[3]); + (HasWideQualifier ? + static_cast<ARMOperand &>(*Operands[4]) : + static_cast<ARMOperand &>(*Operands[3])); const MCExpr *SubExprVal = PoolOperand.getConstantPoolImm(); // If SubExprVal is a constant we may be able to use a MOV if (isa<MCConstantExpr>(SubExprVal) && @@ -8117,8 +8126,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, if (isARMLowRegister(Inst.getOperand(0).getReg()) && isARMLowRegister(Inst.getOperand(1).getReg()) && Inst.getOperand(5).getReg() == (inITBlock() ? 
0 : ARM::CPSR) && - !(static_cast<ARMOperand &>(*Operands[3]).isToken() && - static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) { + !HasWideQualifier) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); @@ -8152,7 +8160,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, isARMLowRegister(Inst.getOperand(1).getReg()) && isARMLowRegister(Inst.getOperand(2).getReg()) && Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && - inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr)) + inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr) && + !HasWideQualifier) isNarrow = true; MCInst TmpInst; unsigned newOpc; @@ -8186,7 +8195,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, bool isNarrow = false; if (isARMLowRegister(Inst.getOperand(0).getReg()) && isARMLowRegister(Inst.getOperand(1).getReg()) && - inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi)) + inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi) && + !HasWideQualifier) isNarrow = true; MCInst TmpInst; unsigned newOpc; @@ -8415,10 +8425,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, !isARMLowRegister(Inst.getOperand(0).getReg()) || (Inst.getOperand(2).isImm() && (unsigned)Inst.getOperand(2).getImm() > 255) || - ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) || - (inITBlock() && Inst.getOperand(5).getReg() != 0)) || - (static_cast<ARMOperand &>(*Operands[3]).isToken() && - static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) + Inst.getOperand(5).getReg() != (inITBlock() ? 0 : ARM::CPSR) || + HasWideQualifier) break; MCInst TmpInst; TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ? @@ -8447,8 +8455,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, } if (!Transform || Inst.getOperand(5).getReg() != 0 || - (static_cast<ARMOperand &>(*Operands[3]).isToken() && - static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) + HasWideQualifier) break; MCInst TmpInst; TmpInst.setOpcode(ARM::tADDhirr); @@ -8568,11 +8575,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, if (isARMLowRegister(Inst.getOperand(0).getReg()) && (Inst.getOperand(1).isImm() && (unsigned)Inst.getOperand(1).getImm() <= 255) && - ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL && - Inst.getOperand(4).getReg() == ARM::CPSR) || - (inITBlock() && Inst.getOperand(4).getReg() == 0)) && - (!static_cast<ARMOperand &>(*Operands[2]).isToken() || - static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) { + Inst.getOperand(4).getReg() == (inITBlock() ? 0 : ARM::CPSR) && + !HasWideQualifier) { // The operands aren't in the same order for tMOVi8... MCInst TmpInst; TmpInst.setOpcode(ARM::tMOVi8); @@ -8593,8 +8597,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, isARMLowRegister(Inst.getOperand(1).getReg()) && Inst.getOperand(2).getImm() == ARMCC::AL && Inst.getOperand(4).getReg() == ARM::CPSR && - (!static_cast<ARMOperand &>(*Operands[2]).isToken() || - static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) { + !HasWideQualifier) { // The operands aren't the same for tMOV[S]r... (no cc_out) MCInst TmpInst; TmpInst.setOpcode(Inst.getOperand(4).getReg() ? 
ARM::tMOVSr : ARM::tMOVr); @@ -8616,8 +8619,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, if (isARMLowRegister(Inst.getOperand(0).getReg()) && isARMLowRegister(Inst.getOperand(1).getReg()) && Inst.getOperand(2).getImm() == 0 && - (!static_cast<ARMOperand &>(*Operands[2]).isToken() || - static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) { + !HasWideQualifier) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("Illegal opcode!"); @@ -8716,11 +8718,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, if ((isARMLowRegister(Inst.getOperand(1).getReg()) && isARMLowRegister(Inst.getOperand(2).getReg())) && Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && - ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) || - (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && - (!static_cast<ARMOperand &>(*Operands[3]).isToken() || - !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower( - ".w"))) { + Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && + !HasWideQualifier) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); @@ -8756,11 +8755,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, isARMLowRegister(Inst.getOperand(2).getReg())) && (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() || Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) && - ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) || - (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && - (!static_cast<ARMOperand &>(*Operands[3]).isToken() || - !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower( - ".w"))) { + Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && + !HasWideQualifier) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 3cde43967568b..cf6827fd6ca19 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -49,6 +49,7 @@ add_llvm_target(ARMCodeGen ARMLoadStoreOptimizer.cpp ARMMCInstLower.cpp ARMMachineFunctionInfo.cpp + ARMMacroFusion.cpp ARMRegisterInfo.cpp ARMOptimizeBarriersPass.cpp ARMSelectionDAGInfo.cpp diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 716492ea25662..81760f03940ad 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -358,11 +358,27 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf, return Value; } -unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, +unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target, uint64_t Value, bool IsPCRel, MCContext &Ctx, bool IsLittleEndian, bool IsResolved) const { unsigned Kind = Fixup.getKind(); + + // MachO tries to make .o files that look vaguely pre-linked, so for MOVW/MOVT + // and .word relocations they put the Thumb bit into the addend if possible. + // Other relocation types don't want this bit though (branches couldn't encode + // it if it *was* present, and no other relocations exist) and it can + // interfere with checking valid expressions. 
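The comment block relocated into adjustFixupValue (the check itself follows immediately) concerns Thumb interworking: on MachO, a .word or MOVW/MOVT reference to a Thumb function must carry the Thumb bit in the addend. The bit itself is an architectural fact, illustrated here as a minimal standalone sketch, independent of the MC layer:

#include <cstdint>

// Bit 0 of a code address selects the instruction set on interworking
// transfers (BX/BLX): 0 = ARM, 1 = Thumb. A data reference to a Thumb
// function therefore stores the symbol address with bit 0 set.
constexpr std::uint32_t interworkAddress(std::uint32_t Sym, bool IsThumb) {
  return IsThumb ? (Sym | 1u) : Sym;
}

static_assert(interworkAddress(0x8000, true) == 0x8001, "Thumb bit set");
static_assert(interworkAddress(0x8000, false) == 0x8000, "ARM address kept");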
+ if (const MCSymbolRefExpr *A = Target.getSymA()) { + if (A->hasSubsectionsViaSymbols() && Asm.isThumbFunc(&A->getSymbol()) && + (Kind == FK_Data_4 || Kind == ARM::fixup_arm_movw_lo16 || + Kind == ARM::fixup_arm_movt_hi16 || Kind == ARM::fixup_t2_movw_lo16 || + Kind == ARM::fixup_t2_movt_hi16)) + Value |= 1; + } + switch (Kind) { default: Ctx.reportError(Fixup.getLoc(), "bad relocation fixup type"); @@ -505,6 +521,13 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return swapHalfWords(out, IsLittleEndian); } case ARM::fixup_arm_thumb_bl: { + // FIXME: We get both thumb1 and thumb2 in here, so we can only check for + // the less strict thumb2 value. + if (!isInt<26>(Value - 4)) { + Ctx.reportError(Fixup.getLoc(), "Relocation out of range"); + return 0; + } + // The value doesn't encode the low bit (always zero) and is offset by // four. The 32-bit immediate value is encoded as // imm32 = SignExtend(S:I1:I2:imm10:imm11:0) @@ -716,29 +739,11 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, } void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, const MCFixup &Fixup, - const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) { + const MCValue &Target, bool &IsResolved) { const MCSymbolRefExpr *A = Target.getSymA(); const MCSymbol *Sym = A ? &A->getSymbol() : nullptr; const unsigned FixupKind = Fixup.getKind() ; - // MachO (the only user of "Value") tries to make .o files that look vaguely - // pre-linked, so for MOVW/MOVT and .word relocations they put the Thumb bit - // into the addend if possible. Other relocation types don't want this bit - // though (branches couldn't encode it if it *was* present, and no other - // relocations exist) and it can interfere with checking valid expressions. - if (FixupKind == FK_Data_4 || - FixupKind == ARM::fixup_arm_movw_lo16 || - FixupKind == ARM::fixup_arm_movt_hi16 || - FixupKind == ARM::fixup_t2_movw_lo16 || - FixupKind == ARM::fixup_t2_movt_hi16) { - if (Sym) { - if (Asm.isThumbFunc(Sym)) - Value |= 1; - } - } if (IsResolved && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { assert(Sym && "How did we resolve this?"); @@ -747,7 +752,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // If the symbol is out of range, produce a relocation and hope the // linker can handle it. GNU AS produces an error in this case. 
- if (Sym->isExternal() || Value >= 0x400004) + if (Sym->isExternal()) IsResolved = false; } // Create relocations for unconditional branches to function symbols with @@ -759,6 +764,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, IsResolved = false; if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br || FixupKind == ARM::fixup_arm_thumb_bl || + FixupKind == ARM::fixup_t2_condbranch || FixupKind == ARM::fixup_t2_uncondbranch)) IsResolved = false; } @@ -875,22 +881,25 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) { } } -void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, bool IsPCRel, - MCContext &Ctx) const { +void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef<char> Data, uint64_t Value, + bool IsPCRel) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); - Value = adjustFixupValue(Fixup, Value, IsPCRel, Ctx, IsLittleEndian, true); + MCContext &Ctx = Asm.getContext(); + Value = adjustFixupValue(Asm, Fixup, Target, Value, IsPCRel, Ctx, + IsLittleEndian, true); if (!Value) return; // Doesn't change encoding. unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); // Used to point to big endian bytes. unsigned FullSizeBytes; if (!IsLittleEndian) { FullSizeBytes = getFixupKindContainerSizeBytes(Fixup.getKind()); - assert((Offset + FullSizeBytes) <= DataSize && "Invalid fixup size!"); + assert((Offset + FullSizeBytes) <= Data.size() && "Invalid fixup size!"); assert(NumBytes <= FullSizeBytes && "Invalid fixup size!"); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h index 2ddedb5d61059..6a0ba2ed41c1a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -40,17 +40,17 @@ public: /// processFixupValue - Target hook to process the literal value of a fixup /// if necessary. 
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override; + void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, bool &IsResolved) override; - unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, bool IsPCRel, + unsigned adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, uint64_t Value, bool IsPCRel, MCContext &Ctx, bool IsLittleEndian, bool IsResolved) const; - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const override; unsigned getRelaxedOpcode(unsigned Op) const; diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp index 00505a103e00f..f74fb2e20b5a3 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -33,8 +33,8 @@ public: ~ARMWinCOFFObjectWriter() override = default; - unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsCrossSection, + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsCrossSection, const MCAsmBackend &MAB) const override; bool recordRelocation(const MCFixup &) const override; @@ -42,7 +42,8 @@ public: } // end anonymous namespace -unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target, +unsigned ARMWinCOFFObjectWriter::getRelocType(MCContext &Ctx, + const MCValue &Target, const MCFixup &Fixup, bool IsCrossSection, const MCAsmBackend &MAB) const { diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp index 7d5fb6ca17b98..c6ddd6bdad5e6 100644 --- a/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -214,7 +214,12 @@ void BPFDAGToDAGISel::PreprocessISelDAG() { if (Opcode != ISD::LOAD) continue; - unsigned char new_val[8]; // hold up the constant values replacing loads. + union { + uint8_t c[8]; + uint16_t s; + uint32_t i; + uint64_t d; + } new_val; // hold up the constant values replacing loads. 
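An aside on the union introduced here: the previous BPF code read wider values back out of raw byte buffers with casts (e.g. *(uint16_t *)new_val, removed just below), which violates C++ strict-aliasing rules and may break alignment assumptions. Reading through a union member is defined in C and widely supported in C++ as a compiler extension; std::memcpy is the fully portable alternative. A minimal sketch of the well-defined form, assuming nothing beyond the standard library:

#include <cstdint>
#include <cstring>

// Safe replacement for `*(const std::uint16_t *)buf`: memcpy from the
// byte buffer into a properly typed object; optimizers lower this to a
// single load on common targets.
std::uint16_t read16(const unsigned char *buf) {
  std::uint16_t v;
  std::memcpy(&v, buf, sizeof(v));
  return v;
}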
bool to_replace = false; SDLoc DL(Node); const LoadSDNode *LD = cast<LoadSDNode>(Node); @@ -242,7 +247,7 @@ void BPFDAGToDAGISel::PreprocessISelDAG() { const ConstantSDNode *CDN = dyn_cast<ConstantSDNode>(OP2.getNode()); if (GADN && CDN) to_replace = - getConstantFieldValue(GADN, CDN->getZExtValue(), size, new_val); + getConstantFieldValue(GADN, CDN->getZExtValue(), size, new_val.c); } else if (LDAddrNode->getOpcode() > ISD::BUILTIN_OP_END && LDAddrNode->getNumOperands() > 0) { DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n'); @@ -250,7 +255,7 @@ void BPFDAGToDAGISel::PreprocessISelDAG() { SDValue OP1 = LDAddrNode->getOperand(0); if (const GlobalAddressSDNode *GADN = dyn_cast<GlobalAddressSDNode>(OP1.getNode())) - to_replace = getConstantFieldValue(GADN, 0, size, new_val); + to_replace = getConstantFieldValue(GADN, 0, size, new_val.c); } if (!to_replace) @@ -259,13 +264,13 @@ void BPFDAGToDAGISel::PreprocessISelDAG() { // replacing the old with a new value uint64_t val; if (size == 1) - val = *(uint8_t *)new_val; + val = new_val.c[0]; else if (size == 2) - val = *(uint16_t *)new_val; + val = new_val.s; else if (size == 4) - val = *(uint32_t *)new_val; + val = new_val.i; else { - val = *(uint64_t *)new_val; + val = new_val.d; } DEBUG(dbgs() << "Replacing load of size " << size << " with constant " @@ -318,14 +323,17 @@ bool BPFDAGToDAGISel::getConstantFieldValue(const GlobalAddressSDNode *Node, } // test whether host endianness matches target - uint8_t test_buf[2]; + union { + uint8_t c[2]; + uint16_t s; + } test_buf; uint16_t test_val = 0x2345; if (DL.isLittleEndian()) - support::endian::write16le(test_buf, test_val); + support::endian::write16le(test_buf.c, test_val); else - support::endian::write16be(test_buf, test_val); + support::endian::write16be(test_buf.c, test_val); - bool endian_match = *(uint16_t *)test_buf == test_val; + bool endian_match = test_buf.s == test_val; for (uint64_t i = Offset, j = 0; i < Offset + Size; i++, j++) ByteSeq[j] = endian_match ? 
TmpVal[i] : TmpVal[Offset + Size - 1 - j]; diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index 80357a63a4e12..15e89fb2a2611 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -27,8 +27,9 @@ public: : MCAsmBackend(), IsLittleEndian(IsLittleEndian) {} ~BPFAsmBackend() override = default; - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const override; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; @@ -61,9 +62,10 @@ bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } -void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, bool IsPCRel, - MCContext &Ctx) const { +void BPFAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef<char> Data, uint64_t Value, + bool IsPCRel) const { if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) { assert(Value == 0); } else if (Fixup.getKind() == FK_Data_4 || Fixup.getKind() == FK_Data_8) { diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp index 9f8c9ded8127b..734f3c6658d92 100644 --- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -567,8 +567,19 @@ unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, } llvm_unreachable("Invalid register operand"); } - if (SO.isImm() || SO.isFPImm()) - return IfTrue ? C2_cmoveit : C2_cmoveif; + switch (SO.getType()) { + case MachineOperand::MO_Immediate: + case MachineOperand::MO_FPImmediate: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_BlockAddress: + return IfTrue ? C2_cmoveit : C2_cmoveif; + default: + break; + } llvm_unreachable("Unexpected source operand"); } diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 18e49c69b8e36..2b0ceaa66258e 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -1051,10 +1051,26 @@ int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, bool HasExtraAlign = HRI.needsStackRealignment(MF); bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None; + unsigned FrameSize = MFI.getStackSize(); unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister(); auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>(); unsigned AP = HMFI.getStackAlignBasePhysReg(); - unsigned FrameSize = MFI.getStackSize(); + // It may happen that AP will be absent even HasAlloca && HasExtraAlign + // is true. HasExtraAlign may be set because of vector spills, without + // aligned locals or aligned outgoing function arguments. Since vector + // spills will ultimately be "unaligned", it is safe to use FP as the + // base register. + // In fact, in such a scenario the stack is actually not required to be + // aligned, although it may end up being aligned anyway, since this + // particular case is not easily detectable. 
The alignment will be + // unnecessary, but not incorrect. + // Unfortunately there is no quick way to verify that the above is + // indeed the case (and that it's not a result of an error), so just + // assume that missing AP will be replaced by FP. + // (A better fix would be to rematerialize AP from FP and always align + // vector spills.) + if (AP == 0) + AP = FP; bool UseFP = false, UseAP = false; // Default: use SP (except at -O0). // Use FP at -O0, except when there are objects with extra alignment. @@ -2454,9 +2470,44 @@ bool HexagonFrameLowering::mayOverflowFrameOffset(MachineFunction &MF) const { unsigned StackSize = MF.getFrameInfo().estimateStackSize(MF); auto &HST = MF.getSubtarget<HexagonSubtarget>(); // A fairly simplistic guess as to whether a potential load/store to a - // stack location could require an extra register. It does not account - // for store-immediate instructions. - if (HST.useHVXOps()) - return StackSize > 256; + // stack location could require an extra register. + if (HST.useHVXOps() && StackSize > 256) + return true; + + // Check if the function has store-immediate instructions that access + // the stack. Since the offset field is not extendable, if the stack + // size exceeds the offset limit (6 bits, shifted), the stores will + // require a new base register. + bool HasImmStack = false; + unsigned MinLS = ~0u; // Log_2 of the memory access size. + + for (const MachineBasicBlock &B : MF) { + for (const MachineInstr &MI : B) { + unsigned LS = 0; + switch (MI.getOpcode()) { + case Hexagon::S4_storeirit_io: + case Hexagon::S4_storeirif_io: + case Hexagon::S4_storeiri_io: + ++LS; + LLVM_FALLTHROUGH; + case Hexagon::S4_storeirht_io: + case Hexagon::S4_storeirhf_io: + case Hexagon::S4_storeirh_io: + ++LS; + LLVM_FALLTHROUGH; + case Hexagon::S4_storeirbt_io: + case Hexagon::S4_storeirbf_io: + case Hexagon::S4_storeirb_io: + if (MI.getOperand(0).isFI()) + HasImmStack = true; + MinLS = std::min(MinLS, LS); + break; + } + } + } + + if (HasImmStack) + return !isUInt<6>(StackSize >> MinLS); + return false; } diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp index 11ac5454f6043..5abbcbba72ddd 100644 --- a/lib/Target/Hexagon/HexagonGenMux.cpp +++ b/lib/Target/Hexagon/HexagonGenMux.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -295,15 +296,12 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { unsigned SR1 = Src1->isReg() ? Src1->getReg() : 0; unsigned SR2 = Src2->isReg() ? Src2->getReg() : 0; bool Failure = false, CanUp = true, CanDown = true; - bool Used1 = false, Used2 = false; for (unsigned X = MinX+1; X < MaxX; X++) { const DefUseInfo &DU = DUM.lookup(X); if (DU.Defs[PR] || DU.Defs[DR] || DU.Uses[DR]) { Failure = true; break; } - Used1 |= DU.Uses[SR1]; - Used2 |= DU.Uses[SR2]; if (CanDown && DU.Defs[SR1]) CanDown = false; if (CanUp && DU.Defs[SR2]) @@ -317,64 +315,52 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { // Prefer "down", since this will move the MUX farther away from the // predicate definition. MachineBasicBlock::iterator At = CanDown ? Def2 : Def1; - if (CanDown) { - // If the MUX is placed "down", we need to make sure that there aren't - // any kills of the source registers between the two defs. 
- if (Used1 || Used2) { - auto ResetKill = [this] (unsigned Reg, MachineInstr &MI) -> bool { - if (MachineOperand *Op = MI.findRegisterUseOperand(Reg, true, HRI)) { - Op->setIsKill(false); - return true; - } - return false; - }; - bool KilledSR1 = false, KilledSR2 = false; - for (MachineInstr &MJ : make_range(std::next(It1), It2)) { - if (SR1) - KilledSR1 |= ResetKill(SR1, MJ); - if (SR2) - KilledSR2 |= ResetKill(SR1, MJ); - } - // If any of the source registers were killed in this range, transfer - // the kills to the source operands: they will me "moved" to the - // resulting MUX and their parent instructions will be deleted. - if (KilledSR1) { - assert(Src1->isReg()); - Src1->setIsKill(true); - } - if (KilledSR2) { - assert(Src2->isReg()); - Src2->setIsKill(true); - } - } - } else { - // If the MUX is placed "up", it shouldn't kill any source registers - // that are still used afterwards. We can reset the kill flags directly - // on the operands, because the source instructions will be erased. - if (Used1 && Src1->isReg()) - Src1->setIsKill(false); - if (Used2 && Src2->isReg()) - Src2->setIsKill(false); - } ML.push_back(MuxInfo(At, DR, PR, SrcT, SrcF, Def1, Def2)); } - for (unsigned I = 0, N = ML.size(); I < N; ++I) { - MuxInfo &MX = ML[I]; - MachineBasicBlock &B = *MX.At->getParent(); - DebugLoc DL = MX.At->getDebugLoc(); + for (MuxInfo &MX : ML) { unsigned MxOpc = getMuxOpcode(*MX.SrcT, *MX.SrcF); if (!MxOpc) continue; - BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR) - .addReg(MX.PredR) - .add(*MX.SrcT) - .add(*MX.SrcF); + MachineBasicBlock &B = *MX.At->getParent(); + const DebugLoc &DL = B.findDebugLoc(MX.At); + auto NewMux = BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR) + .addReg(MX.PredR) + .add(*MX.SrcT) + .add(*MX.SrcF); + NewMux->clearKillInfo(); B.erase(MX.Def1); B.erase(MX.Def2); Changed = true; } + // Fix up kill flags. + + LivePhysRegs LPR(*HRI); + LPR.addLiveOuts(B); + auto IsLive = [&LPR,this] (unsigned Reg) -> bool { + for (MCSubRegIterator S(Reg, HRI, true); S.isValid(); ++S) + if (LPR.contains(*S)) + return true; + return false; + }; + for (auto I = B.rbegin(), E = B.rend(); I != E; ++I) { + if (I->isDebugValue()) + continue; + // This isn't 100% accurate, but it's safe. + // It won't detect (as a kill) a case like this + // r0 = add r0, 1 <-- r0 should be "killed" + // ... = r0 + for (MachineOperand &Op : I->operands()) { + if (!Op.isReg() || !Op.isUse()) + continue; + assert(Op.getSubReg() == 0 && "Should have physical registers only"); + bool Live = IsLive(Op.getReg()); + Op.setIsKill(!Live); + } + LPR.stepBackward(*I); + } + return Changed; } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index f43101fa456d5..fec2dc5ce3066 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -94,10 +94,6 @@ static cl::opt<bool> UseDFAHazardRec("dfa-hazard-rec", /// /// Constants for Hexagon instructions. 
/// -const int Hexagon_MEMV_OFFSET_MAX_128B = 896; // #s4: -8*128...7*128 -const int Hexagon_MEMV_OFFSET_MIN_128B = -1024; // #s4 -const int Hexagon_MEMV_OFFSET_MAX = 448; // #s4: -8*64...7*64 -const int Hexagon_MEMV_OFFSET_MIN = -512; // #s4 const int Hexagon_MEMW_OFFSET_MAX = 4095; const int Hexagon_MEMW_OFFSET_MIN = -4096; const int Hexagon_MEMD_OFFSET_MAX = 8191; @@ -2443,8 +2439,7 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::V6_vS32b_ai: case Hexagon::V6_vL32Ub_ai: case Hexagon::V6_vS32Ub_ai: - return (Offset >= Hexagon_MEMV_OFFSET_MIN) && - (Offset <= Hexagon_MEMV_OFFSET_MAX); + return isShiftedInt<4,6>(Offset); case Hexagon::PS_vstorerq_ai_128B: case Hexagon::PS_vstorerw_ai_128B: @@ -2454,8 +2449,7 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::V6_vS32b_ai_128B: case Hexagon::V6_vL32Ub_ai_128B: case Hexagon::V6_vS32Ub_ai_128B: - return (Offset >= Hexagon_MEMV_OFFSET_MIN_128B) && - (Offset <= Hexagon_MEMV_OFFSET_MAX_128B); + return isShiftedInt<4,7>(Offset); case Hexagon::J2_loop0i: case Hexagon::J2_loop1i: diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index d73fc7c73185d..de6b203015d8e 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -629,7 +629,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { if (MO.isReg() && MO.isUse()) { unsigned feederReg = MO.getReg(); for (MachineBasicBlock::iterator localII = feederPos, - end = jmpPos; localII != end; localII++) { + end = cmpInstr->getIterator(); localII != end; localII++) { MachineInstr &localMI = *localII; for (unsigned j = 0; j < localMI.getNumOperands(); j++) { MachineOperand &localMO = localMI.getOperand(j); diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp index ee3209354688d..7d961a238ae28 100644 --- a/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -100,9 +100,6 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); } - - private: - void ChangeOpInto(MachineOperand &Dst, MachineOperand &Src); }; } @@ -132,7 +129,9 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { PeepholeDoubleRegsMap.clear(); // Traverse the basic block. - for (MachineInstr &MI : *MBB) { + for (auto I = MBB->begin(), E = MBB->end(), NextI = I; I != E; I = NextI) { + NextI = std::next(I); + MachineInstr &MI = *I; // Look for sign extends: // %vreg170<def> = SXTW %vreg166 if (!DisableOptSZExt && MI.getOpcode() == Hexagon::A2_sxtw) { @@ -280,14 +279,13 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { if (NewOp) { unsigned PSrc = MI.getOperand(PR).getReg(); if (unsigned POrig = PeepholeMap.lookup(PSrc)) { - MI.getOperand(PR).setReg(POrig); + BuildMI(*MBB, MI.getIterator(), MI.getDebugLoc(), + QII->get(NewOp), MI.getOperand(0).getReg()) + .addReg(POrig) + .add(MI.getOperand(S2)) + .add(MI.getOperand(S1)); MRI->clearKillFlags(POrig); - MI.setDesc(QII->get(NewOp)); - // Swap operands S1 and S2. 
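// For the isValidOffset changes above: llvm::isShiftedInt<N, S>(x) is true
// exactly when x is a multiple of (1 << S) whose quotient fits a signed
// N-bit field. isShiftedInt<4,6> and isShiftedInt<4,7> therefore reproduce
// the deleted Hexagon_MEMV_OFFSET_* ranges (-512..448 and -1024..896) and
// additionally require the offset to be a multiple of the vector size,
// which the old min/max comparison did not. A self-contained reference
// version with a spot check (a sketch, not the LLVM implementation):
#include <cassert>
#include <cstdint>

template <unsigned N, unsigned S> static bool isShiftedIntRef(int64_t X) {
  return X % (INT64_C(1) << S) == 0 &&
         X >= -(INT64_C(1) << (N + S - 1)) && X < (INT64_C(1) << (N + S - 1));
}

int main() {
  assert(isShiftedIntRef<4, 6>(-512) && isShiftedIntRef<4, 6>(448));  // #s4 * 64
  assert(!isShiftedIntRef<4, 6>(100));  // inside the old range, not 64-aligned
  assert(isShiftedIntRef<4, 7>(-1024) && isShiftedIntRef<4, 7>(896)); // #s4 * 128
}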
- MachineOperand Op1 = MI.getOperand(S1); - MachineOperand Op2 = MI.getOperand(S2); - ChangeOpInto(MI.getOperand(S1), Op2); - ChangeOpInto(MI.getOperand(S2), Op1); + MI.eraseFromParent(); } } // if (NewOp) } // if (!Done) @@ -299,40 +297,6 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { return true; } -void HexagonPeephole::ChangeOpInto(MachineOperand &Dst, MachineOperand &Src) { - assert (&Dst != &Src && "Cannot duplicate into itself"); - switch (Dst.getType()) { - case MachineOperand::MO_Register: - if (Src.isReg()) { - Dst.setReg(Src.getReg()); - Dst.setSubReg(Src.getSubReg()); - MRI->clearKillFlags(Src.getReg()); - } else if (Src.isImm()) { - Dst.ChangeToImmediate(Src.getImm()); - } else { - llvm_unreachable("Unexpected src operand type"); - } - break; - - case MachineOperand::MO_Immediate: - if (Src.isImm()) { - Dst.setImm(Src.getImm()); - } else if (Src.isReg()) { - Dst.ChangeToRegister(Src.getReg(), Src.isDef(), Src.isImplicit(), - false, Src.isDead(), Src.isUndef(), - Src.isDebug()); - Dst.setSubReg(Src.getSubReg()); - } else { - llvm_unreachable("Unexpected src operand type"); - } - break; - - default: - llvm_unreachable("Unexpected dst operand type"); - break; - } -} - FunctionPass *llvm::createHexagonPeephole() { return new HexagonPeephole(); } diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 8851a23ae8ace..0aada8a53c979 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -1,4 +1,4 @@ -//===-- HexagonSubtarget.cpp - Hexagon Subtarget Information --------------===// +//===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===// // // The LLVM Compiler Infrastructure // @@ -11,13 +11,23 @@ // //===----------------------------------------------------------------------===// -#include "HexagonSubtarget.h" #include "Hexagon.h" +#include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include <algorithm> +#include <cassert> #include <map> using namespace llvm; @@ -119,9 +129,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM) : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU), - InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - FrameLowering() { - + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) { initializeEnvironment(); // Initialize scheduling itinerary for the specified CPU. 
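// Background for the HexagonDAGMutation hunks just below (sketch only; the
// real apply() body is in the diff): a subtarget injects scheduler tweaks
// by appending owning pointers to ScheduleDAGMutation objects, each of
// which may edit the scheduling DAG before scheduling starts.
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include <memory>
#include <vector>

namespace {
struct ExampleDAGMutation : llvm::ScheduleDAGMutation {
  void apply(llvm::ScheduleDAGInstrs *DAG) override {
    // Inspect DAG->SUnits and add or adjust SDep edges here.
  }
};
} // end anonymous namespace

static void addExampleMutations(
    std::vector<std::unique_ptr<llvm::ScheduleDAGMutation>> &Mutations) {
  Mutations.push_back(llvm::make_unique<ExampleDAGMutation>());
}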
@@ -196,7 +204,6 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, updateLatency(*SrcInst, *DstInst, Dep); } - void HexagonSubtarget::HexagonDAGMutation::apply(ScheduleDAGInstrs *DAG) { for (auto &SU : DAG->SUnits) { if (!SU.isInstr()) @@ -240,18 +247,18 @@ void HexagonSubtarget::HexagonDAGMutation::apply(ScheduleDAGInstrs *DAG) { } } - void HexagonSubtarget::getPostRAMutations( - std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { - Mutations.push_back(make_unique<HexagonSubtarget::HexagonDAGMutation>()); + std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { + Mutations.push_back( + llvm::make_unique<HexagonSubtarget::HexagonDAGMutation>()); } void HexagonSubtarget::getSMSMutations( - std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { - Mutations.push_back(make_unique<HexagonSubtarget::HexagonDAGMutation>()); + std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { + Mutations.push_back( + llvm::make_unique<HexagonSubtarget::HexagonDAGMutation>()); } - // Pin the vtable to this file. void HexagonSubtarget::anchor() {} @@ -447,4 +454,3 @@ unsigned HexagonSubtarget::getL1PrefetchDistance() const { bool HexagonSubtarget::enableSubRegLiveness() const { return EnableSubregLiveness; } - diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h index 4379efa79c9cd..753dca0000652 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.h +++ b/lib/Target/Hexagon/HexagonSubtarget.h @@ -1,4 +1,4 @@ -//===-- HexagonSubtarget.h - Define Subtarget for the Hexagon ---*- C++ -*-===// +//===- HexagonSubtarget.h - Define Subtarget for the Hexagon ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,12 +15,17 @@ #define LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H #include "HexagonFrameLowering.h" -#include "HexagonISelLowering.h" #include "HexagonInstrInfo.h" +#include "HexagonISelLowering.h" #include "HexagonSelectionDAGInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <memory> #include <string> +#include <vector> #define GET_SUBTARGETINFO_HEADER #include "HexagonGenSubtargetInfo.inc" @@ -30,6 +35,12 @@ namespace llvm { +class MachineInstr; +class SDep; +class SUnit; +class TargetMachine; +class Triple; + class HexagonSubtarget : public HexagonGenSubtargetInfo { virtual void anchor(); @@ -57,6 +68,7 @@ private: HexagonSelectionDAGInfo TSInfo; HexagonFrameLowering FrameLowering; InstrItineraryData InstrItins; + void initializeEnvironment(); public: @@ -108,6 +120,7 @@ public: bool useBSBScheduling() const { return UseBSBScheduling; } bool enableMachineScheduler() const override; + // Always use the TargetLowering default scheduler. // FIXME: This will use the vliw scheduler which is probably just hurting // compiler time and will be removed eventually anyway. 
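// The applyFixup/processFixupValue signature churn in the AsmBackend hunks
// below (Hexagon, Lanai, Mips, PPC, RISCV, Sparc, SystemZ, WebAssembly)
// follows one template: the raw (char *Data, unsigned DataSize) pair becomes
// a bounds-carrying MutableArrayRef<char>, the MCContext parameter is
// recovered from the assembler, and the resolved MCValue target is passed
// through. A condensed sketch of the post-change shape; the class is
// hypothetical, and a real backend derives NumBytes from getFixupKindInfo
// rather than hard-coding it:
#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
#include <cassert>
#include <cstdint>

using namespace llvm;

struct HypotheticalAsmBackend {
  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsPCRel) const {
    if (!Value)
      return; // Doesn't change encoding.
    MCContext &Ctx = Asm.getContext(); // replaces the old MCContext parameter
    (void)Ctx; (void)Target; (void)IsPCRel;
    unsigned Offset = Fixup.getOffset();
    unsigned NumBytes = 4; // assumed width for this sketch
    assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
    // Little-endian, byte-at-a-time OR of the fixed-up value.
    for (unsigned I = 0; I != NumBytes; ++I)
      Data[Offset + I] |= char(Value >> (I * 8));
  }
};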
@@ -124,6 +137,7 @@ public: unsigned getSmallDataThreshold() const { return Hexagon_SMALL_DATA_THRESHOLD; } + const HexagonArchEnum &getHexagonArchVersion() const { return HexagonArchVersion; } @@ -155,4 +169,4 @@ private: } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index e507a797871fc..031a1bdefafbf 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -291,7 +291,6 @@ bool HexagonPassConfig::addInstSelector() { if (EnableBitSimplify) addPass(createHexagonBitSimplify()); addPass(createHexagonPeephole()); - printAndVerify("After hexagon peephole pass"); // Constant propagation. if (!DisableHCP) { addPass(createHexagonConstPropagationPass()); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 545c8b6b2acde..093ce80bc2e3f 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -202,10 +202,8 @@ public: /// processFixupValue - Target hook to adjust the literal value of a fixup /// if necessary. IsResolved signals whether the caller believes a relocation /// is needed; the target can modify the value. The default does nothing. - void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override { + void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, bool &IsResolved) override { MCFixupKind Kind = Fixup.getKind(); switch((unsigned)Kind) { @@ -415,9 +413,9 @@ public: /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided /// data fragment, at the offset specified by the fixup and following the /// fixup kind as appropriate. - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t FixupValue, bool IsPCRel, - MCContext &Ctx) const override { + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t FixupValue, bool IsPCRel) const override { // When FixupValue is 0 the relocation is external and there // is nothing for us to do. @@ -432,8 +430,8 @@ public: // to a real offset before we can use it. 
uint32_t Offset = Fixup.getOffset(); unsigned NumBytes = getFixupKindNumBytes(Kind); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); - char *InstAddr = Data + Offset; + assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); + char *InstAddr = Data.data() + Offset; Value = adjustFixupValue(Kind, FixupValue); if(!Value) @@ -517,7 +515,7 @@ public: dbgs() << "\tBValue=0x"; dbgs().write_hex(Value) << ": AValue=0x"; dbgs().write_hex(FixupValue) << ": Offset=" << Offset << - ": Size=" << DataSize << + ": Size=" << Data.size() << ": OInst=0x"; dbgs().write_hex(OldData) << ": Reloc=0x"; dbgs().write_hex(Reloc);); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp index e8f154a1fa533..c7114c7f18a0a 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp @@ -701,33 +701,32 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { break; case Hexagon::A2_addi: Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); - assert(Absolute);(void)Absolute; - if (Value == 1) { - Result.setOpcode(Hexagon::SA1_inc); - addOps(Result, Inst, 0); - addOps(Result, Inst, 1); - break; - } // 1,2 SUBInst $Rd = add($Rs, #1) - else if (Value == -1) { - Result.setOpcode(Hexagon::SA1_dec); - addOps(Result, Inst, 0); - addOps(Result, Inst, 1); - addOps(Result, Inst, 2); - break; - } // 1,2 SUBInst $Rd = add($Rs,#-1) - else if (Inst.getOperand(1).getReg() == Hexagon::R29) { - Result.setOpcode(Hexagon::SA1_addsp); - addOps(Result, Inst, 0); - addOps(Result, Inst, 2); - break; - } // 1,3 SUBInst $Rd = add(r29, #$u6_2) - else { - Result.setOpcode(Hexagon::SA1_addi); - addOps(Result, Inst, 0); - addOps(Result, Inst, 1); - addOps(Result, Inst, 2); - break; - } // 1,2,3 SUBInst $Rx = add($Rx, #$s7) + if (Absolute) { + if (Value == 1) { + Result.setOpcode(Hexagon::SA1_inc); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; + } // 1,2 SUBInst $Rd = add($Rs, #1) + if (Value == -1) { + Result.setOpcode(Hexagon::SA1_dec); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; + } // 1,2 SUBInst $Rd = add($Rs,#-1) + if (Inst.getOperand(1).getReg() == Hexagon::R29) { + Result.setOpcode(Hexagon::SA1_addsp); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; + } // 1,3 SUBInst $Rd = add(r29, #$u6_2) + } + Result.setOpcode(Hexagon::SA1_addi); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rx = add($Rx, #$s7) case Hexagon::A2_add: Result.setOpcode(Hexagon::SA1_addrx); addOps(Result, Inst, 0); diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp index 0ef1401ef531a..c212726113ab7 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp +++ b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp @@ -49,8 +49,9 @@ public: LanaiAsmBackend(const Target &T, Triple::OSType OST) : MCAsmBackend(), OSType(OST) {} - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const override; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; @@ -88,9 +89,10 @@ bool LanaiAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) 
const { return true; } -void LanaiAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned /*DataSize*/, uint64_t Value, - bool /*IsPCRel*/, MCContext & /*Ctx*/) const { +void LanaiAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef<char> Data, uint64_t Value, + bool /*IsPCRel*/) const { MCFixupKind Kind = Fixup.getKind(); Value = adjustFixupValue(static_cast<unsigned>(Kind), Value); diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index d8fdc8ba674e6..982c6fea62d44 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -32,16 +32,20 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) { return *RM; } +static std::string computeDataLayout(const Triple &TT, StringRef CPU, + const TargetOptions &Options) { + return "e-m:e-p:16:16-i32:16-i64:16-f32:16-f64:16-a:8-n8:16-S16"; +} + MSP430TargetMachine::MSP430TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional<Reloc::Model> RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, "e-m:e-p:16:16-i32:16:32-a:16-n8:16", TT, CPU, FS, + : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS, Options, getEffectiveRelocModel(RM), CM, OL), TLOF(make_unique<TargetLoweringObjectFileELF>()), - // FIXME: Check DataLayout string. Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 694c201cbe8dc..9d5c179a0fd90 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -322,6 +322,7 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseDirectiveSet(); bool parseDirectiveOption(); bool parseInsnDirective(); + bool parseRSectionDirective(StringRef Section); bool parseSSectionDirective(StringRef Section, unsigned Type); bool parseSetAtDirective(); @@ -5106,7 +5107,7 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) { CC = StringSwitch<unsigned>(Name) .Case("zero", 0) - .Case("at", 1) + .Cases("at", "AT", 1) .Case("a0", 4) .Case("a1", 5) .Case("a2", 6) @@ -6952,6 +6953,23 @@ bool MipsAsmParser::parseInsnDirective() { return false; } +/// parseRSectionDirective +/// ::= .rdata +bool MipsAsmParser::parseRSectionDirective(StringRef Section) { + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + MCSection *ELFSection = getContext().getELFSection( + Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + getParser().getStreamer().SwitchSection(ELFSection); + + getParser().Lex(); // Eat EndOfStatement token. 
+ return false; +} + /// parseSSectionDirective /// ::= .sbss /// ::= .sdata @@ -7499,6 +7517,10 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { parseInsnDirective(); return false; } + if (IDVal == ".rdata") { + parseRSectionDirective(".rodata"); + return false; + } if (IDVal == ".sbss") { parseSSectionDirective(IDVal, ELF::SHT_NOBITS); return false; diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 6d3d4db036032..ae48d6e38fa0f 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -235,10 +235,12 @@ static unsigned calculateMMLEIndex(unsigned i) { /// ApplyFixup - Apply the \p Value for given \p Fixup into the provided /// data fragment, at the offset specified by the fixup and following the /// fixup kind as appropriate. -void MipsAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, bool IsPCRel, - MCContext &Ctx) const { +void MipsAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef<char> Data, uint64_t Value, + bool IsPCRel) const { MCFixupKind Kind = Fixup.getKind(); + MCContext &Ctx = Asm.getContext(); Value = adjustFixupValue(Fixup, Value, Ctx); if (!Value) diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index 4b3cc6e21f4cd..bf3b290b7ed53 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -38,8 +38,9 @@ public: MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const override; Optional<MCFixupKind> getFixupKind(StringRef Name) const override; const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index 3272319ad50f4..7daea163b8a64 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -326,9 +326,9 @@ class AUIPC_DESC : ALUIPC_DESC_BASE<"auipc", GPR32Opnd, II_AUIPC>; class AUI_DESC_BASE<string instr_asm, RegisterOperand GPROpnd, InstrItinClass itin = NoItinerary> : MipsR6Arch<instr_asm> { - dag OutOperandList = (outs GPROpnd:$rs); - dag InOperandList = (ins GPROpnd:$rt, uimm16:$imm); - string AsmString = !strconcat(instr_asm, "\t$rs, $rt, $imm"); + dag OutOperandList = (outs GPROpnd:$rt); + dag InOperandList = (ins GPROpnd:$rs, uimm16:$imm); + string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $imm"); list<dag> Pattern = []; InstrItinClass Itinerary = itin; } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 68708dc4f50fe..02102d6b22f4e 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -907,6 +907,11 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, if (!(CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1)))) return SDValue(); } + // Don't generate INS if constant OR operand doesn't fit into bits + // cleared by constant AND operand. 
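// (A worked example of the check below, not from the patch: for
// (or (and $x, 0xffff00ff), 0x100) the OR constant sits entirely inside
// the byte cleared by the AND mask, 0xffff00ff & 0x100 == 0, and folding
// to INS is sound. For (or (and $x, 0xffff00ff), 0x10100), bit 16
// survives the AND, the constants overlap, and an INS of the cleared
// field would silently drop that bit, so the combine now gives up and
// returns SDValue() instead.)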
+ if (CN->getSExtValue() & CN1->getSExtValue()) + return SDValue(); + SDLoc DL(N); EVT ValTy = N->getOperand(0)->getValueType(0); SDValue Const1; diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index 272595af5f6f1..b95f1158fa562 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -274,8 +274,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { if (IsPIC) { MachineBasicBlock *BalTgtMBB = MF->CreateMachineBasicBlock(BB); MF->insert(FallThroughMBB, BalTgtMBB); - LongBrMBB->addSuccessor(BalTgtMBB, BranchProbability::getOne()); - BalTgtMBB->addSuccessor(&*FallThroughMBB, BranchProbability::getOne()); + LongBrMBB->addSuccessor(BalTgtMBB); + BalTgtMBB->addSuccessor(TgtMBB); // We must select between the MIPS32r6/MIPS64r6 BAL (which is a normal // instruction) and the pre-MIPS32r6/MIPS64r6 definition (which is an @@ -342,8 +342,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { .addReg(Mips::SP).addImm(8); if (Subtarget.hasMips32r6()) - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR), Mips::ZERO) - .addReg(Mips::AT); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR)) + .addReg(Mips::ZERO).addReg(Mips::AT); else BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT); @@ -415,8 +415,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { .addReg(Mips::SP_64).addImm(0); if (Subtarget.hasMips64r6()) - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR64), Mips::ZERO_64) - .addReg(Mips::AT_64); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR64)) + .addReg(Mips::ZERO_64).addReg(Mips::AT_64); else BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64); diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 2382ea2716612..b57bceb3c8371 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1257,19 +1257,22 @@ static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { EVT ResVecTy = Op->getValueType(0); EVT ViaVecTy = ResVecTy; + bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); SDLoc DL(Op); // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating // lanes. 
- SDValue LaneA; - SDValue LaneB = Op->getOperand(2); + SDValue LaneA = Op->getOperand(OpNr); + SDValue LaneB; if (ResVecTy == MVT::v2i64) { - LaneA = DAG.getConstant(0, DL, MVT::i32); + LaneB = DAG.getConstant(0, DL, MVT::i32); ViaVecTy = MVT::v4i32; + if(BigEndian) + std::swap(LaneA, LaneB); } else - LaneA = LaneB; + LaneB = LaneA; SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB }; @@ -1277,8 +1280,11 @@ static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { SDValue Result = DAG.getBuildVector( ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); - if (ViaVecTy != ResVecTy) - Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, Result); + if (ViaVecTy != ResVecTy) { + SDValue One = DAG.getConstant(1, DL, ViaVecTy); + Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, + DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One)); + } return Result; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 028c2cb562f8e..6d7eb786a6835 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -113,8 +113,9 @@ public: return (IsLittleEndian? InfosLE : InfosBE)[Kind - FirstTargetFixupKind]; } - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override { + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const override { Value = adjustFixupValue(Fixup.getKind(), Value); if (!Value) return; // Doesn't change encoding. @@ -130,10 +131,8 @@ public: } } - void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override { + void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, bool &IsResolved) override { switch ((PPC::Fixups)Fixup.getKind()) { default: break; case PPC::fixup_ppc_br24: diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 38ae62b26757a..07c9c1f9f84c0 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -24,7 +24,6 @@ namespace llvm { class PPCTargetMachine; class PassRegistry; class FunctionPass; - class ImmutablePass; class MachineInstr; class AsmPrinter; class MCInst; diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 57a1d373c88cf..c2c115cb6dafa 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -521,7 +521,7 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); bool HasBP = RegInfo->hasBasePointer(MF); unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; - unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg; + unsigned BP8Reg = HasBP ? 
(unsigned) PPC::X30 : FP8Reg; for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 662550f7a396a..72f14e9691382 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2560,8 +2560,9 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, PPCII::MO_TPREL_HA); SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_LO); - SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, - is64bit ? MVT::i64 : MVT::i32); + SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64) + : DAG.getRegister(PPC::R2, MVT::i32); + SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); } @@ -8377,9 +8378,9 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, if (IntrinsicID == Intrinsic::thread_pointer) { // Reads the thread pointer register, used for __builtin_thread_pointer. - bool is64bit = Subtarget.isPPC64(); - return DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, - is64bit ? MVT::i64 : MVT::i32); + if (Subtarget.isPPC64()) + return DAG.getRegister(PPC::X13, MVT::i64); + return DAG.getRegister(PPC::R2, MVT::i32); } // If this is a lowered altivec predicate compare, CompareOpc is set to the diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 70536a6039b82..e2af5e5295445 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -972,13 +972,15 @@ def LDMX : XForm_1<31, 309, (outs g8rc:$rD), (ins memrr:$src), // Support for medium and large code model. let hasSideEffects = 0 in { +let isReMaterializable = 1 in { def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), "#ADDIStocHA", []>, isPPC64; +def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), + "#ADDItocL", []>, isPPC64; +} let mayLoad = 1 in def LDtocL: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg), "#LDtocL", []>, isPPC64; -def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), - "#ADDItocL", []>, isPPC64; } // Support for thread-local storage. @@ -994,7 +996,7 @@ def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg), (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>, isPPC64; -let isBarrier = 1, isPseudo = 1, Defs = [CR7], Itinerary = IIC_LdStSync in +let isPseudo = 1, Defs = [CR7], Itinerary = IIC_LdStSync in def CFENCE8 : Pseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>; def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g), diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 236e513bec231..13b4f9ab962da 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -292,6 +292,29 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, return 0; } +// For opcodes with the ReMaterializable flag set, this function is called to +// verify the instruction is really rematable. +bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, + AliasAnalysis *AA) const { + switch (MI.getOpcode()) { + default: + // This function should only be called for opcodes with the ReMaterializable + // flag set. 
+ llvm_unreachable("Unknown rematerializable operation!"); + break; + case PPC::LI: + case PPC::LI8: + case PPC::LIS: + case PPC::LIS8: + case PPC::QVGPCI: + case PPC::ADDIStocHA: + case PPC::ADDItocL: + case PPC::LOAD_STACK_GUARD: + return true; + } + return false; +} + unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const { // Note: This list must be kept consistent with StoreRegToStackSlot. diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 8dd4dbb608794..b0629c88cf57b 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -162,6 +162,8 @@ public: unsigned &SubIdx) const override; unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI, + AliasAnalysis *AA) const override; unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 637e52bbdbeec..8af7f7e981171 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -389,9 +389,14 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) { - BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg) - .addReg(PPC::R31) - .addImm(FrameSize); + if (LP64) + BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), Reg) + .addReg(PPC::X31) + .addImm(FrameSize); + else + BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg) + .addReg(PPC::R31) + .addImm(FrameSize); } else if (LP64) { BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) .addImm(0) @@ -478,8 +483,10 @@ void PPCRegisterInfo::lowerDynamicAreaOffset( const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); + bool is64Bit = TM.isPPC64(); DebugLoc dl = MI.getDebugLoc(); - BuildMI(MBB, II, dl, TII.get(PPC::LI), MI.getOperand(0).getReg()) + BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LI8 : PPC::LI), + MI.getOperand(0).getReg()) .addImm(maxCallFrameSize); MBB.erase(II); } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 5a226b23ff96f..a88a6541e8d00 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -86,9 +86,9 @@ EnableMachineCombinerPass("ppc-machine-combiner", extern "C" void LLVMInitializePowerPCTarget() { // Register the targets - RegisterTargetMachine<PPC32TargetMachine> A(getThePPC32Target()); - RegisterTargetMachine<PPC64TargetMachine> B(getThePPC64Target()); - RegisterTargetMachine<PPC64TargetMachine> C(getThePPC64LETarget()); + RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target()); + RegisterTargetMachine<PPCTargetMachine> B(getThePPC64Target()); + RegisterTargetMachine<PPCTargetMachine> C(getThePPC64LETarget()); PassRegistry &PR = *PassRegistry::getPassRegistry(); initializePPCBoolRetToIntPass(PR); @@ -177,32 +177,34 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, assert(Options.MCOptions.getABIName().empty() && "Unknown target-abi option!"); - if (!TT.isMacOSX()) { - switch (TT.getArch()) { - case Triple::ppc64le: - return PPCTargetMachine::PPC_ABI_ELFv2; - case Triple::ppc64: - return PPCTargetMachine::PPC_ABI_ELFv1; - default: - // Fallthrough. 
- ; - } + if (TT.isMacOSX()) + return PPCTargetMachine::PPC_ABI_UNKNOWN; + + switch (TT.getArch()) { + case Triple::ppc64le: + return PPCTargetMachine::PPC_ABI_ELFv2; + case Triple::ppc64: + return PPCTargetMachine::PPC_ABI_ELFv1; + default: + return PPCTargetMachine::PPC_ABI_UNKNOWN; } - return PPCTargetMachine::PPC_ABI_UNKNOWN; } static Reloc::Model getEffectiveRelocModel(const Triple &TT, Optional<Reloc::Model> RM) { - if (!RM.hasValue()) { - if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) { - if (!TT.isOSBinFormatMachO() && !TT.isMacOSX()) - return Reloc::PIC_; - } - if (TT.isOSDarwin()) - return Reloc::DynamicNoPIC; - return Reloc::Static; - } - return *RM; + if (RM.hasValue()) + return *RM; + + // Darwin defaults to dynamic-no-pic. + if (TT.isOSDarwin()) + return Reloc::DynamicNoPIC; + + // Non-darwin 64-bit platforms are PIC by default. + if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) + return Reloc::PIC_; + + // 32-bit is static by default. + return Reloc::Static; } // The FeatureString here is a little subtle. We are modifying the feature @@ -224,26 +226,6 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, PPCTargetMachine::~PPCTargetMachine() = default; -void PPC32TargetMachine::anchor() {} - -PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional<Reloc::Model> RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) - : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} - -void PPC64TargetMachine::anchor() {} - -PPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional<Reloc::Model> RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) - : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} - const PPCSubtarget * PPCTargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); @@ -406,7 +388,7 @@ void PPCPassConfig::addPreRegAlloc() { // FIXME: We probably don't need to run these for -fPIE. if (getPPCTargetMachine().isPositionIndependent()) { // FIXME: LiveVariables should not be necessary here! - // PPCTLSDYnamicCallPass uses LiveIntervals which previously dependet on + // PPCTLSDynamicCallPass uses LiveIntervals which previously depended on // LiveVariables. This (unnecessary) dependency has been removed now, // however a stage-2 clang build fails without LiveVariables computed here. addPass(&LiveVariablesID, false); diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index b8f5a2083d808..5eb6ba785d1b8 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -23,7 +23,7 @@ namespace llvm { /// Common code between 32-bit and 64-bit PowerPC targets. /// -class PPCTargetMachine : public LLVMTargetMachine { +class PPCTargetMachine final : public LLVMTargetMachine { public: enum PPCABI { PPC_ABI_UNKNOWN, PPC_ABI_ELFv1, PPC_ABI_ELFv2 }; private: @@ -60,29 +60,6 @@ public: return false; } }; - -/// PowerPC 32-bit target machine. -/// -class PPC32TargetMachine : public PPCTargetMachine { - virtual void anchor(); -public: - PPC32TargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional<Reloc::Model> RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - -/// PowerPC 64-bit target machine.
-/// -class PPC64TargetMachine : public PPCTargetMachine { - virtual void anchor(); -public: - PPC64TargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional<Reloc::Model> RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - } // end namespace llvm #endif diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index d9a71893afee7..f85c0cf111c43 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -32,8 +32,9 @@ public: : MCAsmBackend(), OSABI(OSABI), Is64Bit(Is64Bit) {} ~RISCVAsmBackend() override {} - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const override; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; @@ -69,9 +70,10 @@ bool RISCVAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } -void RISCVAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel, MCContext &Ctx) const { +void RISCVAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef<char> Data, uint64_t Value, + bool IsPCRel) const { return; } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index d1d1334163a26..c72b47b090857 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -203,10 +203,8 @@ namespace { return InfosBE[Kind - FirstTargetFixupKind]; } - void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override { + void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, bool &IsResolved) override { switch ((Sparc::Fixups)Fixup.getKind()) { default: break; case Sparc::fixup_sparc_wplt30: @@ -273,9 +271,9 @@ namespace { ELFSparcAsmBackend(const Target &T, Triple::OSType OSType) : SparcAsmBackend(T), OSType(OSType) { } - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, - MCContext &Ctx) const override { + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const override { Value = adjustFixupValue(Fixup.getKind(), Value); if (!Value) return; // Doesn't change encoding. 
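// The SparcTargetObjectFile change below follows a pattern several ELF
// targets have needed: unless Initialize() forwards
// TM.Options.UseInitArray through InitializeELF(), static constructors
// keep landing in legacy .ctors sections even when -fuse-init-array asks
// for .init_array. The idiom, with a hypothetical class name:
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetMachine.h"

struct HypotheticalELFTargetObjectFile : llvm::TargetLoweringObjectFileELF {
  void Initialize(llvm::MCContext &Ctx,
                  const llvm::TargetMachine &TM) override {
    TargetLoweringObjectFileELF::Initialize(Ctx, TM);
    InitializeELF(TM.Options.UseInitArray); // honor -fuse-init-array
  }
};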
diff --git a/lib/Target/Sparc/SparcTargetObjectFile.cpp b/lib/Target/Sparc/SparcTargetObjectFile.cpp index 627e49a95f3cc..2c040dce994b6 100644 --- a/lib/Target/Sparc/SparcTargetObjectFile.cpp +++ b/lib/Target/Sparc/SparcTargetObjectFile.cpp @@ -15,6 +15,12 @@ using namespace llvm; +void SparcELFTargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); +} + const MCExpr *SparcELFTargetObjectFile::getTTypeGlobalReference( const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM, MachineModuleInfo *MMI, MCStreamer &Streamer) const { diff --git a/lib/Target/Sparc/SparcTargetObjectFile.h b/lib/Target/Sparc/SparcTargetObjectFile.h index fe8800625a567..3b1b345c3b193 100644 --- a/lib/Target/Sparc/SparcTargetObjectFile.h +++ b/lib/Target/Sparc/SparcTargetObjectFile.h @@ -23,6 +23,8 @@ public: TargetLoweringObjectFileELF() {} + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; + const MCExpr *getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM, diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index fd1fd7bc40dcc..6b32a7926437a 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -50,8 +50,9 @@ public: return SystemZ::NumTargetFixupKinds; } const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const override; bool mayNeedRelaxation(const MCInst &Inst) const override { return false; } @@ -89,15 +90,17 @@ SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { return Infos[Kind - FirstTargetFixupKind]; } -void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel, MCContext &Ctx) const { +void SystemZMCAsmBackend::applyFixup(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef<char> Data, uint64_t Value, + bool IsPCRel) const { MCFixupKind Kind = Fixup.getKind(); unsigned Offset = Fixup.getOffset(); unsigned BitSize = getFixupKindInfo(Kind).TargetSize; unsigned Size = (BitSize + 7) / 8; - assert(Offset + Size <= DataSize && "Invalid fixup offset!"); + assert(Offset + Size <= Data.size() && "Invalid fixup offset!"); // Big-endian insertion of Size bytes. Value = extractBitsForFixup(Kind, Value); diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp index a28a91e834f61..0cb2b5a14ce73 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -277,8 +277,21 @@ void SystemZFrameLowering:: processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo &MFFrame = MF.getFrameInfo(); - uint64_t MaxReach = (MFFrame.estimateStackSize(MF) + - SystemZMC::CallFrameSize * 2); + // Get the size of our stack frame to be allocated ... + uint64_t StackSize = (MFFrame.estimateStackSize(MF) + + SystemZMC::CallFrameSize); + // ... and the maximum offset we may need to reach into the + // caller's frame to access the save area or stack arguments. 
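// (Orientation for the computation below, with figures taken from the
// existing code rather than new analysis: the short SystemZ load/store
// forms used for spill code carry an unsigned 12-bit displacement, so once
// StackSize + MaxArgOffset exceeds 4095 bytes some frame objects fall out
// of direct reach and the function must reserve emergency scavenging
// slots, which is what the later !isUInt<12>(MaxReach) test decides.)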
+ int64_t MaxArgOffset = SystemZMC::CallFrameSize; + for (int I = MFFrame.getObjectIndexBegin(); I != 0; ++I) + if (MFFrame.getObjectOffset(I) >= 0) { + int64_t ArgOffset = SystemZMC::CallFrameSize + + MFFrame.getObjectOffset(I) + + MFFrame.getObjectSize(I); + MaxArgOffset = std::max(MaxArgOffset, ArgOffset); + } + + uint64_t MaxReach = StackSize + MaxArgOffset; if (!isUInt<12>(MaxReach)) { // We may need register scavenging slots if some parts of the frame // are outside the reach of an unsigned 12-bit displacement. diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index ac4c3f6db684d..fef4a8c92a362 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1322,11 +1322,6 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps); } -SDValue SystemZTargetLowering::prepareVolatileOrAtomicLoad( - SDValue Chain, const SDLoc &DL, SelectionDAG &DAG) const { - return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain); -} - // Return true if Op is an intrinsic node with chain that returns the CC value // as its only (other) argument. Provide the associated SystemZISD opcode and // the mask of valid CC values if so. @@ -2059,6 +2054,7 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, if (NewC.ICmpType != SystemZICMP::SignedOnly && NewC.Op0.getOpcode() == ISD::SHL && isSimpleShift(NewC.Op0, ShiftVal) && + (MaskVal >> ShiftVal != 0) && (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal >> ShiftVal, CmpVal >> ShiftVal, @@ -2068,6 +2064,7 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, } else if (NewC.ICmpType != SystemZICMP::SignedOnly && NewC.Op0.getOpcode() == ISD::SRL && isSimpleShift(NewC.Op0, ShiftVal) && + (MaskVal << ShiftVal != 0) && (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal << ShiftVal, CmpVal << ShiftVal, @@ -3212,12 +3209,15 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); } -// Op is an atomic load. Lower it into a normal volatile load. +// Op is an atomic load. Lower it into a serialization followed +// by a normal volatile load. SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { auto *Node = cast<AtomicSDNode>(Op.getNode()); + SDValue Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), + MVT::Other, Node->getChain()), 0); return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(), - Node->getChain(), Node->getBasePtr(), + Chain, Node->getBasePtr(), Node->getMemoryVT(), Node->getMemOperand()); } @@ -4688,7 +4688,6 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(STRCMP); OPCODE(SEARCH_STRING); OPCODE(IPM); - OPCODE(SERIALIZE); OPCODE(MEMBARRIER); OPCODE(TBEGIN); OPCODE(TBEGIN_NOFLOAT); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 79c8c4d92669f..5dcb19c0a35db 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -139,9 +139,6 @@ enum NodeType : unsigned { // Store the CC value in bits 29 and 28 of an integer. IPM, - // Perform a serialization operation. (BCR 15,0 or BCR 14,0.) - SERIALIZE, - // Compiler barrier only; generate a no-op. 
MEMBARRIER, @@ -471,8 +468,6 @@ public: const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override; - SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL, - SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; ISD::NodeType getExtendForAtomicOps() const override { @@ -522,7 +517,6 @@ private: unsigned Opcode) const; SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerLOAD_SEQUENCE_POINT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index fa5ecdd852433..9f5e6288348e0 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -189,18 +189,15 @@ let isBranch = 1, isTerminator = 1 in { //===----------------------------------------------------------------------===// // Unconditional trap. -// FIXME: This trap instruction should be marked as isTerminator, but there is -// currently a general bug that allows non-terminators to be placed between -// terminators. Temporarily leave this unmarked until the bug is fixed. -let isBarrier = 1, hasCtrlDep = 1 in +let hasCtrlDep = 1 in def Trap : Alias<4, (outs), (ins), [(trap)]>; // Conditional trap. -let isTerminator = 1, hasCtrlDep = 1, Uses = [CC] in +let hasCtrlDep = 1, Uses = [CC] in def CondTrap : Alias<4, (outs), (ins cond4:$valid, cond4:$R1), []>; // Fused compare-and-trap instructions. -let isTerminator = 1, hasCtrlDep = 1 in { +let hasCtrlDep = 1 in { // These patterns work the same way as for compare-and-branch. defm CRT : CmpBranchRRFcPair<"crt", 0xB972, GR32>; defm CGRT : CmpBranchRRFcPair<"cgrt", 0xB960, GR64>; @@ -1449,7 +1446,7 @@ let Predicates = [FeatureExecutionHint] in { // A serialization instruction that acts as a barrier for all memory // accesses, which expands to "bcr 14, 0". let hasSideEffects = 1 in -def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>; +def Serialize : Alias<2, (outs), (ins), []>; // A pseudo instruction that serves as a compiler barrier. let hasSideEffects = 1, hasNoSchedulingInfo = 1 in diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/lib/Target/SystemZ/SystemZMachineScheduler.cpp index ab6020f3f1896..b6feaa49d8585 100644 --- a/lib/Target/SystemZ/SystemZMachineScheduler.cpp +++ b/lib/Target/SystemZ/SystemZMachineScheduler.cpp @@ -23,7 +23,7 @@ using namespace llvm; #ifndef NDEBUG // Print the set of SUs void SystemZPostRASchedStrategy::SUSet:: -dump(SystemZHazardRecognizer &HazardRec) { +dump(SystemZHazardRecognizer &HazardRec) const { dbgs() << "{"; for (auto &SU : *this) { HazardRec.dumpSU(SU, dbgs()); diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.h b/lib/Target/SystemZ/SystemZMachineScheduler.h index 12357e0348a9e..3dfef388691e7 100644 --- a/lib/Target/SystemZ/SystemZMachineScheduler.h +++ b/lib/Target/SystemZ/SystemZMachineScheduler.h @@ -72,7 +72,7 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy { // A set of SUs with a sorter and dump method. 
struct SUSet : std::set<SUnit*, SUSorter> { #ifndef NDEBUG - void dump(SystemZHazardRecognizer &HazardRec); + void dump(SystemZHazardRecognizer &HazardRec) const; #endif }; diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index adfc69c5d4cf4..ab2392809f3be 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -191,8 +191,6 @@ def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>; def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>; def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>; -def z_serialize : SDNode<"SystemZISD::SERIALIZE", SDTNone, - [SDNPHasChain, SDNPMayStore]>; def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone, [SDNPHasChain, SDNPSideEffect]>; diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index eb2f17a2091c3..a10ca64fa6329 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -51,8 +51,6 @@ public: } bool targetSchedulesPostRAScheduling() const override { return true; }; - - bool isMachineVerifierClean() const override { return false; } }; } // end namespace llvm diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp index 4f20096c15830..1357cb5735f8a 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -37,8 +37,9 @@ public: : MCAsmBackend(), Is64Bit(Is64Bit) {} ~WebAssemblyAsmBackendELF() override {} - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const override; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; @@ -77,8 +78,9 @@ public: const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const override; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; @@ -105,9 +107,11 @@ bool WebAssemblyAsmBackendELF::writeNopData(uint64_t Count, return true; } -void WebAssemblyAsmBackendELF::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel, MCContext &Ctx) const { +void WebAssemblyAsmBackendELF::applyFixup(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const { const MCFixupKindInfo &Info = getFixupKindInfo(Fixup.getKind()); assert(Info.Flags == 0 && "WebAssembly does not use MCFixupKindInfo flags"); @@ -119,7 +123,7 @@ void WebAssemblyAsmBackendELF::applyFixup(const MCFixup &Fixup, char *Data, Value <<= Info.TargetOffset; unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); // For each byte of the fragment that the fixup touches, mask in the // bits from the fixup value. 
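// The WebAssembly hunks below move OPERAND_GLOBAL immediates off the
// eagerly-encoded ULEB128-plus-fixup path and onto the symbolic-operand
// path, where the field is emitted as a fixed five-byte ULEB128
// (PaddedSize = 5) that a later fixup or relocation can overwrite in
// place. A standalone sketch of that padding trick for 32-bit values:
#include <cstdint>
#include <vector>

static std::vector<uint8_t> encodePaddedULEB128(uint32_t Value) {
  std::vector<uint8_t> Bytes;
  for (unsigned I = 0; I != 4; ++I) { // four bytes with continuation bit set
    Bytes.push_back(uint8_t(Value & 0x7f) | 0x80);
    Value >>= 7;
  }
  Bytes.push_back(uint8_t(Value & 0x7f)); // fifth byte, continuation clear
  return Bytes;
}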
@@ -163,9 +167,11 @@ bool WebAssemblyAsmBackend::writeNopData(uint64_t Count, return true; } -void WebAssemblyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel, MCContext &Ctx) const { +void WebAssemblyAsmBackend::applyFixup(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const { const MCFixupKindInfo &Info = getFixupKindInfo(Fixup.getKind()); assert(Info.Flags == 0 && "WebAssembly does not use MCFixupKindInfo flags"); @@ -177,7 +183,7 @@ void WebAssemblyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, Value <<= Info.TargetOffset; unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); // For each byte of the fragment that the fixup touches, mask in the // bits from the fixup value. diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp index c56c591def361..3e3b52fca5691 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -36,7 +36,6 @@ STATISTIC(MCNumFixups, "Number of MC fixups created."); namespace { class WebAssemblyMCCodeEmitter final : public MCCodeEmitter { const MCInstrInfo &MCII; - MCContext &Ctx; // Implementation generated by tablegen. uint64_t getBinaryCodeForInstr(const MCInst &MI, @@ -48,14 +47,12 @@ class WebAssemblyMCCodeEmitter final : public MCCodeEmitter { const MCSubtargetInfo &STI) const override; public: - WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) - : MCII(mcii), Ctx(ctx) {} + WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {} }; } // end anonymous namespace -MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, - MCContext &Ctx) { - return new WebAssemblyMCCodeEmitter(MCII, Ctx); +MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII) { + return new WebAssemblyMCCodeEmitter(MCII); } void WebAssemblyMCCodeEmitter::encodeInstruction( @@ -89,11 +86,7 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( } else if (Info.OperandType == WebAssembly::OPERAND_I64IMM) { encodeSLEB128(int64_t(MO.getImm()), OS); } else if (Info.OperandType == WebAssembly::OPERAND_GLOBAL) { - Fixups.push_back(MCFixup::create( - OS.tell() - Start, MCConstantExpr::create(MO.getImm(), Ctx), - MCFixupKind(WebAssembly::fixup_code_global_index), MI.getLoc())); - ++MCNumFixups; - encodeULEB128(uint64_t(MO.getImm()), OS); + llvm_unreachable("wasm globals should only be accessed symbolically"); } else if (Info.OperandType == WebAssembly::OPERAND_SIGNATURE) { encodeSLEB128(int64_t(MO.getImm()), OS); } else { @@ -135,6 +128,9 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) { FixupKind = MCFixupKind(WebAssembly::fixup_code_uleb128_i32); PaddedSize = 5; + } else if (Info.OperandType == WebAssembly::OPERAND_GLOBAL) { + FixupKind = MCFixupKind(WebAssembly::fixup_code_global_index); + PaddedSize = 5; } else { llvm_unreachable("unexpected symbolic operand kind"); } diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index 9fd3ec81c258f..9580eeaa33d73 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++
b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -74,7 +74,7 @@ static MCInstPrinter *createMCInstPrinter(const Triple & /*T*/, static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo & /*MRI*/, MCContext &Ctx) { - return createWebAssemblyMCCodeEmitter(MCII, Ctx); + return createWebAssemblyMCCodeEmitter(MCII); } static MCAsmBackend *createAsmBackend(const Target & /*T*/, diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 0ba700a86b744..4d676c32a09c5 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -35,8 +35,7 @@ class raw_pwrite_stream; Target &getTheWebAssemblyTarget32(); Target &getTheWebAssemblyTarget64(); -MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, - MCContext &Ctx); +MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII); MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT); diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp index 19e14f3261aa7..9cf77829f3bc2 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp @@ -68,6 +68,8 @@ WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, bool IsFunction = IsFunctionExpr(Fixup.getValue()); switch (unsigned(Fixup.getKind())) { + case WebAssembly::fixup_code_global_index: + return wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB; case WebAssembly::fixup_code_sleb128_i32: if (IsFunction) return wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB; diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index b999091e2d294..f51585a10ca12 100644 --- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -96,13 +96,6 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) { MCConstantExpr::create(Size, OutContext)); } } - - if (!TM.getTargetTriple().isOSBinFormatELF()) { - MachineModuleInfoWasm &MMIW = MMI->getObjFileInfo<MachineModuleInfoWasm>(); - getTargetStreamer()->emitGlobal(MMIW.getGlobals()); - if (MMIW.hasStackPointerGlobal()) - getTargetStreamer()->emitStackPointer(MMIW.getStackPointerGlobal()); - } } void WebAssemblyAsmPrinter::EmitConstantPool() { diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 09338a4898e03..c980f4b87f916 100644 --- a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -63,12 +63,16 @@ class WebAssemblyFastISel final : public FastISel { public: // Innocuous defaults for our address. 
Address() : Kind(RegBase), Offset(0), GV(0) { Base.Reg = 0; } - void setKind(BaseKind K) { Kind = K; } + void setKind(BaseKind K) { + assert(!isSet() && "Can't change kind with non-zero base"); + Kind = K; + } BaseKind getKind() const { return Kind; } bool isRegBase() const { return Kind == RegBase; } bool isFIBase() const { return Kind == FrameIndexBase; } void setReg(unsigned Reg) { assert(isRegBase() && "Invalid base register access!"); + assert(Base.Reg == 0 && "Overwriting non-zero register"); Base.Reg = Reg; } unsigned getReg() const { @@ -77,6 +81,7 @@ class WebAssemblyFastISel final : public FastISel { } void setFI(unsigned FI) { assert(isFIBase() && "Invalid base frame index access!"); + assert(Base.FI == 0 && "Overwriting non-zero frame index"); Base.FI = FI; } unsigned getFI() const { @@ -91,6 +96,13 @@ class WebAssemblyFastISel final : public FastISel { int64_t getOffset() const { return Offset; } void setGlobalValue(const GlobalValue *G) { GV = G; } const GlobalValue *getGlobalValue() const { return GV; } + bool isSet() const { + if (isRegBase()) { + return Base.Reg != 0; + } else { + return Base.FI != 0; + } + } }; /// Keep a pointer to the WebAssemblySubtarget around so that we can make the @@ -297,6 +309,9 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) { DenseMap<const AllocaInst *, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) { + if (Addr.isSet()) { + return false; + } Addr.setKind(Address::FrameIndexBase); Addr.setFI(SI->second); return true; @@ -341,6 +356,9 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) { break; } } + if (Addr.isSet()) { + return false; + } Addr.setReg(getRegForValue(Obj)); return Addr.getReg() != 0; } diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index 4209bc333f230..a37d6136e44ed 100644 --- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -104,10 +104,10 @@ static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF, const DebugLoc &DL) { const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + const char *ES = "__stack_pointer"; + auto *SPSymbol = MF.createExternalSymbolName(ES); if (MF.getSubtarget<WebAssemblySubtarget>() .getTargetTriple().isOSBinFormatELF()) { - const char *ES = "__stack_pointer"; - auto *SPSymbol = MF.createExternalSymbolName(ES); MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetRegisterClass *PtrRC = MRI.getTargetRegisterInfo()->getPointerRegClass(MF); @@ -125,10 +125,8 @@ static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF, .addReg(SrcReg) .addMemOperand(MMO); } else { - MachineModuleInfoWasm &MMIW = - MF.getMMI().getObjFileInfo<MachineModuleInfoWasm>(); BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::SET_GLOBAL_I32)) - .addImm(MMIW.getStackPointerGlobal()) + .addExternalSymbol(SPSymbol) .addReg(SrcReg); } } @@ -171,10 +169,11 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, unsigned SPReg = WebAssembly::SP32; if (StackSize) SPReg = MRI.createVirtualRegister(PtrRC); + + const char *ES = "__stack_pointer"; + auto *SPSymbol = MF.createExternalSymbolName(ES); if (MF.getSubtarget<WebAssemblySubtarget>() .getTargetTriple().isOSBinFormatELF()) { - const char *ES = "__stack_pointer"; - auto *SPSymbol = MF.createExternalSymbolName(ES); unsigned Zero = MRI.createVirtualRegister(PtrRC); BuildMI(MBB, InsertPt, DL, 
TII->get(WebAssembly::CONST_I32), Zero) @@ -189,22 +188,8 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, .addReg(Zero) // addr .addMemOperand(LoadMMO); } else { - auto &MMIW = MF.getMMI().getObjFileInfo<MachineModuleInfoWasm>(); - if (!MMIW.hasStackPointerGlobal()) { - MMIW.setStackPointerGlobal(MMIW.getGlobals().size()); - - // Create the stack-pointer global. For now, just use the - // Emscripten/Binaryen ABI names. - wasm::Global G; - G.Type = wasm::ValType::I32; - G.Mutable = true; - G.InitialValue = 0; - G.InitialModule = "env"; - G.InitialName = "STACKTOP"; - MMIW.addGlobal(G); - } BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GET_GLOBAL_I32), SPReg) - .addImm(MMIW.getStackPointerGlobal()); + .addExternalSymbol(SPSymbol); } bool HasBP = hasBP(MF); diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 6650191807dcb..ea9e3fa862ce2 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -170,28 +170,16 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read, if (MI.mayStore()) { Write = true; - const MachineFunction &MF = *MI.getParent()->getParent(); - if (MF.getSubtarget<WebAssemblySubtarget>() - .getTargetTriple().isOSBinFormatELF()) { - // Check for stores to __stack_pointer. - for (auto MMO : MI.memoperands()) { - const MachinePointerInfo &MPI = MMO->getPointerInfo(); - if (MPI.V.is<const PseudoSourceValue *>()) { - auto PSV = MPI.V.get<const PseudoSourceValue *>(); - if (const ExternalSymbolPseudoSourceValue *EPSV = - dyn_cast<ExternalSymbolPseudoSourceValue>(PSV)) - if (StringRef(EPSV->getSymbol()) == "__stack_pointer") - StackPointer = true; - } - } - } else { - // Check for sets of the stack pointer. - const MachineModuleInfoWasm &MMIW = - MF.getMMI().getObjFileInfo<MachineModuleInfoWasm>(); - if ((MI.getOpcode() == WebAssembly::SET_LOCAL_I32 || - MI.getOpcode() == WebAssembly::SET_LOCAL_I64) && - MI.getOperand(0).getImm() == MMIW.getStackPointerGlobal()) { - StackPointer = true; + // Check for stores to __stack_pointer. 
+    for (auto MMO : MI.memoperands()) { + const MachinePointerInfo &MPI = MMO->getPointerInfo(); + if (MPI.V.is<const PseudoSourceValue *>()) { + auto PSV = MPI.V.get<const PseudoSourceValue *>(); + if (const ExternalSymbolPseudoSourceValue *EPSV = + dyn_cast<ExternalSymbolPseudoSourceValue>(PSV)) + if (StringRef(EPSV->getSymbol()) == "__stack_pointer") { + StackPointer = true; + } } } } else if (MI.hasOrderedMemoryRef()) { diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index e5d3209ec6a97..d30cc724c203f 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1705,8 +1705,7 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data()); unsigned Len = DotDispStr.size(); - unsigned Val = OrigDispVal + DotDispVal; - InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, Val); + InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, DotDispVal); } NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext()); diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 7a9e4f4468ec7..914fb36f91a7d 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -108,12 +108,12 @@ public: return Infos[Kind - FirstTargetFixupKind]; } - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override { + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const override { unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind()); - assert(Fixup.getOffset() + Size <= DataSize && - "Invalid fixup offset!"); + assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!"); // Check that upper bits are either all zeros or all ones. // Specifically ignore overflow/underflow as long as the leakage is diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 4097ef224d503..caf98bffb80de 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -153,8 +153,7 @@ void X86MachObjectWriter::RecordX86_64Relocation( const MCSymbol *B_Base = Asm.getAtom(*B); // Neither symbol can be modified.
- if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || - Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) { + if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None) { Asm.getContext().reportError(Fixup.getLoc(), "unsupported relocation of modified symbol"); return; diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index 105580c913a16..5892f1de33eec 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -10,6 +10,7 @@ #include "MCTargetDesc/X86FixupKinds.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/BinaryFormat/COFF.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" @@ -25,8 +26,8 @@ public: X86WinCOFFObjectWriter(bool Is64Bit); ~X86WinCOFFObjectWriter() override = default; - unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsCrossSection, + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsCrossSection, const MCAsmBackend &MAB) const override; }; @@ -36,11 +37,19 @@ X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit) : MCWinCOFFObjectTargetWriter(Is64Bit ? COFF::IMAGE_FILE_MACHINE_AMD64 : COFF::IMAGE_FILE_MACHINE_I386) {} -unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target, +unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx, + const MCValue &Target, const MCFixup &Fixup, bool IsCrossSection, const MCAsmBackend &MAB) const { - unsigned FixupKind = IsCrossSection ? FK_PCRel_4 : Fixup.getKind(); + unsigned FixupKind = Fixup.getKind(); + if (IsCrossSection) { + if (FixupKind != FK_Data_4) { + Ctx.reportError(Fixup.getLoc(), "Cannot represent this expression"); + return COFF::IMAGE_REL_AMD64_ADDR32; + } + FixupKind = FK_PCRel_4; + } MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 2777fa89330f6..e3aa227702bea 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -748,17 +748,7 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, else CallOp = X86::CALLpcrel32; - const char *Symbol; - if (Is64Bit) { - if (STI.isTargetCygMing()) { - Symbol = "___chkstk_ms"; - } else { - Symbol = "__chkstk"; - } - } else if (STI.isTargetCygMing()) - Symbol = "_alloca"; - else - Symbol = "_chkstk"; + StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF); MachineInstrBuilder CI; MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI); @@ -769,10 +759,11 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, // For the large code model, we have to call through a register. Use R11, // as it is scratch in all supported calling conventions. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11) - .addExternalSymbol(Symbol); + .addExternalSymbol(MF.createExternalSymbolName(Symbol)); CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11); } else { - CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol); + CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)) + .addExternalSymbol(MF.createExternalSymbolName(Symbol)); } unsigned AX = Is64Bit ? 
X86::RAX : X86::EAX; @@ -783,13 +774,16 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, .addReg(SP, RegState::Define | RegState::Implicit) .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - if (Is64Bit) { + if (STI.isTargetWin64() || !STI.isOSWindows()) { + // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves. // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp - themselves. It also does not clobber %rax so we can reuse it when + themselves. They also do not clobber %rax so we can reuse it when adjusting %rsp. - BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP) - .addReg(X86::RSP) - .addReg(X86::RAX); + // All other platforms do not specify a particular ABI for the stack probe + // function, so we arbitrarily define it to not adjust %esp/%rsp itself. + BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Is64Bit)), SP) + .addReg(SP) + .addReg(AX); } if (InProlog) { @@ -978,7 +972,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, X86FI->setCalleeSavedFrameSize( X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); - bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO()); + bool UseRedZone = false; + bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty(); // The default stack probe size is 4096 if the function has no stackprobesize // attribute. @@ -1007,6 +1002,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, !TRI->needsStackRealignment(MF) && !MFI.hasVarSizedObjects() && // No dynamic alloca. !MFI.adjustsStack() && // No calls. + !UseStackProbe && // No stack probes. !IsWin64CC && // Win64 has no Red Zone !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop. !MF.shouldSplitStack()) { // Regular stack @@ -1015,6 +1011,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0); StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); MFI.setStackSize(StackSize); + UseRedZone = true; } // Insert stack pointer adjustment for later moving of return addr. Only @@ -1192,6 +1189,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign); if (AlignedNumBytes >= StackProbeSize && UseStackProbe) { + assert(!UseRedZone && "The Red Zone is not accounted for in stack probes"); + // Check whether EAX is livein for this block.
bool isEAXAlive = isEAXLiveIn(MBB); diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 2a1633de0a239..3c4589ab18f6f 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -204,6 +204,11 @@ namespace { bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); + template <class GatherScatterSDNode> + bool selectAddrOfGatherScatterNode(GatherScatterSDNode *Parent, SDValue N, + SDValue &Base, SDValue &Scale, + SDValue &Index, SDValue &Disp, + SDValue &Segment); bool selectMOV64Imm32(SDValue N, SDValue &Imm); bool selectLEAAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, @@ -1415,13 +1420,10 @@ bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) { return false; } -bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, - SDValue &Scale, SDValue &Index, - SDValue &Disp, SDValue &Segment) { - - MaskedGatherScatterSDNode *Mgs = dyn_cast<MaskedGatherScatterSDNode>(Parent); - if (!Mgs) - return false; +template <class GatherScatterSDNode> +bool X86DAGToDAGISel::selectAddrOfGatherScatterNode( + GatherScatterSDNode *Mgs, SDValue N, SDValue &Base, SDValue &Scale, + SDValue &Index, SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; unsigned AddrSpace = Mgs->getPointerInfo().getAddrSpace(); // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS. @@ -1453,6 +1455,18 @@ bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, return true; } +bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Scale, SDValue &Index, + SDValue &Disp, SDValue &Segment) { + if (auto Mgs = dyn_cast<MaskedGatherScatterSDNode>(Parent)) + return selectAddrOfGatherScatterNode<MaskedGatherScatterSDNode>( + Mgs, N, Base, Scale, Index, Disp, Segment); + if (auto X86Gather = dyn_cast<X86MaskedGatherSDNode>(Parent)) + return selectAddrOfGatherScatterNode<X86MaskedGatherSDNode>( + X86Gather, N, Base, Scale, Index, Disp, Segment); + return false; +} + /// Returns true if it is able to pattern match an addressing mode. /// It returns the operands which make up the maximal addressing mode it can /// match by reference. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 172eba0002d4f..f777e56289884 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1662,6 +1662,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, MaxStoresPerMemcpyOptSize = 4; MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores MaxStoresPerMemmoveOptSize = 4; + + // TODO: These control memcmp expansion in CGP and are set low to prevent + // altering the vector expansion for 16/32 byte memcmp in SelectionDAGBuilder. + MaxLoadsPerMemcmp = 1; + MaxLoadsPerMemcmpOptSize = 1; + // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4). setPrefLoopAlignment(ExperimentalPrefLoopAlignment); @@ -14272,9 +14278,8 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, // If we are inserting an element, see if we can do this more efficiently with // a blend shuffle with a rematerializable vector than a costly integer // insertion. - // TODO: pre-SSE41 targets will tend to use bit masking - this could still - // be beneficial if we are inserting several zeros and can combine the masks.
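To make the blend mask in the hunk below concrete: for a hypothetical v4i32 insertion of a zero element at IdxVal = 2 (values chosen purely for illustration), the loop in this function builds:

    unsigned NumElts = 4, IdxVal = 2;
    SmallVector<int, 8> BlendMask;
    for (unsigned i = 0; i != NumElts; ++i)
      BlendMask.push_back(i == IdxVal ? i + NumElts : i);
    // BlendMask == {0, 1, 6, 3}: an index >= NumElts selects from the
    // rematerializable zero/all-ones vector, so only lane 2 is replaced.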
- if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() && NumElts <= 8) { + if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() + 16 <= EltVT.getSizeInBits()) { SmallVector<int, 8> BlendMask; for (unsigned i = 0; i != NumElts; ++i) BlendMask.push_back(i == IdxVal ? i + NumElts : i); @@ -17621,23 +17626,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && (CondCode == X86::COND_E || CondCode == X86::COND_NE)) { SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2; - SDValue CmpOp0 = Cmp.getOperand(0); + // Apply further optimizations for special cases // (select (x != 0), -1, 0) -> neg & sbb // (select (x == 0), 0, -1) -> neg & sbb if (isNullConstant(Y) && - (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) { - SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32); - SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, - DAG.getConstant(0, DL, - CmpOp0.getValueType()), - CmpOp0); - SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), - DAG.getConstant(X86::COND_B, DL, MVT::i8), - SDValue(Neg.getNode(), 1)); - return Res; - } + (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) { + SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32); + SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType()); + SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, Zero, CmpOp0); + SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), + DAG.getConstant(X86::COND_B, DL, MVT::i8), + SDValue(Neg.getNode(), 1)); + return Res; + } Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType())); @@ -18648,8 +18651,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool SplitStack = MF.shouldSplitStack(); + bool EmitStackProbe = !getStackProbeSymbolName(MF).empty(); bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) || - SplitStack; + SplitStack || EmitStackProbe; SDLoc dl(Op); // Get the inputs. @@ -23705,6 +23709,57 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget, SDValue RetOps[] = {Exract, NewGather.getValue(1)}; return DAG.getMergeValues(RetOps, dl); } + if (N->getMemoryVT() == MVT::v2i32 && Subtarget.hasVLX()) { + // There is a special case when the return type v2i32 is illegal and + // the type legalizer extended it to v2i64. Without this conversion we end up + // with VPGATHERQQ (reading q-words from the memory) instead of VPGATHERQD. + // In order to avoid this situation, we'll build an X86 specific Gather node + // with index v2i64 and value type v4i32. + assert(VT == MVT::v2i64 && Src0.getValueType() == MVT::v2i64 && + "Unexpected type in masked gather"); + Src0 = DAG.getVectorShuffle(MVT::v4i32, dl, + DAG.getBitcast(MVT::v4i32, Src0), + DAG.getUNDEF(MVT::v4i32), { 0, 2, -1, -1 }); + // The mask should match the destination type. Extending mask with zeroes + // is not necessary since instruction itself reads only two values from + // memory.
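A worked illustration of the conversion the comments above describe (lane mapping only, not API):

    <2 x i64> {a, b}   bitcast to v4i32  -->  {a.lo, a.hi, b.lo, b.hi}
    shuffle mask {0, 2, -1, -1}          -->  {a.lo, b.lo, undef, undef}

The shuffle keeps exactly the two 32-bit lanes a v2i32 gather can produce, which is what allows VPGATHERQD to be selected instead of VPGATHERQQ.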
+ Mask = ExtendToType(Mask, MVT::v4i1, DAG, false); + SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; + SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>( + DAG.getVTList(MVT::v4i32, MVT::Other), Ops, dl, N->getMemoryVT(), + N->getMemOperand()); + + SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, MVT::v2i64, + NewGather.getValue(0), DAG); + SDValue RetOps[] = { Sext, NewGather.getValue(1) }; + return DAG.getMergeValues(RetOps, dl); + } + if (N->getMemoryVT() == MVT::v2f32 && Subtarget.hasVLX()) { + // This transformation is for optimization only. + // The type legalizer extended mask and index to 4 elements vector + // in order to match requirements of the common gather node - same + // vector width of index and value. X86 Gather node allows mismatch + // of vector width in order to select more optimal instruction at the + // end. + assert(VT == MVT::v4f32 && Src0.getValueType() == MVT::v4f32 && + "Unexpected type in masked gather"); + if (Mask.getOpcode() == ISD::CONCAT_VECTORS && + ISD::isBuildVectorAllZeros(Mask.getOperand(1).getNode()) && + Index.getOpcode() == ISD::CONCAT_VECTORS && + Index.getOperand(1).isUndef()) { + Mask = ExtendToType(Mask.getOperand(0), MVT::v4i1, DAG, false); + Index = Index.getOperand(0); + } else + return Op; + SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; + SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>( + DAG.getVTList(MVT::v4f32, MVT::Other), Ops, dl, N->getMemoryVT(), + N->getMemOperand()); + + SDValue RetOps[] = { NewGather.getValue(0), NewGather.getValue(1) }; + return DAG.getMergeValues(RetOps, dl); + + } return Op; } @@ -24508,6 +24563,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND"; case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND"; case X86ISD::LWPINS: return "X86ISD::LWPINS"; + case X86ISD::MGATHER: return "X86ISD::MGATHER"; } return nullptr; } @@ -29868,7 +29924,7 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) { if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff; - bool isFastMultiplier = false; + bool IsFastMultiplier = false; if (Diff < 10) { switch ((unsigned char)Diff) { default: @@ -29880,12 +29936,12 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) { case 5: // result = lea base(cond, cond*4) case 8: // result = lea base( , cond*8) case 9: // result = lea base(cond, cond*8) - isFastMultiplier = true; + IsFastMultiplier = true; break; } } - if (isFastMultiplier) { + if (IsFastMultiplier) { APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue(); if (NeedsCondInvert) // Invert the condition if needed. Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, @@ -34841,23 +34897,56 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { !Cmp.getOperand(0).getValueType().isInteger()) return SDValue(); - // (cmp Z, 1) sets the carry flag if Z is 0. SDValue Z = Cmp.getOperand(0); - SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, - DAG.getConstant(1, DL, Z.getValueType())); + EVT ZVT = Z.getValueType(); + + // If X is -1 or 0, then we have an opportunity to avoid constants required in + // the general case below. 
+ if (auto *ConstantX = dyn_cast<ConstantSDNode>(X)) { + // 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with + // fake operands: + // 0 - (Z != 0) --> sbb %eax, %eax, (neg Z) + // -1 + (Z == 0) --> sbb %eax, %eax, (neg Z) + if ((IsSub && CC == X86::COND_NE && ConstantX->isNullValue()) || + (!IsSub && CC == X86::COND_E && ConstantX->isAllOnesValue())) { + SDValue Zero = DAG.getConstant(0, DL, ZVT); + SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32); + SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z); + return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getConstant(X86::COND_B, DL, MVT::i8), + SDValue(Neg.getNode(), 1)); + } + + // cmp with 1 sets the carry flag when Z == 0, so create 0 or -1 using 'sbb' + // with fake operands: + // 0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1) + // -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1) + if ((IsSub && CC == X86::COND_E && ConstantX->isNullValue()) || + (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnesValue())) { + SDValue One = DAG.getConstant(1, DL, ZVT); + SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One); + return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp1); + } + } - SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32); + // (cmp Z, 1) sets the carry flag if Z is 0. + SDValue One = DAG.getConstant(1, DL, ZVT); + SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One); + + // Add the flags type for ADC/SBB nodes. + SDVTList VTs = DAG.getVTList(VT, MVT::i32); // X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1) // X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1) if (CC == X86::COND_NE) return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X, - DAG.getConstant(-1ULL, DL, VT), NewCmp); + DAG.getConstant(-1ULL, DL, VT), Cmp1); // X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1) // X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1) return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X, - DAG.getConstant(0, DL, VT), NewCmp); + DAG.getConstant(0, DL, VT), Cmp1); } static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG, @@ -34976,6 +35065,32 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::ADD, DL, VT, Sad, Phi); } +/// Convert vector increment or decrement to sub/add with an all-ones constant: +/// add X, <1, 1...> --> sub X, <-1, -1...> +/// sub X, <1, 1...> --> add X, <-1, -1...> +/// The all-ones vector constant can be materialized using a pcmpeq instruction +/// that is commonly recognized as an idiom (has no register dependency), so +/// that's better/smaller than loading a splat 1 constant. +static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) { + assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && + "Unexpected opcode for increment/decrement transform"); + + // Pseudo-legality check: getOnesVector() expects one of these types, so bail + // out and wait for legalization if we have an unsupported vector length. + EVT VT = N->getValueType(0); + if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector()) + return SDValue(); + + SDNode *N1 = N->getOperand(1).getNode(); + APInt SplatVal; + if (!ISD::isConstantSplatVector(N1, SplatVal) || !SplatVal.isOneValue()) + return SDValue(); + + SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N)); + unsigned NewOpcode = N->getOpcode() == ISD::ADD ?
ISD::SUB : ISD::ADD; + return DAG.getNode(NewOpcode, SDLoc(N), VT, N->getOperand(0), AllOnesVec); +} + static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { const SDNodeFlags Flags = N->getFlags(); @@ -34995,6 +35110,9 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, isHorizontalBinOp(Op0, Op1, true)) return DAG.getNode(X86ISD::HADD, SDLoc(N), VT, Op0, Op1); + if (SDValue V = combineIncDecVector(N, DAG)) + return V; + return combineAddOrSubToADCOrSBB(N, DAG); } @@ -35028,6 +35146,9 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, isHorizontalBinOp(Op0, Op1, false)) return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1); + if (SDValue V = combineIncDecVector(N, DAG)) + return V; + return combineAddOrSubToADCOrSBB(N, DAG); } @@ -36335,3 +36456,22 @@ void X86TargetLowering::insertCopiesSplitCSR( bool X86TargetLowering::supportSwiftError() const { return Subtarget.is64Bit(); } + +/// Returns the name of the symbol used to emit stack probes or the empty +/// string if not applicable. +StringRef X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const { + // If the function specifically requests stack probes, emit them. + if (MF.getFunction()->hasFnAttribute("probe-stack")) + return MF.getFunction()->getFnAttribute("probe-stack").getValueAsString(); + + // Generally, if we aren't on Windows, the platform ABI does not include + // support for stack probes, so don't emit them. + if (!Subtarget.isOSWindows() || Subtarget.isTargetMachO()) + return ""; + + // We need a stack probe to conform to the Windows ABI. Choose the right + // symbol. + if (Subtarget.is64Bit()) + return Subtarget.isTargetCygMing() ? "___chkstk_ms" : "__chkstk"; + return Subtarget.isTargetCygMing() ? "_alloca" : "_chkstk"; +} diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index f51b6641db2fb..e1ade92979dc0 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -615,7 +615,10 @@ namespace llvm { // Vector truncating store with unsigned/signed saturation VTRUNCSTOREUS, VTRUNCSTORES, // Vector truncating masked store with unsigned/signed saturation - VMTRUNCSTOREUS, VMTRUNCSTORES + VMTRUNCSTOREUS, VMTRUNCSTORES, + + // X86 specific gather + MGATHER // WARNING: Do not add anything in the end unless you want the node to // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all @@ -1056,6 +1059,8 @@ namespace llvm { bool supportSwiftError() const override; + StringRef getStackProbeSymbolName(MachineFunction &MF) const override; + unsigned getMaxSupportedInterleaveFactor() const override { return 4; } /// \brief Lower interleaved load(s) into target specific @@ -1065,6 +1070,12 @@ namespace llvm { ArrayRef<unsigned> Indices, unsigned Factor) const override; + /// \brief Lower interleaved store(s) into target specific + /// instructions/intrinsics. + bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, + unsigned Factor) const override; + + void finalizeLowering(MachineFunction &MF) const override; protected: @@ -1397,6 +1408,19 @@ namespace llvm { } }; + // X86 specific Gather node. 
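A note on the class that follows: it exists because the generic MaskedGatherSDNode insists on index and value vectors of the same width, while the X86 node tolerates the v2i64-index/v4i32-value mismatch exploited in LowerMGATHER above. Both halves of its lifecycle already appear in this patch, schematically:

    // Creation, in LowerMGATHER:
    SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
        DAG.getVTList(MVT::v4i32, MVT::Other), Ops, dl, N->getMemoryVT(),
        N->getMemOperand());

    // Recognition, in selectVectorAddr (classof keys on X86ISD::MGATHER):
    if (auto X86Gather = dyn_cast<X86MaskedGatherSDNode>(Parent))
      return selectAddrOfGatherScatterNode<X86MaskedGatherSDNode>(
          X86Gather, N, Base, Scale, Index, Disp, Segment);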
+ class X86MaskedGatherSDNode : public MaskedGatherScatterSDNode { + public: + X86MaskedGatherSDNode(unsigned Order, + const DebugLoc &dl, SDVTList VTs, EVT MemVT, + MachineMemOperand *MMO) + : MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT, MMO) + {} + static bool classof(const SDNode *N) { + return N->getOpcode() == X86ISD::MGATHER; + } + }; + } // end namespace llvm #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 2620679df2517..01a70323224c3 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -7265,13 +7265,13 @@ avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> { let Predicates = [HasAVX512] in { def : Pat<(ffloor _.FRC:$src), (COPY_TO_REGCLASS (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)), - (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x1))), _.FRC)>; + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x9))), _.FRC)>; def : Pat<(fceil _.FRC:$src), (COPY_TO_REGCLASS (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)), - (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x2))), _.FRC)>; + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xa))), _.FRC)>; def : Pat<(ftrunc _.FRC:$src), (COPY_TO_REGCLASS (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)), - (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x3))), _.FRC)>; + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xb))), _.FRC)>; def : Pat<(frint _.FRC:$src), (COPY_TO_REGCLASS (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)), (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x4))), _.FRC)>; @@ -7281,13 +7281,13 @@ avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> { def : Pat<(ffloor (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)), - addr:$src, (i32 0x1))), _.FRC)>; + addr:$src, (i32 0x9))), _.FRC)>; def : Pat<(fceil (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)), - addr:$src, (i32 0x2))), _.FRC)>; + addr:$src, (i32 0xa))), _.FRC)>; def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)), - addr:$src, (i32 0x3))), _.FRC)>; + addr:$src, (i32 0xb))), _.FRC)>; def : Pat<(frint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)), addr:$src, (i32 0x4))), _.FRC)>; @@ -7869,7 +7869,7 @@ let Predicates = [HasVLX] in { defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128, vx128xmem, mgatherv4i32>, EVEX_V128; defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128, - vx64xmem, mgatherv2i64>, EVEX_V128; + vx64xmem, X86mgatherv2i64>, EVEX_V128; } } @@ -8471,26 +8471,26 @@ multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _, } let Predicates = [HasAVX512] in { def : Pat<(v16f32 (ffloor VR512:$src)), - (VRNDSCALEPSZrri VR512:$src, (i32 0x1))>; + (VRNDSCALEPSZrri VR512:$src, (i32 0x9))>; def : Pat<(v16f32 (fnearbyint VR512:$src)), (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>; def : Pat<(v16f32 (fceil VR512:$src)), - (VRNDSCALEPSZrri VR512:$src, (i32 0x2))>; + (VRNDSCALEPSZrri VR512:$src, (i32 0xA))>; def : Pat<(v16f32 (frint VR512:$src)), (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>; def : Pat<(v16f32 (ftrunc VR512:$src)), - (VRNDSCALEPSZrri VR512:$src, (i32 0x3))>; + (VRNDSCALEPSZrri VR512:$src, (i32 0xB))>; def : Pat<(v8f64 (ffloor VR512:$src)), - 
(VRNDSCALEPDZrri VR512:$src, (i32 0x1))>; + (VRNDSCALEPDZrri VR512:$src, (i32 0x9))>; def : Pat<(v8f64 (fnearbyint VR512:$src)), (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>; def : Pat<(v8f64 (fceil VR512:$src)), - (VRNDSCALEPDZrri VR512:$src, (i32 0x2))>; + (VRNDSCALEPDZrri VR512:$src, (i32 0xA))>; def : Pat<(v8f64 (frint VR512:$src)), (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>; def : Pat<(v8f64 (ftrunc VR512:$src)), - (VRNDSCALEPDZrri VR512:$src, (i32 0x3))>; + (VRNDSCALEPDZrri VR512:$src, (i32 0xB))>; } defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index c28b35b22977a..8b5bbf24f6f63 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -641,7 +641,7 @@ def sdmem : Operand<v2f64> { // SSE pattern fragments //===----------------------------------------------------------------------===// -// Vector load wrappers to prevent folding of non-temporal aligned loads on +// Vector load wrappers to prevent folding of non-temporal aligned loads on // supporting targets. def vec128load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return !Subtarget->hasSSE41() || !cast<LoadSDNode>(N)->isNonTemporal() || @@ -754,16 +754,6 @@ def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>; def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>; def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; -// These are needed to match a scalar memop that is used in a vector-only -// math instruction such as the FP logical ops: andps, andnps, orps, xorps. -// The memory operand is required to be a 128-bit load, so it must be converted -// from a vector to a scalar. -def memopfsf32_128 : PatFrag<(ops node:$ptr), - (f32 (extractelt (memopv4f32 node:$ptr), (iPTR 0)))>; -def memopfsf64_128 : PatFrag<(ops node:$ptr), - (f64 (extractelt (memopv2f64 node:$ptr), (iPTR 0)))>; - - // SSSE3 uses MMX registers for some instructions. They aren't aligned on a // 16-byte boundary. 
// FIXME: 8 byte alignment for mmx reads is not required @@ -773,6 +763,9 @@ def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ def memopmmx : PatFrag<(ops node:$ptr), (x86mmx (memop64 node:$ptr))>; +def X86masked_gather : SDNode<"X86ISD::MGATHER", SDTMaskedGather, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_gather node:$src1, node:$src2, node:$src3) , [{ if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N)) @@ -796,6 +789,15 @@ def mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3), Mgt->getBasePtr().getValueType() == MVT::v2i64); return false; }]>; +def X86mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86masked_gather node:$src1, node:$src2, node:$src3) , [{ + if (X86MaskedGatherSDNode *Mgt = dyn_cast<X86MaskedGatherSDNode>(N)) + return (Mgt->getIndex().getValueType() == MVT::v2i64 || + Mgt->getBasePtr().getValueType() == MVT::v2i64) && + (Mgt->getMemoryVT() == MVT::v2i32 || + Mgt->getMemoryVT() == MVT::v2f32); + return false; +}]>; def mgatherv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_gather node:$src1, node:$src2, node:$src3) , [{ if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N)) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 8490b972eb5c1..fe87bbd994738 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1744,7 +1744,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG; } -def : Pat<(f32 (fpround FR64:$src)), +def : Pat<(f32 (fpround FR64:$src)), (VCVTSD2SSrr (COPY_TO_REGCLASS FR64:$src, FR32), FR64:$src)>, Requires<[UseAVX]>; diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp index 77dead8d24137..f98c2a7e802dd 100644 --- a/lib/Target/X86/X86InstructionSelector.cpp +++ b/lib/Target/X86/X86InstructionSelector.cpp @@ -72,9 +72,24 @@ private: MachineFunction &MF) const; bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; - bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; + bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; + bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; + + // emit insert subreg instruction and insert it before MachineInstr &I + bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I, + MachineRegisterInfo &MRI, MachineFunction &MF) const; + // emit extract subreg instruction and insert it before MachineInstr &I + bool emitExtractSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I, + MachineRegisterInfo &MRI, MachineFunction &MF) const; + + const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank &RB) const; + const TargetRegisterClass *getRegClass(LLT Ty, unsigned Reg, + MachineRegisterInfo &MRI) const; const X86TargetMachine &TM; const X86Subtarget &STI; @@ -113,8 +128,8 @@ X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM, // FIXME: This should be target-independent, inferred from the types declared // for each class in the bank. 
-static const TargetRegisterClass * -getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) { +const TargetRegisterClass * +X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const { if (RB.getID() == X86::GPRRegBankID) { if (Ty.getSizeInBits() <= 8) return &X86::GR8RegClass; @@ -127,13 +142,13 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) { } if (RB.getID() == X86::VECRRegBankID) { if (Ty.getSizeInBits() == 32) - return &X86::FR32XRegClass; + return STI.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass; if (Ty.getSizeInBits() == 64) - return &X86::FR64XRegClass; + return STI.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass; if (Ty.getSizeInBits() == 128) - return &X86::VR128XRegClass; + return STI.hasAVX512() ? &X86::VR128XRegClass : &X86::VR128RegClass; if (Ty.getSizeInBits() == 256) - return &X86::VR256XRegClass; + return STI.hasAVX512() ? &X86::VR256XRegClass : &X86::VR256RegClass; if (Ty.getSizeInBits() == 512) return &X86::VR512RegClass; } @@ -141,10 +156,16 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) { llvm_unreachable("Unknown RegBank!"); } +const TargetRegisterClass * +X86InstructionSelector::getRegClass(LLT Ty, unsigned Reg, + MachineRegisterInfo &MRI) const { + const RegisterBank &RegBank = *RBI.getRegBank(Reg, MRI, TRI); + return getRegClass(Ty, RegBank); +} + // Set X86 Opcode and constrain DestReg. -static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, - MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) { +bool X86InstructionSelector::selectCopy(MachineInstr &I, + MachineRegisterInfo &MRI) const { unsigned DstReg = I.getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { @@ -171,7 +192,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, switch (RegBank.getID()) { case X86::GPRRegBankID: assert((DstSize <= 64) && "GPRs cannot get more than 64-bit width values."); - RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank); + RC = getRegClass(MRI.getType(DstReg), RegBank); // Change the physical register if (SrcSize > DstSize && TargetRegisterInfo::isPhysicalRegister(SrcReg)) { @@ -186,7 +207,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, } break; case X86::VECRRegBankID: - RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank); + RC = getRegClass(MRI.getType(DstReg), RegBank); break; default: llvm_unreachable("Unknown RegBank!"); @@ -220,7 +241,7 @@ bool X86InstructionSelector::select(MachineInstr &I) const { // Certain non-generic instructions also need some special handling. if (I.isCopy()) - return selectCopy(I, TII, MRI, TRI, RBI); + return selectCopy(I, MRI); // TODO: handle more cases - LOAD_STACK_GUARD, PHI return true; @@ -249,6 +270,10 @@ bool X86InstructionSelector::select(MachineInstr &I) const { return true; if (selectUadde(I, MRI, MF)) return true; + if (selectExtract(I, MRI, MF)) + return true; + if (selectInsert(I, MRI, MF)) + return true; return false; } @@ -326,6 +351,34 @@ unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB, return Opc; } +// Fill in an address from the given instruction. 
+void X86SelectAddress(const MachineInstr &I, const MachineRegisterInfo &MRI, + X86AddressMode &AM) { + + assert(I.getOperand(0).isReg() && "unsupported operand."); + assert(MRI.getType(I.getOperand(0).getReg()).isPointer() && + "unsupported type."); + + if (I.getOpcode() == TargetOpcode::G_GEP) { + if (auto COff = getConstantVRegVal(I.getOperand(2).getReg(), MRI)) { + int64_t Imm = *COff; + if (isInt<32>(Imm)) { // Check for displacement overflow. + AM.Disp = static_cast<int32_t>(Imm); + AM.Base.Reg = I.getOperand(1).getReg(); + return; + } + } + } else if (I.getOpcode() == TargetOpcode::G_FRAME_INDEX) { + AM.Base.FrameIndex = I.getOperand(1).getIndex(); + AM.BaseType = X86AddressMode::FrameIndexBase; + return; + } + + // Default behavior. + AM.Base.Reg = I.getOperand(0).getReg(); + return; +} + bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { @@ -340,18 +393,28 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); auto &MemOp = **I.memoperands_begin(); + if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) { + DEBUG(dbgs() << "Atomic load/store not supported yet\n"); + return false; + } + unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlignment()); if (NewOpc == Opc) return false; + X86AddressMode AM; + X86SelectAddress(*MRI.getVRegDef(I.getOperand(1).getReg()), MRI, AM); + I.setDesc(TII.get(NewOpc)); MachineInstrBuilder MIB(MF, I); - if (Opc == TargetOpcode::G_LOAD) - addOffset(MIB, 0); - else { + if (Opc == TargetOpcode::G_LOAD) { + I.RemoveOperand(1); + addFullAddress(MIB, AM); + } else { // G_STORE (VAL, Addr), X86Store instruction (Addr, VAL) + I.RemoveOperand(1); I.RemoveOperand(0); - addOffset(MIB, 0).addUse(DefReg); + addFullAddress(MIB, AM).addUse(DefReg); } return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } @@ -461,11 +524,11 @@ bool X86InstructionSelector::selectTrunc(MachineInstr &I, if (DstRB.getID() != X86::GPRRegBankID) return false; - const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB); + const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB); if (!DstRC) return false; - const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB); + const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB); if (!SrcRC) return false; @@ -519,9 +582,8 @@ bool X86InstructionSelector::selectZext(MachineInstr &I, else return false; - const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); unsigned DefReg = - MRI.createVirtualRegister(getRegClassForTypeOnBank(DstTy, RegBank)); + MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI)); BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::SUBREG_TO_REG), DefReg) @@ -656,6 +718,202 @@ bool X86InstructionSelector::selectUadde(MachineInstr &I, return true; } +bool X86InstructionSelector::selectExtract(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + + if (I.getOpcode() != TargetOpcode::G_EXTRACT) + return false; + + const unsigned DstReg = I.getOperand(0).getReg(); + const unsigned SrcReg = I.getOperand(1).getReg(); + int64_t Index = I.getOperand(2).getImm(); + + const LLT DstTy = MRI.getType(DstReg); + const LLT SrcTy = MRI.getType(SrcReg); + + // For now, handle vector types only. + if (!DstTy.isVector()) + return false; + + if (Index % DstTy.getSizeInBits() != 0) + return false; // Not extract subvector. + + if (Index == 0) { + // Replace by extract subreg copy.
+ if (!emitExtractSubreg(DstReg, SrcReg, I, MRI, MF)) + return false; + + I.eraseFromParent(); + return true; + } + + bool HasAVX = STI.hasAVX(); + bool HasAVX512 = STI.hasAVX512(); + bool HasVLX = STI.hasVLX(); + + if (SrcTy.getSizeInBits() == 256 && DstTy.getSizeInBits() == 128) { + if (HasVLX) + I.setDesc(TII.get(X86::VEXTRACTF32x4Z256rr)); + else if (HasAVX) + I.setDesc(TII.get(X86::VEXTRACTF128rr)); + else + return false; + } else if (SrcTy.getSizeInBits() == 512 && HasAVX512) { + if (DstTy.getSizeInBits() == 128) + I.setDesc(TII.get(X86::VEXTRACTF32x4Zrr)); + else if (DstTy.getSizeInBits() == 256) + I.setDesc(TII.get(X86::VEXTRACTF64x4Zrr)); + else + return false; + } else + return false; + + // Convert to X86 VEXTRACT immediate. + Index = Index / DstTy.getSizeInBits(); + I.getOperand(2).setImm(Index); + + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); +} + +bool X86InstructionSelector::emitExtractSubreg(unsigned DstReg, unsigned SrcReg, + MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + + const LLT DstTy = MRI.getType(DstReg); + const LLT SrcTy = MRI.getType(SrcReg); + unsigned SubIdx = X86::NoSubRegister; + + if (!DstTy.isVector() || !SrcTy.isVector()) + return false; + + assert(SrcTy.getSizeInBits() > DstTy.getSizeInBits() && + "Incorrect Src/Dst register size"); + + if (DstTy.getSizeInBits() == 128) + SubIdx = X86::sub_xmm; + else if (DstTy.getSizeInBits() == 256) + SubIdx = X86::sub_ymm; + else + return false; + + const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI); + const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI); + + SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx); + + if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || + !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { + DEBUG(dbgs() << "Failed to constrain EXTRACT_SUBREG\n"); + return false; + } + + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), DstReg) + .addReg(SrcReg, 0, SubIdx); + + return true; +} + +bool X86InstructionSelector::emitInsertSubreg(unsigned DstReg, unsigned SrcReg, + MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + + const LLT DstTy = MRI.getType(DstReg); + const LLT SrcTy = MRI.getType(SrcReg); + unsigned SubIdx = X86::NoSubRegister; + + // TODO: support scalar types + if (!DstTy.isVector() || !SrcTy.isVector()) + return false; + + assert(SrcTy.getSizeInBits() < DstTy.getSizeInBits() && + "Incorrect Src/Dst register size"); + + if (SrcTy.getSizeInBits() == 128) + SubIdx = X86::sub_xmm; + else if (SrcTy.getSizeInBits() == 256) + SubIdx = X86::sub_ymm; + else + return false; + + const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI); + const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI); + + if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || + !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { + DEBUG(dbgs() << "Failed to constrain INSERT_SUBREG\n"); + return false; + } + + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY)) + .addReg(DstReg, RegState::DefineNoRead, SubIdx) + .addReg(SrcReg); + + return true; +} + +bool X86InstructionSelector::selectInsert(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + + if (I.getOpcode() != TargetOpcode::G_INSERT) + return false; + + const unsigned DstReg = I.getOperand(0).getReg(); + const unsigned SrcReg = I.getOperand(1).getReg(); + const unsigned InsertReg = I.getOperand(2).getReg(); + int64_t Index = I.getOperand(3).getImm(); + + const LLT DstTy =
MRI.getType(DstReg); + const LLT InsertRegTy = MRI.getType(InsertReg); + + // For now, handle vector types only. + if (!DstTy.isVector()) + return false; + + if (Index % InsertRegTy.getSizeInBits() != 0) + return false; // Not insert subvector. + + if (Index == 0 && MRI.getVRegDef(SrcReg)->isImplicitDef()) { + // Replace by subreg copy. + if (!emitInsertSubreg(DstReg, InsertReg, I, MRI, MF)) + return false; + + I.eraseFromParent(); + return true; + } + + bool HasAVX = STI.hasAVX(); + bool HasAVX512 = STI.hasAVX512(); + bool HasVLX = STI.hasVLX(); + + if (DstTy.getSizeInBits() == 256 && InsertRegTy.getSizeInBits() == 128) { + if (HasVLX) + I.setDesc(TII.get(X86::VINSERTF32x4Z256rr)); + else if (HasAVX) + I.setDesc(TII.get(X86::VINSERTF128rr)); + else + return false; + } else if (DstTy.getSizeInBits() == 512 && HasAVX512) { + if (InsertRegTy.getSizeInBits() == 128) + I.setDesc(TII.get(X86::VINSERTF32x4Zrr)); + else if (InsertRegTy.getSizeInBits() == 256) + I.setDesc(TII.get(X86::VINSERTF64x4Zrr)); + else + return false; + } else + return false; + + // Convert to X86 VINSERT immediate. + Index = Index / InsertRegTy.getSizeInBits(); + + I.getOperand(3).setImm(Index); + + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); +} + InstructionSelector * llvm::createX86InstructionSelector(const X86TargetMachine &TM, X86Subtarget &Subtarget, diff --git a/lib/Target/X86/X86InterleavedAccess.cpp b/lib/Target/X86/X86InterleavedAccess.cpp index 806d6cc888f0f..f0ed4bc16e2f9 100644 --- a/lib/Target/X86/X86InterleavedAccess.cpp +++ b/lib/Target/X86/X86InterleavedAccess.cpp @@ -16,6 +16,7 @@ #include "X86ISelLowering.h" #include "X86TargetMachine.h" +#include "llvm/Analysis/VectorUtils.h" using namespace llvm; @@ -50,9 +51,8 @@ class X86InterleavedAccessGroup { IRBuilder<> &Builder; /// \brief Breaks down a vector \p 'Inst' of N elements into \p NumSubVectors - /// sub vectors of type \p T. Returns true and the sub-vectors in - /// \p DecomposedVectors if it decomposes the Inst, returns false otherwise. - bool decompose(Instruction *Inst, unsigned NumSubVectors, VectorType *T, + /// sub vectors of type \p T. Returns the sub-vectors in \p DecomposedVectors. + void decompose(Instruction *Inst, unsigned NumSubVectors, VectorType *T, SmallVectorImpl<Instruction *> &DecomposedVectors); /// \brief Performs matrix transposition on a 4x4 matrix \p InputVectors and @@ -80,8 +80,7 @@ public: /// target information \p STarget. explicit X86InterleavedAccessGroup(Instruction *I, ArrayRef<ShuffleVectorInst *> Shuffs, - ArrayRef<unsigned> Ind, - const unsigned F, + ArrayRef<unsigned> Ind, const unsigned F, const X86Subtarget &STarget, IRBuilder<> &B) : Inst(I), Shuffles(Shuffs), Indices(Ind), Factor(F), Subtarget(STarget), @@ -102,48 +101,61 @@ bool X86InterleavedAccessGroup::isSupported() const { uint64_t ShuffleVecSize = DL.getTypeSizeInBits(ShuffleVecTy); Type *ShuffleEltTy = ShuffleVecTy->getVectorElementType(); - if (DL.getTypeSizeInBits(Inst->getType()) < Factor * ShuffleVecSize) - return false; + // Currently, lowering is supported for 4-element vectors of 64 bits on AVX. + uint64_t ExpectedShuffleVecSize; + if (isa<LoadInst>(Inst)) + ExpectedShuffleVecSize = 256; + else + ExpectedShuffleVecSize = 1024; - // Currently, lowering is supported for 64 bits on AVX.
- if (!Subtarget.hasAVX() || ShuffleVecSize != 256 || + if (!Subtarget.hasAVX() || ShuffleVecSize != ExpectedShuffleVecSize || DL.getTypeSizeInBits(ShuffleEltTy) != 64 || Factor != 4) return false; return true; } -bool X86InterleavedAccessGroup::decompose( +void X86InterleavedAccessGroup::decompose( Instruction *VecInst, unsigned NumSubVectors, VectorType *SubVecTy, SmallVectorImpl<Instruction *> &DecomposedVectors) { + + assert((isa<LoadInst>(VecInst) || isa<ShuffleVectorInst>(VecInst)) && + "Expected Load or Shuffle"); + Type *VecTy = VecInst->getType(); (void)VecTy; assert(VecTy->isVectorTy() && DL.getTypeSizeInBits(VecTy) >= DL.getTypeSizeInBits(SubVecTy) * NumSubVectors && "Invalid Inst-size!!!"); - assert(VecTy->getVectorElementType() == SubVecTy->getVectorElementType() && - "Element type mismatched!!!"); - if (!isa<LoadInst>(VecInst)) - return false; + if (auto *SVI = dyn_cast<ShuffleVectorInst>(VecInst)) { + Value *Op0 = SVI->getOperand(0); + Value *Op1 = SVI->getOperand(1); + + // Generate N(= NumSubVectors) shuffles of T(= SubVecTy) type. + for (unsigned i = 0; i < NumSubVectors; ++i) + DecomposedVectors.push_back( + cast<ShuffleVectorInst>(Builder.CreateShuffleVector( + Op0, Op1, createSequentialMask(Builder, Indices[i], + SubVecTy->getVectorNumElements(), 0)))); + return; + } + // Decompose the load instruction. LoadInst *LI = cast<LoadInst>(VecInst); Type *VecBasePtrTy = SubVecTy->getPointerTo(LI->getPointerAddressSpace()); - Value *VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy); - // Generate N loads of T type + // Generate N loads of T type. for (unsigned i = 0; i < NumSubVectors; i++) { - // TODO: Support inbounds GEP + // TODO: Support inbounds GEP. Value *NewBasePtr = Builder.CreateGEP(VecBasePtr, Builder.getInt32(i)); Instruction *NewLoad = Builder.CreateAlignedLoad(NewBasePtr, LI->getAlignment()); DecomposedVectors.push_back(NewLoad); } - - return true; } void X86InterleavedAccessGroup::transpose_4x4( @@ -181,21 +193,46 @@ void X86InterleavedAccessGroup::transpose_4x4( // instructions/intrinsics. bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() { SmallVector<Instruction *, 4> DecomposedVectors; - VectorType *VecTy = Shuffles[0]->getType(); - // Try to generate target-sized register(/instruction). - if (!decompose(Inst, Factor, VecTy, DecomposedVectors)) - return false; - SmallVector<Value *, 4> TransposedVectors; - // Perform matrix-transposition in order to compute interleaved - // results by generating some sort of (optimized) target-specific - // instructions. + VectorType *ShuffleTy = Shuffles[0]->getType(); + + if (isa<LoadInst>(Inst)) { + // Try to generate target-sized register(/instruction). + decompose(Inst, Factor, ShuffleTy, DecomposedVectors); + + // Perform matrix-transposition in order to compute interleaved + // results by generating some sort of (optimized) target-specific + // instructions. + transpose_4x4(DecomposedVectors, TransposedVectors); + + // Now replace the unoptimized-interleaved-vectors with the + // transposed-interleaved vectors. + for (unsigned i = 0, e = Shuffles.size(); i < e; ++i) + Shuffles[i]->replaceAllUsesWith(TransposedVectors[Indices[i]]); + + return true; + } + + Type *ShuffleEltTy = ShuffleTy->getVectorElementType(); + unsigned NumSubVecElems = ShuffleTy->getVectorNumElements() / Factor; + + // Lower the interleaved stores: + // 1. Decompose the interleaved wide shuffle into individual shuffle + // vectors. 
+ decompose(Shuffles[0], Factor, + VectorType::get(ShuffleEltTy, NumSubVecElems), DecomposedVectors); + + // 2. Transpose the interleaved-vectors into vectors of contiguous + // elements. transpose_4x4(DecomposedVectors, TransposedVectors); - // Now replace the unoptimized-interleaved-vectors with the - // transposed-interleaved vectors. - for (unsigned i = 0; i < Shuffles.size(); i++) - Shuffles[i]->replaceAllUsesWith(TransposedVectors[Indices[i]]); + // 3. Concatenate the contiguous-vectors back into a wide vector. + Value *WideVec = concatenateVectors(Builder, TransposedVectors); + + // 4. Generate a store instruction for wide-vec. + StoreInst *SI = cast<StoreInst>(Inst); + Builder.CreateAlignedStore(WideVec, SI->getPointerOperand(), + SI->getAlignment()); return true; } @@ -220,3 +257,29 @@ bool X86TargetLowering::lowerInterleavedLoad( return Grp.isSupported() && Grp.lowerIntoOptimizedSequence(); } + +bool X86TargetLowering::lowerInterleavedStore(StoreInst *SI, + ShuffleVectorInst *SVI, + unsigned Factor) const { + assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && + "Invalid interleave factor"); + + assert(SVI->getType()->getVectorNumElements() % Factor == 0 && + "Invalid interleaved store"); + + // Holds the indices of SVI that correspond to the starting index of each + // interleaved shuffle. + SmallVector<unsigned, 4> Indices; + auto Mask = SVI->getShuffleMask(); + for (unsigned i = 0; i < Factor; i++) + Indices.push_back(Mask[i]); + + ArrayRef<ShuffleVectorInst *> Shuffles = makeArrayRef(SVI); + + // Create an interleaved access group. + IRBuilder<> Builder(SI); + X86InterleavedAccessGroup Grp(SI, Shuffles, Indices, Factor, Subtarget, + Builder); + + return Grp.isSupported() && Grp.lowerIntoOptimizedSequence(); +} diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index bc73bb1ae8c51..6b1add8ff8ed1 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -510,12 +510,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::SHUF128, 0), X86_INTRINSIC_DATA(avx512_mask_broadcasti64x4_512, BRCST_SUBVEC_TO_VEC, X86ISD::SHUF128, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_d_128, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_d_256, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_d_512, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM, @@ -524,16 +518,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_cmp_ps_256, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPM, X86ISD::CMPM_RND), - X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_sd, CMP_MASK_SCALAR_CC, X86ISD::FSETCCM, X86ISD::FSETCCM_RND), X86_INTRINSIC_DATA(avx512_mask_cmp_ss, CMP_MASK_SCALAR_CC, X86ISD::FSETCCM, X86ISD::FSETCCM_RND), - X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, 
CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_EXPAND_IN_REG, X86ISD::COMPRESS, 0), X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_EXPAND_IN_REG, @@ -1171,18 +1159,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::FSUBS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FSUBS_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_b_128, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_b_256, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_b_512, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_d_128, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_d_256, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_d_512, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_q_128, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_q_256, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_q_512, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0), - X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK_RM, X86ISD::CVTPH2PS, 0), X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK_RM, diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp index 979aaee110aa4..a584eabcc1b28 100644 --- a/lib/Target/X86/X86LegalizerInfo.cpp +++ b/lib/Target/X86/X86LegalizerInfo.cpp @@ -214,12 +214,24 @@ void X86LegalizerInfo::setLegalizerInfoAVX() { if (!Subtarget.hasAVX()) return; + const LLT v16s8 = LLT::vector(16, 8); + const LLT v8s16 = LLT::vector(8, 16); + const LLT v4s32 = LLT::vector(4, 32); + const LLT v2s64 = LLT::vector(2, 64); + + const LLT v32s8 = LLT::vector(32, 8); + const LLT v16s16 = LLT::vector(16, 16); const LLT v8s32 = LLT::vector(8, 32); const LLT v4s64 = LLT::vector(4, 64); for (unsigned MemOp : {G_LOAD, G_STORE}) for (auto Ty : {v8s32, v4s64}) setAction({MemOp, Ty}, Legal); + + for (auto Ty : {v32s8, v16s16, v8s32, v4s64}) + setAction({G_INSERT, Ty}, Legal); + for (auto Ty : {v16s8, v8s16, v4s32, v2s64}) + setAction({G_INSERT, 1, Ty}, Legal); } void X86LegalizerInfo::setLegalizerInfoAVX2() { @@ -243,6 +255,18 @@ void X86LegalizerInfo::setLegalizerInfoAVX512() { if (!Subtarget.hasAVX512()) return; + const LLT v16s8 = LLT::vector(16, 8); + const LLT v8s16 = LLT::vector(8, 16); + const LLT v4s32 = LLT::vector(4, 32); + const LLT v2s64 = LLT::vector(2, 64); + + const LLT v32s8 = LLT::vector(32, 8); + const LLT v16s16 = LLT::vector(16, 16); + const LLT v8s32 = LLT::vector(8, 32); + const LLT v4s64 = LLT::vector(4, 64); + + const LLT v64s8 = LLT::vector(64, 8); + const LLT v32s16 = LLT::vector(32, 16); const LLT v16s32 = LLT::vector(16, 32); const LLT v8s64 = LLT::vector(8, 64); @@ -256,13 +280,15 @@ void X86LegalizerInfo::setLegalizerInfoAVX512() { for (auto Ty : {v16s32, v8s64}) setAction({MemOp, Ty}, Legal); + for (auto Ty : {v64s8, v32s16, v16s32, v8s64}) + setAction({G_INSERT, Ty}, Legal); + for (auto Ty : {v32s8, v16s16, v8s32, v4s64, v16s8, v8s16, v4s32, v2s64}) + setAction({G_INSERT, 1, Ty}, Legal); + /************ VLX *******************/ 
if (!Subtarget.hasVLX()) return; - const LLT v4s32 = LLT::vector(4, 32); - const LLT v8s32 = LLT::vector(8, 32); - for (auto Ty : {v4s32, v8s32}) setAction({G_MUL, Ty}, Legal); } diff --git a/lib/Target/X86/X86MacroFusion.cpp b/lib/Target/X86/X86MacroFusion.cpp index dd21e2b7c4a13..8fdf10617059a 100644 --- a/lib/Target/X86/X86MacroFusion.cpp +++ b/lib/Target/X86/X86MacroFusion.cpp @@ -2,39 +2,31 @@ // // The LLVM Compiler Infrastructure // -// \file This file is distributed under the University of Illinois Open Source +// This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // -// This file contains the X86 implementation of the DAG scheduling mutation to -// pair instructions back to back. +/// \file This file contains the X86 implementation of the DAG scheduling +/// mutation to pair instructions back to back. // //===----------------------------------------------------------------------===// #include "X86MacroFusion.h" #include "X86Subtarget.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" - -#define DEBUG_TYPE "misched" - -STATISTIC(NumFused, "Number of instr pairs fused"); +#include "llvm/CodeGen/MacroFusion.h" using namespace llvm; -static cl::opt<bool> EnableMacroFusion("x86-misched-fusion", cl::Hidden, - cl::desc("Enable scheduling for macro fusion."), cl::init(true)); - -namespace { - -/// \brief Verify that the instruction pair, First and Second, -/// should be scheduled back to back. If either instruction is unspecified, -/// then verify that the other instruction may be part of a pair at all. -static bool shouldScheduleAdjacent(const X86Subtarget &ST, - const MachineInstr *First, - const MachineInstr *Second) { +/// \brief Check if the instruction pair, FirstMI and SecondMI, should be fused +/// together. When FirstMI is unspecified, check whether SecondMI may be part +/// of a fused pair at all. +static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, + const TargetSubtargetInfo &TSI, + const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + const X86Subtarget &ST = static_cast<const X86Subtarget&>(TSI); // Check if this processor supports macro-fusion. Since this is a minor // heuristic, we haven't specifically reserved a feature. hasAVX is a decent // proxy for SandyBridge+. @@ -47,13 +39,10 @@ static bool shouldScheduleAdjacent(const X86Subtarget &ST, FuseInc } FuseKind; - assert((First || Second) && "At least one instr must be specified"); - unsigned FirstOpcode = First - ? First->getOpcode() + unsigned FirstOpcode = FirstMI + ? FirstMI->getOpcode() : static_cast<unsigned>(X86::INSTRUCTION_LIST_END); - unsigned SecondOpcode = Second - ? Second->getOpcode() - : static_cast<unsigned>(X86::INSTRUCTION_LIST_END); + unsigned SecondOpcode = SecondMI.getOpcode(); switch (SecondOpcode) { default: @@ -203,69 +192,11 @@ static bool shouldScheduleAdjacent(const X86Subtarget &ST, } } -/// \brief Post-process the DAG to create cluster edges between instructions -/// that may be fused by the processor into a single operation.
-class X86MacroFusion : public ScheduleDAGMutation { -public: - X86MacroFusion() {} - - void apply(ScheduleDAGInstrs *DAGInstrs) override; -}; - -void X86MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { - ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); - const X86Subtarget &ST = DAG->MF.getSubtarget<X86Subtarget>(); - - // For now, assume targets can only fuse with the branch. - SUnit &ExitSU = DAG->ExitSU; - MachineInstr *Branch = ExitSU.getInstr(); - if (!Branch || !shouldScheduleAdjacent(ST, nullptr, Branch)) - return; - - for (SDep &PredDep : ExitSU.Preds) { - if (PredDep.isWeak()) - continue; - SUnit &SU = *PredDep.getSUnit(); - MachineInstr &Pred = *SU.getInstr(); - if (!shouldScheduleAdjacent(ST, &Pred, Branch)) - continue; - - // Create a single weak edge from SU to ExitSU. The only effect is to cause - // bottom-up scheduling to heavily prioritize the clustered SU. There is no - // need to copy predecessor edges from ExitSU to SU, since top-down - // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling - // of SU, we could create an artificial edge from the deepest root, but it - // hasn't been needed yet. - bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster)); - (void)Success; - assert(Success && "No DAG nodes should be reachable from ExitSU"); - - // Adjust latency of data deps between the nodes. - for (SDep &PredDep : ExitSU.Preds) - if (PredDep.getSUnit() == &SU) - PredDep.setLatency(0); - for (SDep &SuccDep : SU.Succs) - if (SuccDep.getSUnit() == &ExitSU) - SuccDep.setLatency(0); - - ++NumFused; - DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse "; - SU.print(dbgs(), DAG); - dbgs() << " - ExitSU" - << " / " << DAG->TII->getName(Pred.getOpcode()) << " - " - << DAG->TII->getName(Branch->getOpcode()) << '\n';); - - break; - } -} - -} // end namespace - namespace llvm { std::unique_ptr<ScheduleDAGMutation> createX86MacroFusionDAGMutation () { - return EnableMacroFusion ? make_unique<X86MacroFusion>() : nullptr; + return createBranchMacroFusionDAGMutation(shouldScheduleAdjacent); } } // end namespace llvm diff --git a/lib/Target/X86/X86MacroFusion.h b/lib/Target/X86/X86MacroFusion.h index e630f802e8e63..13fa2d78a0185 100644 --- a/lib/Target/X86/X86MacroFusion.h +++ b/lib/Target/X86/X86MacroFusion.h @@ -2,23 +2,18 @@ // // The LLVM Compiler Infrastructure // -// \file This file is distributed under the University of Illinois Open Source +// This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // -// This file contains the X86 definition of the DAG scheduling mutation to pair -// instructions back to back. +/// \file This file contains the X86 definition of the DAG scheduling mutation +/// to pair instructions back to back. // //===----------------------------------------------------------------------===// -#include "X86InstrInfo.h" #include "llvm/CodeGen/MachineScheduler.h" -//===----------------------------------------------------------------------===// -// X86MacroFusion - DAG post-processing to encourage fusion of macro ops. 
-//===----------------------------------------------------------------------===// - namespace llvm { /// Note that you have to add: diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 278b57eb00b74..a9f42cacf7886 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -91,6 +91,8 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { return llvm::make_unique<X86FreeBSDTargetObjectFile>(); if (TT.isOSLinux() || TT.isOSNaCl() || TT.isOSIAMCU()) return llvm::make_unique<X86LinuxNaClTargetObjectFile>(); + if (TT.isOSSolaris()) + return llvm::make_unique<X86SolarisTargetObjectFile>(); if (TT.isOSFuchsia()) return llvm::make_unique<X86FuchsiaTargetObjectFile>(); if (TT.isOSBinFormatELF()) diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 4fd95717478e9..8627c06d44313 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -86,6 +86,12 @@ X86LinuxNaClTargetObjectFile::Initialize(MCContext &Ctx, InitializeELF(TM.Options.UseInitArray); } +void X86SolarisTargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); +} + const MCExpr *X86WindowsTargetObjectFile::lowerRelativeReference( const GlobalValue *LHS, const GlobalValue *RHS, const TargetMachine &TM) const { diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index 39d2e84e5ed77..f6aa570b6332a 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -66,6 +66,11 @@ namespace llvm { void Initialize(MCContext &Ctx, const TargetMachine &TM) override; }; + /// \brief This implementation is used for Solaris on x86/x86-64. + class X86SolarisTargetObjectFile : public X86ELFTargetObjectFile { + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; + }; + /// \brief This implementation is used for Windows targets on x86 and x86-64. class X86WindowsTargetObjectFile : public TargetLoweringObjectFileCOFF { const MCExpr * diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 11ba7025e1b73..5ba8534d32d33 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2178,17 +2178,6 @@ int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy, return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace); } -bool X86TTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, - TargetTransformInfo::LSRCost &C2) { - // X86 specific here are "instruction number 1st priority". - return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, - C1.NumIVMuls, C1.NumBaseAdds, - C1.ScaleCost, C1.ImmCost, C1.SetupCost) < - std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, - C2.NumIVMuls, C2.NumBaseAdds, - C2.ScaleCost, C2.ImmCost, C2.SetupCost); -} - bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) { Type *ScalarTy = DataTy->getScalarType(); int DataWidth = isa<PointerType>(ScalarTy) ? @@ -2243,6 +2232,12 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller, return (CallerBits & CalleeBits) == CalleeBits; } +bool X86TTIImpl::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) { + // TODO: We can increase these based on available vector ops. + MaxLoadSize = ST->is64Bit() ? 
8 : 4; + return true; +} + bool X86TTIImpl::enableInterleavedAccessVectorization() { // TODO: We expect this to be beneficial regardless of arch, // but there are currently some unexplained performance artifacts on Atom. @@ -2250,6 +2245,114 @@ bool X86TTIImpl::enableInterleavedAccessVectorization() { return !(ST->isAtom()); } +// Get estimation for interleaved load/store operations for AVX2. +// \p Factor is the interleaved-access factor (stride) - number of +// (interleaved) elements in the group. +// \p Indices contains the indices for a strided load: when the +// interleaved load has gaps they indicate which elements are used. +// If Indices is empty (or if the number of indices is equal to the size +// of the interleaved-access as given in \p Factor) the access has no gaps. +// +// As opposed to AVX-512, AVX2 does not have generic shuffles that allow +// computing the cost using a generic formula as a function of generic +// shuffles. We therefore use a lookup table instead, filled according to +// the instruction sequences that codegen currently generates. +int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, + unsigned Factor, + ArrayRef<unsigned> Indices, + unsigned Alignment, + unsigned AddressSpace) { + + // We currently support only fully-interleaved groups, with no gaps. + // TODO: Also support strided loads (interleaved groups with gaps). + if (Indices.size() && Indices.size() != Factor) + return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); + + // VecTy for interleave memop is <VF*Factor x Elt>. + // So, for VF=4, Interleave Factor = 3, Element type = i32 we have + // VecTy = <12 x i32>. + MVT LegalVT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; + + // This function can be called with VecTy=<6xi128>, Factor=3, in which case + // VF=2, while v2i128 is an unsupported MVT vector type + // (see MachineValueType.h::getVectorVT()). + if (!LegalVT.isVector()) + return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); + + unsigned VF = VecTy->getVectorNumElements() / Factor; + Type *ScalarTy = VecTy->getVectorElementType(); + + // Calculate the number of memory operations (NumOfMemOps) required + // to load/store the VecTy. + unsigned VecTySize = DL.getTypeStoreSize(VecTy); + unsigned LegalVTSize = LegalVT.getStoreSize(); + unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize; + + // Get the cost of one memory operation. + Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(), + LegalVT.getVectorNumElements()); + unsigned MemOpCost = + getMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace); + + VectorType *VT = VectorType::get(ScalarTy, VF); + EVT ETy = TLI->getValueType(DL, VT); + if (!ETy.isSimple()) + return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); + + // TODO: Complete for other data-types and strides. + // Each combination of Stride, ElementTy and VF results in a different + // sequence; the cost tables are therefore accessed with: + // Factor (stride) and VectorType=VFxElemType. + // The Cost accounts only for the shuffle sequence; + // The cost of the loads/stores is accounted for separately.
+ // + static const CostTblEntry AVX2InterleavedLoadTbl[] = { + { 3, MVT::v2i8, 10 }, //(load 6i8 and) deinterleave into 3 x 2i8 + { 3, MVT::v4i8, 4 }, //(load 12i8 and) deinterleave into 3 x 4i8 + { 3, MVT::v8i8, 9 }, //(load 24i8 and) deinterleave into 3 x 8i8 + { 3, MVT::v16i8, 18}, //(load 48i8 and) deinterleave into 3 x 16i8 + { 3, MVT::v32i8, 42 }, //(load 96i8 and) deinterleave into 3 x 32i8 + + { 4, MVT::v2i8, 12 }, //(load 8i8 and) deinterleave into 4 x 2i8 + { 4, MVT::v4i8, 4 }, //(load 16i8 and) deinterleave into 4 x 4i8 + { 4, MVT::v8i8, 20 }, //(load 32i8 and) deinterleave into 4 x 8i8 + { 4, MVT::v16i8, 39 }, //(load 64i8 and) deinterleave into 4 x 16i8 + { 4, MVT::v32i8, 80 } //(load 128i8 and) deinterleave into 4 x 32i8 + }; + + static const CostTblEntry AVX2InterleavedStoreTbl[] = { + { 3, MVT::v2i8, 7 }, //interleave 3 x 2i8 into 6i8 (and store) + { 3, MVT::v4i8, 8 }, //interleave 3 x 4i8 into 12i8 (and store) + { 3, MVT::v8i8, 11 }, //interleave 3 x 8i8 into 24i8 (and store) + { 3, MVT::v16i8, 17 }, //interleave 3 x 16i8 into 48i8 (and store) + { 3, MVT::v32i8, 32 }, //interleave 3 x 32i8 into 96i8 (and store) + + { 4, MVT::v2i8, 12 }, //interleave 4 x 2i8 into 8i8 (and store) + { 4, MVT::v4i8, 9 }, //interleave 4 x 4i8 into 16i8 (and store) + { 4, MVT::v8i8, 16 }, //interleave 4 x 8i8 into 32i8 (and store) + { 4, MVT::v16i8, 20 }, //interleave 4 x 16i8 into 64i8 (and store) + { 4, MVT::v32i8, 40 } //interleave 4 x 32i8 into 128i8 (and store) + }; + + if (Opcode == Instruction::Load) { + if (const auto *Entry = + CostTableLookup(AVX2InterleavedLoadTbl, Factor, ETy.getSimpleVT())) + return NumOfMemOps * MemOpCost + Entry->Cost; + } else { + assert(Opcode == Instruction::Store && + "Expected Store Instruction at this point"); + if (const auto *Entry = + CostTableLookup(AVX2InterleavedStoreTbl, Factor, ETy.getSimpleVT())) + return NumOfMemOps * MemOpCost + Entry->Cost; + } + + return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); +} + // Get estimation for interleaved load/store operations and strided load. // \p Indices contains indices for strided load. // \p Factor - the factor of interleaving. 
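To make the table lookup concrete, the returned cost composes the per-memop cost with the table entry as NumOfMemOps * MemOpCost + Entry->Cost. A hand-worked example follows (a sketch only; the MemOpCost of 1 for a legal 256-bit AVX2 load is an assumed value, not computed from the patch):

// Hypothetical walk-through for an interleaved load on AVX2:
//   VecTy       = <64 x i8>, Factor = 4   =>  VF = 64 / 4 = 16, ETy = v16i8
//   LegalVT     = v32i8 (the 512-bit vector splits into two 256-bit halves)
//   VecTySize   = 64 bytes, LegalVTSize = 32 bytes
//   NumOfMemOps = (64 + 32 - 1) / 32 = 2
//   Table entry {4, MVT::v16i8, 39} from AVX2InterleavedLoadTbl
//   Total cost  = NumOfMemOps * MemOpCost + 39 = 2 * 1 + 39 = 41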
@@ -2358,6 +2461,10 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, if (ST->hasAVX512() && HasAVX512Solution && (!RequiresBW || ST->hasBWI())) return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices, Alignment, AddressSpace); + if (ST->hasAVX2()) + return getInterleavedMemoryOpCostAVX2(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); + return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, Alignment, AddressSpace); } diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h index 09ce2c90498d9..ad0a0a2113012 100644 --- a/lib/Target/X86/X86TargetTransformInfo.h +++ b/lib/Target/X86/X86TargetTransformInfo.h @@ -93,6 +93,9 @@ public: int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment, unsigned AddressSpace); + int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, + unsigned Factor, ArrayRef<unsigned> Indices, + unsigned Alignment, unsigned AddressSpace); int getIntImmCost(int64_t); @@ -101,15 +104,13 @@ public: int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty); - bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, - TargetTransformInfo::LSRCost &C2); bool isLegalMaskedLoad(Type *DataType); bool isLegalMaskedStore(Type *DataType); bool isLegalMaskedGather(Type *DataType); bool isLegalMaskedScatter(Type *DataType); bool areInlineCompatible(const Function *Caller, const Function *Callee) const; - + bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize); bool enableInterleavedAccessVectorization(); private: int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask, diff --git a/lib/Testing/Support/LLVMBuild.txt b/lib/Testing/Support/LLVMBuild.txt index 40853e8172d55..173cfb4a5587e 100644 --- a/lib/Testing/Support/LLVMBuild.txt +++ b/lib/Testing/Support/LLVMBuild.txt @@ -20,3 +20,4 @@ type = Library name = TestingSupport parent = Libraries required_libraries = Support +installed = 0 diff --git a/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/lib/ToolDrivers/llvm-lib/LibDriver.cpp index 797e4ffc2d456..f304b9c9a8dac 100644 --- a/lib/ToolDrivers/llvm-lib/LibDriver.cpp +++ b/lib/ToolDrivers/llvm-lib/LibDriver.cpp @@ -31,7 +31,7 @@ namespace { enum { OPT_INVALID = 0, -#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11) OPT_##ID, +#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID, #include "Options.inc" #undef OPTION }; @@ -41,11 +41,9 @@ enum { #undef PREFIX static const llvm::opt::OptTable::Info infoTable[] = { -#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10) \ - { \ - X1, X2, X9, X10, OPT_##ID, llvm::opt::Option::KIND##Class, X8, X7, \ - OPT_##GROUP, OPT_##ALIAS, X6 \ - }, +#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10, X11) \ + {X1, X2, X9, X10, OPT_##ID, llvm::opt::Option::KIND##Class, \ + X8, X7, OPT_##GROUP, OPT_##ALIAS, X6, X11}, #include "Options.inc" #undef OPTION }; diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 4bc64ab698ff9..087a8aa2c624d 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -72,10 +72,6 @@ static cl::opt<bool> RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); -static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false), - cl::Hidden, - 
cl::desc("Run the load combining pass")); - static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass")); @@ -174,7 +170,6 @@ PassManagerBuilder::PassManagerBuilder() { SLPVectorize = RunSLPVectorization; LoopVectorize = RunLoopVectorization; RerollLoops = RunLoopRerolling; - LoadCombine = RunLoadCombine; NewGVN = RunNewGVN; DisableGVNLoadPRE = false; VerifyInput = false; @@ -296,6 +291,8 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) { InstrProfOptions Options; if (!PGOInstrGen.empty()) Options.InstrProfileOutput = PGOInstrGen; + Options.DoCounterPromotion = true; + MPM.add(createLoopRotatePass()); MPM.add(createInstrProfilingLegacyPass(Options)); } if (!PGOInstrUse.empty()) @@ -407,9 +404,6 @@ void PassManagerBuilder::addFunctionSimplificationPasses( } } - if (LoadCombine) - MPM.add(createLoadCombinePass()); - MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs // Clean up after everything. @@ -850,9 +844,6 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // alignments. PM.add(createAlignmentFromAssumptionsPass()); - if (LoadCombine) - PM.add(createLoadCombinePass()); - // Cleanup and simplify the code after the scalar optimizations. addInstructionCombiningPass(PM); addExtensionsToPM(EP_Peephole, PM); diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index 67bc8f5f6b7ad..656421ee58df8 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -690,6 +690,9 @@ bool SampleProfileLoader::inlineHotFunctions( for (auto I : CIS) { InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr); Function *CalledFunction = CallSite(I).getCalledFunction(); + // Do not inline recursive calls. + if (CalledFunction == &F) + continue; Instruction *DI = I; if (!CalledFunction && !PromotedInsns.count(I) && CallSite(I).isIndirectCall()) diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 287a5167fe2ae..d5f0dd1914157 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -988,15 +988,24 @@ static Instruction *foldAddWithConstant(BinaryOperator &Add, return new ZExtInst(Builder.CreateNUWAdd(X, NewC), Ty); } - // Shifts and add used to flip and mask off the low bit: - // add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1 - const APInt *C3; - if (C->isOneValue() && - match(Op0, - m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C2)), m_APInt(C3)))) && - C2 == C3 && *C2 == Ty->getScalarSizeInBits() - 1) { - Value *NotX = Builder.CreateNot(X); - return BinaryOperator::CreateAnd(NotX, ConstantInt::get(Ty, 1)); + if (C->isOneValue() && Op0->hasOneUse()) { + // add (sext i1 X), 1 --> zext (not X) + // TODO: The smallest IR representation is (select X, 0, 1), and that would + // not require the one-use check. But we need to remove a transform in + // visitSelect and make sure that IR value tracking for select is equal or + // better than for these ops. 
+ if (match(Op0, m_SExt(m_Value(X))) && + X->getType()->getScalarSizeInBits() == 1) + return new ZExtInst(Builder.CreateNot(X), Ty); + + // Shifts and add used to flip and mask off the low bit: + // add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1 + const APInt *C3; + if (match(Op0, m_AShr(m_Shl(m_Value(X), m_APInt(C2)), m_APInt(C3))) && + C2 == C3 && *C2 == Ty->getScalarSizeInBits() - 1) { + Value *NotX = Builder.CreateNot(X); + return BinaryOperator::CreateAnd(NotX, ConstantInt::get(Ty, 1)); + } } return nullptr; diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index a881bda5ba98d..d3d8cefe97353 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1097,20 +1097,11 @@ static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast, Type *DestTy = Logic.getType(); Type *SrcTy = Cast->getSrcTy(); - // If the first operand is bitcast, move the logic operation ahead of the - // bitcast (do the logic operation in the original type). This can eliminate - // bitcasts and allow combines that would otherwise be impeded by the bitcast. + // Move the logic operation ahead of a zext if the constant is unchanged in + // the smaller source type. Performing the logic in a smaller type may provide + // more information to later folds, and the smaller logic instruction may be + // cheaper (particularly in the case of vectors). Value *X; - if (match(Cast, m_BitCast(m_Value(X)))) { - Value *NewConstant = ConstantExpr::getBitCast(C, SrcTy); - Value *NewOp = Builder->CreateBinOp(LogicOpc, X, NewConstant); - return CastInst::CreateBitOrPointerCast(NewOp, DestTy); - } - - // Similarly, move the logic operation ahead of a zext if the constant is - // unchanged in the smaller source type. Performing the logic in a smaller - // type may provide more information to later folds, and the smaller logic - // instruction may be cheaper (particularly in the case of vectors). if (match(Cast, m_OneUse(m_ZExt(m_Value(X))))) { Constant *TruncC = ConstantExpr::getTrunc(C, SrcTy); Constant *ZextTruncC = ConstantExpr::getZExt(TruncC, DestTy); @@ -1239,9 +1230,10 @@ static Instruction *foldAndToXor(BinaryOperator &I, // (A | ~B) & (B | ~A) --> ~(A ^ B) // (~B | A) & (~A | B) --> ~(A ^ B) // (~B | A) & (B | ~A) --> ~(A ^ B) - if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) && - match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B)))) - return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); + if (Op0->hasOneUse() || Op1->hasOneUse()) + if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B)))) + return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); return nullptr; } @@ -1256,9 +1248,10 @@ static Instruction *foldOrToXor(BinaryOperator &I, // Operand complexity canonicalization guarantees that the 'and' is Op0. 
// (A & B) | ~(A | B) --> ~(A ^ B) // (A & B) | ~(B | A) --> ~(A ^ B) - if (match(Op0, m_And(m_Value(A), m_Value(B))) && - match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) - return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); + if (Op0->hasOneUse() || Op1->hasOneUse()) + if (match(Op0, m_And(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) + return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); // (A & ~B) | (~A & B) --> A ^ B // (A & ~B) | (B & ~A) --> A ^ B @@ -1442,13 +1435,13 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) - if (Op1->hasOneUse() || cast<BinaryOperator>(Op1)->hasOneUse()) + if (Op1->hasOneUse() || IsFreeToInvert(C, C->hasOneUse())) return BinaryOperator::CreateAnd(Op0, Builder->CreateNot(C)); // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B)))) if (match(Op1, m_Xor(m_Specific(B), m_Specific(A)))) - if (Op0->hasOneUse() || cast<BinaryOperator>(Op0)->hasOneUse()) + if (Op0->hasOneUse() || IsFreeToInvert(C, C->hasOneUse())) return BinaryOperator::CreateAnd(Op1, Builder->CreateNot(C)); // (A | B) & ((~A) ^ B) -> (A & B) @@ -1579,11 +1572,14 @@ static Value *getSelectCondition(Value *A, Value *B, // If A and B are sign-extended, look through the sexts to find the booleans. Value *Cond; + Value *NotB; if (match(A, m_SExt(m_Value(Cond))) && Cond->getType()->getScalarType()->isIntegerTy(1) && - match(B, m_CombineOr(m_Not(m_SExt(m_Specific(Cond))), - m_SExt(m_Not(m_Specific(Cond)))))) - return Cond; + match(B, m_OneUse(m_Not(m_Value(NotB))))) { + NotB = peekThroughBitcast(NotB, true); + if (match(NotB, m_SExt(m_Specific(Cond)))) + return Cond; + } // All scalar (and most vector) possibilities should be handled now. // Try more matches that only apply to non-splat constant vectors. @@ -1615,12 +1611,8 @@ static Value *matchSelectFromAndOr(Value *A, Value *C, Value *B, Value *D, // The potential condition of the select may be bitcasted. In that case, look // through its bitcast and the corresponding bitcast of the 'not' condition. Type *OrigType = A->getType(); - Value *SrcA, *SrcB; - if (match(A, m_OneUse(m_BitCast(m_Value(SrcA)))) && - match(B, m_OneUse(m_BitCast(m_Value(SrcB))))) { - A = SrcA; - B = SrcB; - } + A = peekThroughBitcast(A, true); + B = peekThroughBitcast(B, true); if (Value *Cond = getSelectCondition(A, B, Builder)) { // ((bc Cond) & C) | ((bc ~Cond) & D) --> bc (select Cond, (bc C), (bc D)) @@ -1922,8 +1914,9 @@ Value *InstCombiner::foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { /// (A & C1) | B /// /// when the XOR of the two constants is "all ones" (-1). -Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, - Value *A, Value *B, Value *C) { +static Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, + Value *A, Value *B, Value *C, + InstCombiner::BuilderTy *Builder) { ConstantInt *CI1 = dyn_cast<ConstantInt>(C); if (!CI1) return nullptr; @@ -1944,15 +1937,16 @@ Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, /// \brief This helper function folds: /// -/// ((A | B) & C1) ^ (B & C2) +/// ((A ^ B) & C1) | (B & C2) /// /// into: /// /// (A & C1) ^ B /// /// when the XOR of the two constants is "all ones" (-1). 
-Instruction *InstCombiner::FoldXorWithConstants(BinaryOperator &I, Value *Op, - Value *A, Value *B, Value *C) { +static Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, + Value *A, Value *B, Value *C, + InstCombiner::BuilderTy *Builder) { ConstantInt *CI1 = dyn_cast<ConstantInt>(C); if (!CI1) return nullptr; @@ -2112,46 +2106,36 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } // ((A|B)&1)|(B&-2) -> (A&1) | B - if (match(A, m_Or(m_Value(V1), m_Specific(B))) || - match(A, m_Or(m_Specific(B), m_Value(V1)))) { - Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C); - if (Ret) return Ret; + if (match(A, m_c_Or(m_Value(V1), m_Specific(B)))) { + if (Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C, Builder)) + return Ret; } // (B&-2)|((A|B)&1) -> (A&1) | B - if (match(B, m_Or(m_Specific(A), m_Value(V1))) || - match(B, m_Or(m_Value(V1), m_Specific(A)))) { - Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D); - if (Ret) return Ret; + if (match(B, m_c_Or(m_Specific(A), m_Value(V1)))) { + if (Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D, Builder)) + return Ret; } // ((A^B)&1)|(B&-2) -> (A&1) ^ B - if (match(A, m_Xor(m_Value(V1), m_Specific(B))) || - match(A, m_Xor(m_Specific(B), m_Value(V1)))) { - Instruction *Ret = FoldXorWithConstants(I, Op1, V1, B, C); - if (Ret) return Ret; + if (match(A, m_c_Xor(m_Value(V1), m_Specific(B)))) { + if (Instruction *Ret = FoldXorWithConstants(I, Op1, V1, B, C, Builder)) + return Ret; } // (B&-2)|((A^B)&1) -> (A&1) ^ B - if (match(B, m_Xor(m_Specific(A), m_Value(V1))) || - match(B, m_Xor(m_Value(V1), m_Specific(A)))) { - Instruction *Ret = FoldXorWithConstants(I, Op0, A, V1, D); - if (Ret) return Ret; + if (match(B, m_c_Xor(m_Specific(A), m_Value(V1)))) { + if (Instruction *Ret = FoldXorWithConstants(I, Op0, A, V1, D, Builder)) + return Ret; } } // (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C - // FIXME: The two hasOneUse calls here are the same call, maybe we were - // supposed to check Op1->operand(0)? if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) - if (Op1->hasOneUse() || cast<BinaryOperator>(Op1)->hasOneUse()) - return BinaryOperator::CreateOr(Op0, C); + return BinaryOperator::CreateOr(Op0, C); // ((A ^ C) ^ B) | (B ^ A) -> (B ^ A) | C - // FIXME: The two hasOneUse calls here are the same call, maybe we were - // supposed to check Op0->operand(0)? if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B)))) if (match(Op1, m_Xor(m_Specific(B), m_Specific(A)))) - if (Op0->hasOneUse() || cast<BinaryOperator>(Op0)->hasOneUse()) - return BinaryOperator::CreateOr(Op1, C); + return BinaryOperator::CreateOr(Op1, C); // ((B | C) & A) | B -> B | (A & C) if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A)))) @@ -2357,6 +2341,30 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } } + // Instead of trying to imitate the folds for and/or, decompose this 'xor' + // into those logic ops. That is, try to turn this into an and-of-icmps + // because we have many folds for that pattern. + // + // This is based on a truth table definition of xor: + // X ^ Y --> (X | Y) & !(X & Y) + if (Value *OrICmp = SimplifyBinOp(Instruction::Or, LHS, RHS, SQ)) { + // TODO: If OrICmp is true, then the definition of xor simplifies to !(X&Y). + // TODO: If OrICmp is false, the whole thing is false (InstSimplify?). 
+ if (Value *AndICmp = SimplifyBinOp(Instruction::And, LHS, RHS, SQ)) { + // TODO: Independently handle cases where the 'and' side is a constant. + if (OrICmp == LHS && AndICmp == RHS && RHS->hasOneUse()) { + // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS + RHS->setPredicate(RHS->getInversePredicate()); + return Builder->CreateAnd(LHS, RHS); + } + if (OrICmp == RHS && AndICmp == LHS && LHS->hasOneUse()) { + // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS + LHS->setPredicate(LHS->getInversePredicate()); + return Builder->CreateAnd(LHS, RHS); + } + } + } + return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index c0830a5d21124..dbed7ad4eae84 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1409,6 +1409,47 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { } } + // Add range metadata since known bits can't completely reflect what we know. + // TODO: Handle splat vectors. + auto *IT = dyn_cast<IntegerType>(Op0->getType()); + if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) { + Metadata *LowAndHigh[] = { + ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)), + ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))}; + II.setMetadata(LLVMContext::MD_range, + MDNode::get(II.getContext(), LowAndHigh)); + return &II; + } + + return nullptr; +} + +static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) { + assert(II.getIntrinsicID() == Intrinsic::ctpop && + "Expected ctpop intrinsic"); + Value *Op0 = II.getArgOperand(0); + // FIXME: Try to simplify vectors of integers. + auto *IT = dyn_cast<IntegerType>(Op0->getType()); + if (!IT) + return nullptr; + + unsigned BitWidth = IT->getBitWidth(); + KnownBits Known(BitWidth); + IC.computeKnownBits(Op0, Known, 0, &II); + + unsigned MinCount = Known.countMinPopulation(); + unsigned MaxCount = Known.countMaxPopulation(); + + // Add range metadata since known bits can't completely reflect what we know. + if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) { + Metadata *LowAndHigh[] = { + ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)), + ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))}; + II.setMetadata(LLVMContext::MD_range, + MDNode::get(II.getContext(), LowAndHigh)); + return &II; + } + return nullptr; } @@ -1981,6 +2022,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return I; break; + case Intrinsic::ctpop: + if (auto *I = foldCtpop(*II, *this)) + return I; + break; + case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: case Intrinsic::umul_with_overflow: diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 38e95fb116396..d3049389dfb9f 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1896,6 +1896,18 @@ static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast, return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X); } + // Canonicalize vector bitcasts to come before vector bitwise logic with a + // constant. This eases recognition of special constants for later ops. 
+ // Example: + // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b + Constant *C; + if (match(BO->getOperand(1), m_Constant(C))) { + // bitcast (logic X, C) --> logic (bitcast X, C') + Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy); + Value *CastedC = ConstantExpr::getBitCast(C, DestTy); + return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC); + } + return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 1ef4acfb058c4..6ad32490a3288 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2434,6 +2434,77 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp, return nullptr; } +bool InstCombiner::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS, + Value *&RHS, ConstantInt *&Less, + ConstantInt *&Equal, + ConstantInt *&Greater) { + // TODO: Generalize this to work with other comparison idioms or ensure + // they get canonicalized into this form. + + // select i1 (a == b), i32 Equal, i32 (select i1 (a < b), i32 Less, i32 + // Greater), where Equal, Less and Greater are placeholders for any three + // constants. + ICmpInst::Predicate PredA, PredB; + if (match(SI->getTrueValue(), m_ConstantInt(Equal)) && + match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) && + PredA == ICmpInst::ICMP_EQ && + match(SI->getFalseValue(), + m_Select(m_ICmp(PredB, m_Specific(LHS), m_Specific(RHS)), + m_ConstantInt(Less), m_ConstantInt(Greater))) && + PredB == ICmpInst::ICMP_SLT) { + return true; + } + return false; +} + +Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp, + Instruction *Select, + ConstantInt *C) { + + assert(C && "Cmp RHS should be a constant int!"); + // If we're testing a constant value against the result of a three way + // comparison, the result can be expressed directly in terms of the + // original values being compared. Note: We could possibly be more + // aggressive here and remove the hasOneUse test. The original select is + // really likely to simplify or sink when we remove a test of the result. + Value *OrigLHS, *OrigRHS; + ConstantInt *C1LessThan, *C2Equal, *C3GreaterThan; + if (Cmp.hasOneUse() && + matchThreeWayIntCompare(cast<SelectInst>(Select), OrigLHS, OrigRHS, + C1LessThan, C2Equal, C3GreaterThan)) { + assert(C1LessThan && C2Equal && C3GreaterThan); + + bool TrueWhenLessThan = + ConstantExpr::getCompare(Cmp.getPredicate(), C1LessThan, C) + ->isAllOnesValue(); + bool TrueWhenEqual = + ConstantExpr::getCompare(Cmp.getPredicate(), C2Equal, C) + ->isAllOnesValue(); + bool TrueWhenGreaterThan = + ConstantExpr::getCompare(Cmp.getPredicate(), C3GreaterThan, C) + ->isAllOnesValue(); + + // This generates the new instruction that will replace the original Cmp + // Instruction. Instead of enumerating the various combinations when + // TrueWhenLessThan, TrueWhenEqual and TrueWhenGreaterThan are true versus + // false, we rely on chaining of ORs and future passes of InstCombine to + // simplify the OR further (i.e. a s< b || a == b becomes a s<= b). + + // When none of the three constants satisfy the predicate for the RHS (C), + // the entire original Cmp can be simplified to a false. 
+ Value *Cond = Builder->getFalse(); + if (TrueWhenLessThan) + Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS)); + if (TrueWhenEqual) + Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS)); + if (TrueWhenGreaterThan) + Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS)); + + return replaceInstUsesWith(Cmp, Cond); + } + return nullptr; +} + /// Try to fold integer comparisons with a constant operand: icmp Pred X, C /// where X is some kind of instruction. Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) { @@ -2493,11 +2564,28 @@ Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) { return I; } + // Match against CmpInst LHS being instructions other than binary operators. Instruction *LHSI; - if (match(Cmp.getOperand(0), m_Instruction(LHSI)) && - LHSI->getOpcode() == Instruction::Trunc) - if (Instruction *I = foldICmpTruncConstant(Cmp, LHSI, C)) - return I; + if (match(Cmp.getOperand(0), m_Instruction(LHSI))) { + switch (LHSI->getOpcode()) { + case Instruction::Select: + { + // For now, we only support constant integers while folding the + // ICMP(SELECT) pattern. We can extend this to support vectors of integers + // similar to the cases handled by binary ops above. + if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(Cmp.getOperand(1))) + if (Instruction *I = foldICmpSelectConstant(Cmp, LHSI, ConstRHS)) + return I; + break; + } + case Instruction::Trunc: + if (Instruction *I = foldICmpTruncConstant(Cmp, LHSI, C)) + return I; + break; + default: + break; + } + } if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, C)) return I; @@ -3110,8 +3198,7 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { if (BO0) { // Transform A & (L - 1) `ult` L --> L != 0 auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes()); - auto BitwiseAnd = - m_CombineOr(m_And(m_Value(), LSubOne), m_And(LSubOne, m_Value())); + auto BitwiseAnd = m_c_And(m_Value(), LSubOne); if (match(BO0, BitwiseAnd) && Pred == ICmpInst::ICMP_ULT) { auto *Zero = Constant::getNullValue(BO0->getType()); diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 1a7db146df426..1b0fe84dd4dda 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -95,6 +95,18 @@ static inline bool isCanonicalPredicate(CmpInst::Predicate Pred) { } } +/// Return the source operand of a potentially bitcasted value while optionally +/// checking if it has one use. If there is no bitcast or the one use check is +/// not met, return the input value itself. +static inline Value *peekThroughBitcast(Value *V, bool OneUseOnly = false) { + if (auto *BitCast = dyn_cast<BitCastInst>(V)) + if (!OneUseOnly || BitCast->hasOneUse()) + return BitCast->getOperand(0); + + // V is not a bitcast or V has more than one use and OneUseOnly is true.
+ return V; +} + /// \brief Add one to a Constant static inline Constant *AddOne(Constant *C) { return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); @@ -276,10 +288,6 @@ public: Instruction *visitFDiv(BinaryOperator &I); Value *simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, bool Inverted); Instruction *visitAnd(BinaryOperator &I); - Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A, - Value *B, Value *C); - Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, Value *A, - Value *B, Value *C); Instruction *visitOr(BinaryOperator &I); Instruction *visitXor(BinaryOperator &I); Instruction *visitShl(BinaryOperator &I); @@ -595,6 +603,15 @@ private: Instruction::BinaryOps, Value *, Value *, Value *, Value *); + /// Match a select chain which produces one of three values based on whether + /// the LHS is less than, equal to, or greater than RHS respectively. + /// Return true if we matched a three way compare idiom. The LHS, RHS, Less, + /// Equal and Greater values are saved in the matching process and returned to + /// the caller. + bool matchThreeWayIntCompare(SelectInst *SI, Value *&LHS, Value *&RHS, + ConstantInt *&Less, ConstantInt *&Equal, + ConstantInt *&Greater); + /// \brief Attempts to replace V with a simpler value based on the demanded /// bits. Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownBits &Known, @@ -672,6 +689,8 @@ private: Instruction *foldICmpBinOp(ICmpInst &Cmp); Instruction *foldICmpEquality(ICmpInst &Cmp); + Instruction *foldICmpSelectConstant(ICmpInst &Cmp, Instruction *Select, + ConstantInt *C); Instruction *foldICmpTruncConstant(ICmpInst &Cmp, Instruction *Trunc, const APInt *C); Instruction *foldICmpAndConstant(ICmpInst &Cmp, BinaryOperator *And, diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index a4d84ae81aa02..ca370c73fca44 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -169,6 +169,18 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI, return nullptr; } +/// Returns true if V is dereferenceable for size of alloca. +static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI, + const DataLayout &DL) { + if (AI->isArrayAllocation()) + return false; + uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType()); + if (!AllocaSize) + return false; + return isDereferenceableAndAlignedPointer(V, AI->getAlignment(), + APInt(64, AllocaSize), DL); +} + static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) { // Check for array size of 1 (scalar allocation). if (!AI.isArrayAllocation()) { @@ -390,7 +402,8 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { unsigned SourceAlign = getOrEnforceKnownAlignment( Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT); - if (AI.getAlignment() <= SourceAlign) { + if (AI.getAlignment() <= SourceAlign && + isDereferenceableForAllocaSize(Copy->getSource(), &AI, DL)) { DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) @@ -476,21 +489,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT break; case LLVMContext::MD_nonnull: - // This only directly applies if the new type is also a pointer. 
- if (NewTy->isPointerTy()) { - NewLoad->setMetadata(ID, N); - break; - } - // If it's integral now, translate it to !range metadata. - if (NewTy->isIntegerTy()) { - auto *ITy = cast<IntegerType>(NewTy); - auto *NullInt = ConstantExpr::getPtrToInt( - ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy); - auto *NonNullInt = - ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1)); - NewLoad->setMetadata(LLVMContext::MD_range, - MDB.createRange(NonNullInt, NullInt)); - } + copyNonnullMetadata(LI, N, *NewLoad); break; case LLVMContext::MD_align: case LLVMContext::MD_dereferenceable: @@ -500,17 +499,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT NewLoad->setMetadata(ID, N); break; case LLVMContext::MD_range: - // FIXME: It would be nice to propagate this in some way, but the type - // conversions make it hard. - - // If it's a pointer now and the range does not contain 0, make it !nonnull. - if (NewTy->isPointerTy()) { - unsigned BitWidth = IC.getDataLayout().getTypeSizeInBits(NewTy); - if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) { - MDNode *NN = MDNode::get(LI.getContext(), None); - NewLoad->setMetadata(LLVMContext::MD_nonnull, NN); - } - } + copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad); break; } } diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index b9674d85634dc..33951e66497a1 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -303,7 +303,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, /// We want to turn: /// (select (icmp eq (and X, C1), 0), Y, (or Y, C2)) /// into: -/// (or (shl (and X, C1), C3), y) +/// (or (shl (and X, C1), C3), Y) /// iff: /// C1 and C2 are both powers of 2 /// where: @@ -317,19 +317,44 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, Value *FalseVal, InstCombiner::BuilderTy *Builder) { const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); - if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy()) + if (!IC || !SI.getType()->isIntegerTy()) return nullptr; Value *CmpLHS = IC->getOperand(0); Value *CmpRHS = IC->getOperand(1); - if (!match(CmpRHS, m_Zero())) - return nullptr; + Value *V; + unsigned C1Log; + bool IsEqualZero; + bool NeedAnd = false; + if (IC->isEquality()) { + if (!match(CmpRHS, m_Zero())) + return nullptr; + + const APInt *C1; + if (!match(CmpLHS, m_And(m_Value(), m_Power2(C1)))) + return nullptr; + + V = CmpLHS; + C1Log = C1->logBase2(); + IsEqualZero = IC->getPredicate() == ICmpInst::ICMP_EQ; + } else if (IC->getPredicate() == ICmpInst::ICMP_SLT || + IC->getPredicate() == ICmpInst::ICMP_SGT) { + // We also need to recognize (icmp slt (trunc (X)), 0) and + // (icmp sgt (trunc (X)), -1). + IsEqualZero = IC->getPredicate() == ICmpInst::ICMP_SGT; + if ((IsEqualZero && !match(CmpRHS, m_AllOnes())) || + (!IsEqualZero && !match(CmpRHS, m_Zero()))) + return nullptr; + + if (!match(CmpLHS, m_OneUse(m_Trunc(m_Value(V))))) + return nullptr; - Value *X; - const APInt *C1; - if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1)))) + C1Log = CmpLHS->getType()->getScalarSizeInBits() - 1; + NeedAnd = true; + } else { return nullptr; + } const APInt *C2; bool OrOnTrueVal = false; @@ -340,11 +365,27 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, if (!OrOnFalseVal && !OrOnTrueVal) return nullptr; - Value *V = CmpLHS; Value *Y = OrOnFalseVal ? 
TrueVal : FalseVal; - unsigned C1Log = C1->logBase2(); unsigned C2Log = C2->logBase2(); + + bool NeedXor = (!IsEqualZero && OrOnFalseVal) || (IsEqualZero && OrOnTrueVal); + bool NeedShift = C1Log != C2Log; + bool NeedZExtTrunc = Y->getType()->getIntegerBitWidth() != + V->getType()->getIntegerBitWidth(); + + // Make sure we don't create more instructions than we save. + Value *Or = OrOnFalseVal ? FalseVal : TrueVal; + if ((NeedShift + NeedXor + NeedZExtTrunc) > + (IC->hasOneUse() + Or->hasOneUse())) + return nullptr; + + if (NeedAnd) { + // Insert the AND instruction on the input to the truncate. + APInt C1 = APInt::getOneBitSet(V->getType()->getScalarSizeInBits(), C1Log); + V = Builder->CreateAnd(V, ConstantInt::get(V->getType(), C1)); + } + if (C2Log > C1Log) { V = Builder->CreateZExtOrTrunc(V, Y->getType()); V = Builder->CreateShl(V, C2Log - C1Log); @@ -354,9 +395,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, } else V = Builder->CreateZExtOrTrunc(V, Y->getType()); - ICmpInst::Predicate Pred = IC->getPredicate(); - if ((Pred == ICmpInst::ICMP_NE && OrOnFalseVal) || - (Pred == ICmpInst::ICMP_EQ && OrOnTrueVal)) + if (NeedXor) V = Builder->CreateXor(V, *C2); return Builder->CreateOr(V, Y); diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 8cec865c6422a..1bb1a85367d1b 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -556,8 +556,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { // The inexact versions are deferred to DAGCombine, so we don't hide shl // behind a bit mask. const APInt *ShOp1; - if (match(Op0, m_CombineOr(m_Exact(m_LShr(m_Value(X), m_APInt(ShOp1))), - m_Exact(m_AShr(m_Value(X), m_APInt(ShOp1)))))) { + if (match(Op0, m_Exact(m_Shr(m_Value(X), m_APInt(ShOp1))))) { unsigned ShrAmt = ShOp1->getZExtValue(); if (ShrAmt < ShAmt) { // If C1 < C2: (X >>?,exact C1) << C2 --> X << (C2 - C1) diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 65e6d2e359052..02fac4fb37a4b 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -939,9 +939,19 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { // `TrueVInPred`. if (InC && !isa<ConstantExpr>(InC) && isa<ConstantInt>(InC)) InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; - else + else { + // Generate the select in the same block as PN's current incoming block. + // Note: ThisBB need not be the NonConstBB, because vector constants + // (which are constants by definition) are also handled here. + // FIXME: This can lead to an increase in IR generation because we might + // generate selects for a vector constant phi operand that cannot be + // folded to TrueVInPred or FalseVInPred as is done for ConstantInt. For + // non-vector phis, this transformation was always profitable because + // the select would be generated exactly once in the NonConstBB.
+ Builder->SetInsertPoint(ThisBB->getTerminator()); InV = Builder->CreateSelect(PN->getIncomingValue(i), TrueVInPred, FalseVInPred, "phitmp"); + } NewPN->addIncoming(InV, ThisBB); } } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) { @@ -3002,6 +3012,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, ++NumDeadInst; DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n'); Inst->eraseFromParent(); + MadeIRChange = true; continue; } @@ -3015,6 +3026,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, ++NumConstProp; if (isInstructionTriviallyDead(Inst, TLI)) Inst->eraseFromParent(); + MadeIRChange = true; continue; } diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index 0d308810009d5..4089d81ea3e1b 100644 --- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -642,7 +642,12 @@ static bool promoteIndirectCalls(Module &M, bool InLTO, bool SamplePGO) { if (DisableICP) return false; InstrProfSymtab Symtab; - Symtab.create(M, InLTO); + if (Error E = Symtab.create(M, InLTO)) { + std::string SymtabFailure = toString(std::move(E)); + DEBUG(dbgs() << "Failed to create symtab: " << SymtabFailure << "\n"); + (void)SymtabFailure; + return false; + } bool Changed = false; for (auto &F : M) { if (F.isDeclaration()) diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index 37f88d5f95f18..9c14b0149fdc1 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -19,12 +19,14 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" @@ -40,7 +42,10 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include <algorithm> #include <cassert> #include <cstddef> @@ -92,6 +97,35 @@ cl::opt<double> NumCountersPerValueSite( // is usually smaller than 2. cl::init(1.0)); +cl::opt<bool> AtomicCounterUpdatePromoted( + "atomic-counter-update-promoted", cl::ZeroOrMore, + cl::desc("Do counter update using atomic fetch add " "for promoted counters only"), + cl::init(false)); + +// If the option is not specified, the default behavior about whether +// counter promotion is done depends on how the instrumentation lowering +// pipeline is set up, i.e., the default value of this option does not by +// itself mean the promotion will be done by default. Explicitly +// setting this option can override the default behavior.
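The override rule described in the comment above is implemented later in this patch by isCounterPromotionEnabled(); a minimal standalone sketch of the same rule (a hypothetical Flag type stands in for cl::opt, which records getNumOccurrences()):

#include <iostream>

// Hypothetical stand-in for a command-line option: remembers whether the
// user actually passed it, like cl::opt::getNumOccurrences().
struct Flag {
  bool Value = false;
  unsigned NumOccurrences = 0;
  void set(bool V) { Value = V; ++NumOccurrences; }
};

// An explicit occurrence on the command line overrides the default the
// lowering pipeline asked for -- the logic of isCounterPromotionEnabled().
bool counterPromotionEnabled(const Flag &DoCounterPromotion,
                             bool PipelineDefault) {
  if (DoCounterPromotion.NumOccurrences > 0)
    return DoCounterPromotion.Value;
  return PipelineDefault;
}

int main() {
  Flag F;
  std::cout << counterPromotionEnabled(F, true) << '\n'; // 1: pipeline default wins
  F.set(false); // user passed -do-counter-promotion=false explicitly
  std::cout << counterPromotionEnabled(F, true) << '\n'; // 0: explicit flag wins
}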
+cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore, + cl::desc("Do counter register promotion"), + cl::init(false)); +cl::opt<unsigned> MaxNumOfPromotionsPerLoop( + cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(10), + cl::desc("Max number of counter promotions per loop to avoid" + " increasing register pressure too much")); + +// A debug option +cl::opt<int> + MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1), + cl::desc("Max number of allowed counter promotions")); + +cl::opt<bool> SpeculativeCounterPromotion( + cl::ZeroOrMore, "speculative-counter-promotion", cl::init(false), + cl::desc("Allow counter promotion for loops with multiple exiting blocks " + "or top-tested loops")); + class InstrProfilingLegacyPass : public ModulePass { InstrProfiling InstrProf; @@ -116,6 +150,123 @@ public: } }; +/// A helper class to promote one counter RMW operation in the loop +/// into a register update. +/// +/// The RMW update for the counter will be sunk out of the loop after +/// the transformation. +/// +class PGOCounterPromoterHelper : public LoadAndStorePromoter { +public: + PGOCounterPromoterHelper(Instruction *L, Instruction *S, SSAUpdater &SSA, + Value *Init, BasicBlock *PH, + ArrayRef<BasicBlock *> ExitBlocks, + ArrayRef<Instruction *> InsertPts) + : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks), + InsertPts(InsertPts) { + assert(isa<LoadInst>(L)); + assert(isa<StoreInst>(S)); + SSA.AddAvailableValue(PH, Init); + } + void doExtraRewritesBeforeFinalDeletion() const override { + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = ExitBlocks[i]; + Instruction *InsertPos = InsertPts[i]; + // Get LiveIn value into the ExitBlock. If there are multiple + // predecessors, the value is defined by a PHI node in this + // block. + Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); + Value *Addr = cast<StoreInst>(Store)->getPointerOperand(); + IRBuilder<> Builder(InsertPos); + if (AtomicCounterUpdatePromoted) + Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue, + AtomicOrdering::SequentiallyConsistent); + else { + LoadInst *OldVal = Builder.CreateLoad(Addr, "pgocount.promoted"); + auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue); + Builder.CreateStore(NewVal, Addr); + } + } + } + +private: + Instruction *Store; + ArrayRef<BasicBlock *> ExitBlocks; + ArrayRef<Instruction *> InsertPts; +}; + +/// A helper class to do register promotion for all profile counter +/// updates in a loop. +/// +class PGOCounterPromoter { +public: + PGOCounterPromoter(ArrayRef<LoadStorePair> Cands, Loop &Loop) + : Candidates(Cands), ExitBlocks(), InsertPts(), ParentLoop(Loop) { + + SmallVector<BasicBlock *, 8> LoopExitBlocks; + SmallPtrSet<BasicBlock *, 8> BlockSet; + ParentLoop.getExitBlocks(LoopExitBlocks); + + for (BasicBlock *ExitBlock : LoopExitBlocks) { + if (BlockSet.insert(ExitBlock).second) { + ExitBlocks.push_back(ExitBlock); + InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); + } + } + } + + bool run(int64_t *NumPromoted) { + // We can't insert into a catchswitch.
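To make concrete what PGOCounterPromoterHelper above achieves, here is a standalone source-level model (plain C++, not the pass itself; it models the non-atomic flush variant): the per-iteration RMW on the memory counter is replaced by a register accumulation that is flushed once in the exit block.

#include <cassert>
#include <cstdint>

int64_t Counter; // stands in for a profile counter slot in memory

// Unpromoted lowering: one load/add/store of the counter per iteration.
void loopUnpromoted(int N) {
  for (int I = 0; I < N; ++I)
    Counter = Counter + 1; // RMW on memory every trip
}

// Promoted lowering: the live value starts at 0 (the init value fed in from
// the preheader) and the total is added back to memory once at the loop
// exit, as doExtraRewritesBeforeFinalDeletion() does per exit block.
void loopPromoted(int N) {
  int64_t Live = 0;
  for (int I = 0; I < N; ++I)
    Live += 1;
  Counter = Counter + Live; // single flush in the exit block
}

int main() {
  Counter = 0; loopUnpromoted(10); assert(Counter == 10);
  Counter = 0; loopPromoted(10);  assert(Counter == 10);
}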
+ bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) { + return isa<CatchSwitchInst>(Exit->getTerminator()); + }); + + if (HasCatchSwitch) + return false; + + if (!ParentLoop.hasDedicatedExits()) + return false; + + BasicBlock *PH = ParentLoop.getLoopPreheader(); + if (!PH) + return false; + + BasicBlock *H = ParentLoop.getHeader(); + bool TopTested = + ((ParentLoop.getBlocks().size() > 1) && ParentLoop.isLoopExiting(H)); + if (!SpeculativeCounterPromotion && + (TopTested || ParentLoop.getExitingBlock() == nullptr)) + return false; + + unsigned Promoted = 0; + for (auto &Cand : Candidates) { + + SmallVector<PHINode *, 4> NewPHIs; + SSAUpdater SSA(&NewPHIs); + Value *InitVal = ConstantInt::get(Cand.first->getType(), 0); + PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal, + PH, ExitBlocks, InsertPts); + Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second})); + Promoted++; + if (Promoted >= MaxNumOfPromotionsPerLoop) + break; + (*NumPromoted)++; + if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions) + break; + } + + DEBUG(dbgs() << Promoted << " counters promoted for loop (depth=" + << ParentLoop.getLoopDepth() << ")\n"); + return Promoted != 0; + } + +private: + ArrayRef<LoadStorePair> Candidates; + SmallVector<BasicBlock *, 8> ExitBlocks; + SmallVector<Instruction *, 8> InsertPts; + Loop &ParentLoop; +}; + } // end anonymous namespace PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) { @@ -147,6 +298,63 @@ static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) { return dyn_cast<InstrProfIncrementInst>(Instr); } +bool InstrProfiling::lowerIntrinsics(Function *F) { + bool MadeChange = false; + PromotionCandidates.clear(); + for (BasicBlock &BB : *F) { + for (auto I = BB.begin(), E = BB.end(); I != E;) { + auto Instr = I++; + InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr); + if (Inc) { + lowerIncrement(Inc); + MadeChange = true; + } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) { + lowerValueProfileInst(Ind); + MadeChange = true; + } + } + } + + if (!MadeChange) + return false; + + promoteCounterLoadStores(F); + return true; +} + +bool InstrProfiling::isCounterPromotionEnabled() const { + if (DoCounterPromotion.getNumOccurrences() > 0) + return DoCounterPromotion; + + return Options.DoCounterPromotion; +} + +void InstrProfiling::promoteCounterLoadStores(Function *F) { + if (!isCounterPromotionEnabled()) + return; + + DominatorTree DT(*F); + LoopInfo LI(DT); + DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates; + + for (const auto &LoadStore : PromotionCandidates) { + auto *CounterLoad = LoadStore.first; + auto *CounterStore = LoadStore.second; + BasicBlock *BB = CounterLoad->getParent(); + Loop *ParentLoop = LI.getLoopFor(BB); + if (!ParentLoop) + continue; + LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore); + } + + SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder(); + + for (auto *Loop : Loops) { + PGOCounterPromoter Promoter(LoopPromotionCandidates[Loop], *Loop); + Promoter.run(&TotalCountersPromoted); + } +} + bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) { bool MadeChange = false; @@ -179,18 +387,7 @@ bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) { } for (Function &F : M) - for (BasicBlock &BB : F) - for (auto I = BB.begin(), E = BB.end(); I != E;) { - auto Instr = I++; - InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr); - if (Inc) { - 
lowerIncrement(Inc); - MadeChange = true; - } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) { - lowerValueProfileInst(Ind); - MadeChange = true; - } - } + MadeChange |= lowerIntrinsics(&F); if (GlobalVariable *CoverageNamesVar = M.getNamedGlobal(getCoverageUnusedNamesVarName())) { @@ -303,9 +500,12 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { IRBuilder<> Builder(Inc); uint64_t Index = Inc->getIndex()->getZExtValue(); Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index); - Value *Count = Builder.CreateLoad(Addr, "pgocount"); - Count = Builder.CreateAdd(Count, Inc->getStep()); - Inc->replaceAllUsesWith(Builder.CreateStore(Count, Addr)); + Value *Load = Builder.CreateLoad(Addr, "pgocount"); + auto *Count = Builder.CreateAdd(Load, Inc->getStep()); + auto *Store = Builder.CreateStore(Count, Addr); + Inc->replaceAllUsesWith(Store); + if (isCounterPromotionEnabled()) + PromotionCandidates.emplace_back(cast<Instruction>(Load), Store); Inc->eraseFromParent(); } diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index b2d95271479c3..0e7d11c553977 100644 --- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -1177,7 +1177,7 @@ void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) { Builder.CreateCall( Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), - Builder.getInt64(FuncHash), Builder.CreatePtrToInt(Length, Int64Ty), + Builder.getInt64(FuncHash), Builder.CreateZExtOrTrunc(Length, Int64Ty), Builder.getInt32(IPVK_MemOPSize), Builder.getInt32(CurCtrId)}); ++CurCtrId; } diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index f5196cc461815..457c9427ab9ac 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -22,7 +22,6 @@ add_llvm_library(LLVMScalarOpts LICM.cpp LoopAccessAnalysisPrinter.cpp LoopSink.cpp - LoadCombine.cpp LoopDeletion.cpp LoopDataPrefetch.cpp LoopDistribute.cpp diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 2a4c9526dfcd9..28157783daa7a 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -232,8 +232,7 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) { pred_iterator PB = pred_begin(BB), PE = pred_end(BB); if (PB == PE) return false; - // Analyse each switch case in turn. This is done in reverse order so that - // removing a case doesn't cause trouble for the iteration. + // Analyse each switch case in turn. bool Changed = false; for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) { ConstantInt *Case = CI->getCaseValue(); @@ -291,7 +290,7 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) { break; } - // Increment the case iterator sense we didn't delete it. + // Increment the case iterator since we didn't delete it. 
++CI; } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 0490d93f64553..c0f628eb61e61 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -80,9 +80,10 @@ MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore, struct llvm::GVN::Expression { uint32_t opcode; Type *type; + bool commutative; SmallVector<uint32_t, 4> varargs; - Expression(uint32_t o = ~2U) : opcode(o) {} + Expression(uint32_t o = ~2U) : opcode(o), commutative(false) {} bool operator==(const Expression &other) const { if (opcode != other.opcode) @@ -246,6 +247,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!"); if (e.varargs[0] > e.varargs[1]) std::swap(e.varargs[0], e.varargs[1]); + e.commutative = true; } if (CmpInst *C = dyn_cast<CmpInst>(I)) { @@ -256,6 +258,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { Predicate = CmpInst::getSwappedPredicate(Predicate); } e.opcode = (C->getOpcode() << 8) | Predicate; + e.commutative = true; } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) { for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); II != IE; ++II) @@ -281,6 +284,7 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode, Predicate = CmpInst::getSwappedPredicate(Predicate); } e.opcode = (Opcode << 8) | Predicate; + e.commutative = true; return e; } @@ -348,25 +352,25 @@ GVN::ValueTable::~ValueTable() = default; /// add - Insert a value into the table with a specified value number. void GVN::ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); + if (PHINode *PN = dyn_cast<PHINode>(V)) + NumberingPhi[num] = PN; } uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) { if (AA->doesNotAccessMemory(C)) { Expression exp = createExpr(C); - uint32_t &e = expressionNumbering[exp]; - if (!e) e = nextValueNumber++; + uint32_t e = assignExpNewValueNum(exp).first; valueNumbering[C] = e; return e; } else if (AA->onlyReadsMemory(C)) { Expression exp = createExpr(C); - uint32_t &e = expressionNumbering[exp]; - if (!e) { - e = nextValueNumber++; - valueNumbering[C] = e; - return e; + auto ValNum = assignExpNewValueNum(exp); + if (ValNum.second) { + valueNumbering[C] = ValNum.first; + return ValNum.first; } if (!MD) { - e = nextValueNumber++; + uint32_t e = assignExpNewValueNum(exp).first; valueNumbering[C] = e; return e; } @@ -522,23 +526,29 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) { case Instruction::ExtractValue: exp = createExtractvalueExpr(cast<ExtractValueInst>(I)); break; + case Instruction::PHI: + valueNumbering[V] = nextValueNumber; + NumberingPhi[nextValueNumber] = cast<PHINode>(V); + return nextValueNumber++; default: valueNumbering[V] = nextValueNumber; return nextValueNumber++; } - uint32_t& e = expressionNumbering[exp]; - if (!e) e = nextValueNumber++; + uint32_t e = assignExpNewValueNum(exp).first; valueNumbering[V] = e; return e; } /// Returns the value number of the specified value. Fails if /// the value has not yet been numbered. -uint32_t GVN::ValueTable::lookup(Value *V) const { +uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const { DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V); - assert(VI != valueNumbering.end() && "Value not numbered?"); - return VI->second; + if (Verify) { + assert(VI != valueNumbering.end() && "Value not numbered?"); + return VI->second; + } + return (VI != valueNumbering.end()) ? 
VI->second : 0; } /// Returns the value number of the given comparison, @@ -549,21 +559,28 @@ uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) { Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS); - uint32_t& e = expressionNumbering[exp]; - if (!e) e = nextValueNumber++; - return e; + return assignExpNewValueNum(exp).first; } /// Remove all entries from the ValueTable. void GVN::ValueTable::clear() { valueNumbering.clear(); expressionNumbering.clear(); + NumberingPhi.clear(); + PhiTranslateTable.clear(); nextValueNumber = 1; + Expressions.clear(); + ExprIdx.clear(); + nextExprNumber = 0; } /// Remove a value from the value numbering. void GVN::ValueTable::erase(Value *V) { + uint32_t Num = valueNumbering.lookup(V); valueNumbering.erase(V); + // If V is a PHINode, V <--> value number is a one-to-one mapping. + if (isa<PHINode>(V)) + NumberingPhi.erase(Num); } /// verifyRemoved - Verify that the value is removed from all internal data @@ -602,7 +619,7 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) { +LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) const { errs() << "{\n"; for (DenseMap<uint32_t, Value*>::iterator I = d.begin(), E = d.end(); I != E; ++I) { @@ -1451,6 +1468,95 @@ bool GVN::processLoad(LoadInst *L) { return false; } +/// Return a pair whose first field is the value number of \p Exp and whose +/// second field indicates whether that value number is newly created. +std::pair<uint32_t, bool> +GVN::ValueTable::assignExpNewValueNum(Expression &Exp) { + uint32_t &e = expressionNumbering[Exp]; + bool CreateNewValNum = !e; + if (CreateNewValNum) { + Expressions.push_back(Exp); + if (ExprIdx.size() < nextValueNumber + 1) + ExprIdx.resize(nextValueNumber * 2); + e = nextValueNumber; + ExprIdx[nextValueNumber++] = nextExprNumber++; + } + return {e, CreateNewValNum}; +} + +/// Return whether all the values associated with \p Num are +/// defined in \p BB. +bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB, + GVN &Gvn) { + LeaderTableEntry *Vals = &Gvn.LeaderTable[Num]; + while (Vals && Vals->BB == BB) + Vals = Vals->Next; + return !Vals; +} + +/// Wrap phiTranslateImpl to provide caching functionality. +uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred, + const BasicBlock *PhiBlock, uint32_t Num, + GVN &Gvn) { + auto FindRes = PhiTranslateTable.find({Num, Pred}); + if (FindRes != PhiTranslateTable.end()) + return FindRes->second; + uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn); + PhiTranslateTable.insert({{Num, Pred}, NewNum}); + return NewNum; + } + +/// Translate value number \p Num using phis, so that it has the values of +/// the phis in BB. +uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred, + const BasicBlock *PhiBlock, + uint32_t Num, GVN &Gvn) { + if (PHINode *PN = NumberingPhi[Num]) { + for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { + if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred) + if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false)) + return TransVal; + } + return Num; + } + + // If any value associated with Num is defined in a BB other than + // PhiBlock, it cannot depend on a phi in PhiBlock without going through + // a backedge, so we can exit early in that case to save compile time.
+ if (!areAllValsInBB(Num, PhiBlock, Gvn)) + return Num; + + if (Num >= ExprIdx.size() || ExprIdx[Num] == 0) + return Num; + Expression Exp = Expressions[ExprIdx[Num]]; + + for (unsigned i = 0; i < Exp.varargs.size(); i++) { + // For InsertValue and ExtractValue, some varargs are index numbers + // instead of value numbers. Those index numbers should not be + // translated. + if ((i > 1 && Exp.opcode == Instruction::InsertValue) || + (i > 0 && Exp.opcode == Instruction::ExtractValue)) + continue; + Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn); + } + + if (Exp.commutative) { + assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!"); + if (Exp.varargs[0] > Exp.varargs[1]) { + std::swap(Exp.varargs[0], Exp.varargs[1]); + uint32_t Opcode = Exp.opcode >> 8; + if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) + Exp.opcode = (Opcode << 8) | + CmpInst::getSwappedPredicate( + static_cast<CmpInst::Predicate>(Exp.opcode & 255)); + } + } + + if (uint32_t NewNum = expressionNumbering[Exp]) + return NewNum; + return Num; +} + // In order to find a leader for a given value number at a // specific basic block, we first obtain the list of all Values for that number, // and then scan the list to find one whose block dominates the block in @@ -1495,6 +1601,15 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, return Pred != nullptr; } + +void GVN::assignBlockRPONumber(Function &F) { + uint32_t NextBlockNumber = 1; + ReversePostOrderTraversal<Function *> RPOT(&F); + for (BasicBlock *BB : RPOT) + BlockRPONumber[BB] = NextBlockNumber++; +} + + // Tries to replace instruction with const, using information from // ReplaceWithConstMap. bool GVN::replaceOperandsWithConsts(Instruction *Instr) const { @@ -1856,6 +1971,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, // Fabricate val-num for dead-code in order to suppress assertion in // performPRE(). assignValNumForDeadCode(); + assignBlockRPONumber(F); bool PREChanged = true; while (PREChanged) { PREChanged = performPRE(F); @@ -1927,7 +2043,7 @@ bool GVN::processBlock(BasicBlock *BB) { // Instantiate an expression in a predecessor that lacked it. bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, - unsigned int ValNo) { + BasicBlock *Curr, unsigned int ValNo) { // Because we are going top-down through the block, all value numbers // will be available in the predecessor by the time we need them. Any // that weren't originally present will have been instantiated earlier @@ -1945,7 +2061,9 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, success = false; break; } - if (Value *V = findLeader(Pred, VN.lookup(Op))) { + uint32_t TValNo = + VN.phiTranslate(Pred, Curr, VN.lookup(Op), *this); + if (Value *V = findLeader(Pred, TValNo)) { Instr->setOperand(i, V); } else { success = false; @@ -1962,10 +2080,12 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, Instr->insertBefore(Pred->getTerminator()); Instr->setName(Instr->getName() + ".pre"); Instr->setDebugLoc(Instr->getDebugLoc()); - VN.add(Instr, ValNo); + + unsigned Num = VN.lookupOrAdd(Instr); + VN.add(Instr, Num); // Update the availability map to include the new instruction. 
- addToLeaderTable(ValNo, Instr, Pred); + addToLeaderTable(Num, Instr, Pred); return true; } @@ -2003,18 +2123,27 @@ bool GVN::performScalarPRE(Instruction *CurInst) { SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap; for (BasicBlock *P : predecessors(CurrentBlock)) { - // We're not interested in PRE where the block is its - // own predecessor, or in blocks with predecessors - // that are not reachable. - if (P == CurrentBlock) { + // We're not interested in PRE in blocks with predecessors that are + // not reachable. + if (!DT->isReachableFromEntry(P)) { NumWithout = 2; break; - } else if (!DT->isReachableFromEntry(P)) { + } + // It is not safe to do PRE when P->CurrentBlock is a loop backedge and + // CurInst has an operand defined in CurrentBlock (so it may be defined + // by a phi in the loop header). + if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock] && + any_of(CurInst->operands(), [&](const Use &U) { + if (auto *Inst = dyn_cast<Instruction>(U.get())) + return Inst->getParent() == CurrentBlock; + return false; + })) { NumWithout = 2; break; } - Value *predV = findLeader(P, ValNo); + uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this); + Value *predV = findLeader(P, TValNo); if (!predV) { predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P)); PREPred = P; @@ -2054,7 +2183,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) { } // We need to insert somewhere, so let's give it a shot PREInstr = CurInst->clone(); - if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) { + if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) { // If we failed insertion, make sure we remove the instruction. DEBUG(verifyRemoved(PREInstr)); PREInstr->deleteValue(); @@ -2168,6 +2297,7 @@ bool GVN::iterateOnFunction(Function &F) { void GVN::cleanupGlobalSets() { VN.clear(); LeaderTable.clear(); + BlockRPONumber.clear(); TableAllocator.Reset(); } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index c120036464d0a..05293eb0079fc 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -25,6 +25,7 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" @@ -576,7 +577,12 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( // Handle compare with phi operand, where the PHI is defined in this block. if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) { assert(Preference == WantInteger && "Compares only produce integers"); - PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0)); + Type *CmpType = Cmp->getType(); + Value *CmpLHS = Cmp->getOperand(0); + Value *CmpRHS = Cmp->getOperand(1); + CmpInst::Predicate Pred = Cmp->getPredicate(); + + PHINode *PN = dyn_cast<PHINode>(CmpLHS); if (PN && PN->getParent() == BB) { const DataLayout &DL = PN->getModule()->getDataLayout(); // We can do this simplification if any comparisons fold to true or false.
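A standalone toy model of the phi-translation idea the GVN changes above introduce (hypothetical names and a plain std::map expression table, not the ValueTable class): to ask whether %a + %c is available in a predecessor when %a is a phi, substitute the phi's incoming value number for that predecessor and re-look-up the expression.

#include <cassert>
#include <map>
#include <string>
#include <utility>

// Value numbers for a tiny example:
//   %a = phi [ %x, Pred1 ], [ %y, Pred2 ];  %s = add %a, %c
using Expr = std::pair<std::string, std::pair<unsigned, unsigned>>;

std::map<std::string, unsigned> VN = {{"x", 1}, {"y", 2}, {"c", 3}, {"a", 4}};
std::map<Expr, unsigned> ExprVN = {
    {{"add", {1, 3}}, 5}, // %x + %c, already computed in Pred1
    {{"add", {4, 3}}, 6}, // %a + %c in the phi block
};
std::map<std::string, unsigned> PhiIncoming = {{"Pred1", 1}, {"Pred2", 2}};

// Translate the number of "add %a, %c" into predecessor Pred: replace the
// phi operand's number with the incoming value's number, then re-look-up.
unsigned phiTranslateAdd(const std::string &Pred) {
  Expr Translated{"add", {PhiIncoming[Pred], VN["c"]}};
  auto It = ExprVN.find(Translated);
  return It != ExprVN.end() ? It->second : 6; // fall back to original number
}

int main() {
  assert(phiTranslateAdd("Pred1") == 5); // a leader exists: PRE can reuse it
  assert(phiTranslateAdd("Pred2") == 6); // nothing available: PRE must insert
}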
@@ -584,15 +590,15 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *PredBB = PN->getIncomingBlock(i); Value *LHS = PN->getIncomingValue(i); - Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB); + Value *RHS = CmpRHS->DoPHITranslation(BB, PredBB); - Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, {DL}); + Value *Res = SimplifyCmpInst(Pred, LHS, RHS, {DL}); if (!Res) { if (!isa<Constant>(RHS)) continue; LazyValueInfo::Tristate - ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS, + ResT = LVI->getPredicateOnEdge(Pred, LHS, cast<Constant>(RHS), PredBB, BB, CxtI ? CxtI : Cmp); if (ResT == LazyValueInfo::Unknown) @@ -609,27 +615,67 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( // If comparing a live-in value against a constant, see if we know the // live-in value on any predecessors. - if (isa<Constant>(Cmp->getOperand(1)) && !Cmp->getType()->isVectorTy()) { - Constant *CmpConst = cast<Constant>(Cmp->getOperand(1)); + if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) { + Constant *CmpConst = cast<Constant>(CmpRHS); - if (!isa<Instruction>(Cmp->getOperand(0)) || - cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) { + if (!isa<Instruction>(CmpLHS) || + cast<Instruction>(CmpLHS)->getParent() != BB) { for (BasicBlock *P : predecessors(BB)) { // If the value is known by LazyValueInfo to be a constant in a // predecessor, use that information to try to thread this block. LazyValueInfo::Tristate Res = - LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), + LVI->getPredicateOnEdge(Pred, CmpLHS, CmpConst, P, BB, CxtI ? CxtI : Cmp); if (Res == LazyValueInfo::Unknown) continue; - Constant *ResC = ConstantInt::get(Cmp->getType(), Res); + Constant *ResC = ConstantInt::get(CmpType, Res); Result.push_back(std::make_pair(ResC, P)); } return !Result.empty(); } + // InstCombine can fold some forms of constant range checks into + // (icmp (add (x, C1)), C2). See if we have such a thing with + // x as a live-in. + { + using namespace PatternMatch; + Value *AddLHS; + ConstantInt *AddConst; + if (isa<ConstantInt>(CmpConst) && + match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) { + if (!isa<Instruction>(AddLHS) || + cast<Instruction>(AddLHS)->getParent() != BB) { + for (BasicBlock *P : predecessors(BB)) { + // If the value is known by LazyValueInfo to be a ConstantRange in + // a predecessor, use that information to try to thread this + // block. + ConstantRange CR = LVI->getConstantRangeOnEdge( + AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS)); + // Propagate the range through the addition. + CR = CR.add(AddConst->getValue()); + + // Get the range where the compare returns true. + ConstantRange CmpRange = ConstantRange::makeExactICmpRegion( + Pred, cast<ConstantInt>(CmpConst)->getValue()); + + Constant *ResC; + if (CmpRange.contains(CR)) + ResC = ConstantInt::getTrue(CmpType); + else if (CmpRange.inverse().contains(CR)) + ResC = ConstantInt::getFalse(CmpType); + else + continue; + + Result.push_back(std::make_pair(ResC, P)); + } + + return !Result.empty(); + } + } + } + // Try to find a constant value for the LHS of a comparison, // and evaluate it statically if we can.
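A standalone sketch of the range reasoning in the path just added above (a simplified, non-wrapping [Lo, Hi) range stands in for ConstantRange, which also handles wrap-around): propagate the known range through the add, then test it against the region where the compare is true.

#include <cassert>
#include <cstdint>

// Simplified, non-wrapping stand-in for ConstantRange: [Lo, Hi).
struct Range {
  int64_t Lo, Hi;
  Range add(int64_t C) const { return {Lo + C, Hi + C}; }
  bool contains(const Range &R) const { return Lo <= R.Lo && R.Hi <= Hi; }
};

// Decide (x + AddC) < CmpC for every x in CR, the way the threading code
// does with CR.add() and makeExactICmpRegion(). Returns 1 if always true,
// 0 if always false, -1 if the edge cannot be decided.
int evaluateCmp(Range CR, int64_t AddC, int64_t CmpC) {
  Range Shifted = CR.add(AddC);
  Range TrueRegion{INT64_MIN, CmpC};  // where x' < CmpC holds
  Range FalseRegion{CmpC, INT64_MAX}; // the inverse region
  if (TrueRegion.contains(Shifted))
    return 1;
  if (FalseRegion.contains(Shifted))
    return 0;
  return -1;
}

int main() {
  // x known to be in [0, 10) on some predecessor edge:
  assert(evaluateCmp({0, 10}, 5, 100) == 1);  // x + 5 in [5, 15): always < 100
  assert(evaluateCmp({0, 10}, 100, 50) == 0); // x + 100 in [100, 110): never < 50
  assert(evaluateCmp({0, 10}, 5, 12) == -1);  // x + 5 straddles 12: unknown
}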
PredValueInfoTy LHSVals; @@ -638,8 +684,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( for (const auto &LHSVal : LHSVals) { Constant *V = LHSVal.first; - Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(), - V, CmpConst); + Constant *Folded = ConstantExpr::getCompare(Pred, V, CmpConst); if (Constant *KC = getKnownConstant(Folded, WantInteger)) Result.push_back(std::make_pair(KC, LHSVal.second)); } @@ -752,6 +797,37 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { LVI->eraseBlock(SinglePred); MergeBasicBlockIntoOnlyPred(BB); + // Now that BB is merged into SinglePred (i.e. SinglePred code followed by + // BB code within one basic block `BB`), we need to invalidate the LVI + // information associated with BB, because the LVI information need not be + // true for all of BB after the merge. For example, + // Before the merge, the LVI info and code are as follows: + // SinglePred: <LVI info1 for %p val> + // %y = use of %p + // call @exit() // need not transfer execution to successor. + // assume(%p) // from this point on %p is true + // br label %BB + // BB: <LVI info2 for %p val, i.e. %p is true> + // %x = use of %p + // br label exit + // + // Note that this LVI info for blocks BB and SinglePred is correct for %p + // (info2 and info1 respectively). After the merge and the deletion of the + // LVI info1 for SinglePred, we have the following code: + // BB: <LVI info2 for %p val> + // %y = use of %p + // call @exit() + // assume(%p) + // %x = use of %p <-- LVI info2 is correct from here onwards. + // br label exit + // LVI info2 for BB is incorrect at the beginning of BB. + + // Invalidate LVI information for BB if the LVI is not provably true for + // all of BB. + if (any_of(*BB, [](Instruction &I) { + return !isGuaranteedToTransferExecutionToSuccessor(&I); + })) + LVI->eraseBlock(BB); return true; } } diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp deleted file mode 100644 index 025ba1bfedc18..0000000000000 --- a/lib/Transforms/Scalar/LoadCombine.cpp +++ /dev/null @@ -1,295 +0,0 @@ -//===- LoadCombine.cpp - Combine Adjacent Loads ---------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// This transformation combines adjacent loads.
-/// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/TargetFolder.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" - -using namespace llvm; - -#define DEBUG_TYPE "load-combine" - -STATISTIC(NumLoadsAnalyzed, "Number of loads analyzed for combining"); -STATISTIC(NumLoadsCombined, "Number of loads combined"); - -#define LDCOMBINE_NAME "Combine Adjacent Loads" - -namespace { -struct PointerOffsetPair { - Value *Pointer; - APInt Offset; -}; - -struct LoadPOPPair { - LoadInst *Load; - PointerOffsetPair POP; - /// \brief The new load needs to be created before the first load in IR order. - unsigned InsertOrder; -}; - -class LoadCombine : public BasicBlockPass { - LLVMContext *C; - AliasAnalysis *AA; - DominatorTree *DT; - -public: - LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) { - initializeLoadCombinePass(*PassRegistry::getPassRegistry()); - } - - using llvm::Pass::doInitialization; - bool doInitialization(Function &) override; - bool runOnBasicBlock(BasicBlock &BB) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired<AAResultsWrapperPass>(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - } - - StringRef getPassName() const override { return LDCOMBINE_NAME; } - static char ID; - - typedef IRBuilder<TargetFolder> BuilderTy; - -private: - BuilderTy *Builder; - - PointerOffsetPair getPointerOffsetPair(LoadInst &); - bool combineLoads(DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &); - bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &); - bool combineLoads(SmallVectorImpl<LoadPOPPair> &); -}; -} - -bool LoadCombine::doInitialization(Function &F) { - DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n"); - C = &F.getContext(); - return true; -} - -PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) { - auto &DL = LI.getModule()->getDataLayout(); - - PointerOffsetPair POP; - POP.Pointer = LI.getPointerOperand(); - unsigned BitWidth = DL.getPointerSizeInBits(LI.getPointerAddressSpace()); - POP.Offset = APInt(BitWidth, 0); - - while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) { - if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) { - APInt LastOffset = POP.Offset; - if (!GEP->accumulateConstantOffset(DL, POP.Offset)) { - // Can't handle GEPs with variable indices. 
- POP.Offset = LastOffset; - return POP; - } - POP.Pointer = GEP->getPointerOperand(); - } else if (auto *BC = dyn_cast<BitCastInst>(POP.Pointer)) { - POP.Pointer = BC->getOperand(0); - } - } - return POP; -} - -bool LoadCombine::combineLoads( - DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) { - bool Combined = false; - for (auto &Loads : LoadMap) { - if (Loads.second.size() < 2) - continue; - std::sort(Loads.second.begin(), Loads.second.end(), - [](const LoadPOPPair &A, const LoadPOPPair &B) { - return A.POP.Offset.slt(B.POP.Offset); - }); - if (aggregateLoads(Loads.second)) - Combined = true; - } - return Combined; -} - -/// \brief Try to aggregate loads from a sorted list of loads to be combined. -/// -/// It is guaranteed that no writes occur between any of the loads. All loads -/// have the same base pointer. There are at least two loads. -bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) { - assert(Loads.size() >= 2 && "Insufficient loads!"); - LoadInst *BaseLoad = nullptr; - SmallVector<LoadPOPPair, 8> AggregateLoads; - bool Combined = false; - bool ValidPrevOffset = false; - APInt PrevOffset; - uint64_t PrevSize = 0; - for (auto &L : Loads) { - if (ValidPrevOffset == false) { - BaseLoad = L.Load; - PrevOffset = L.POP.Offset; - PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize( - L.Load->getType()); - AggregateLoads.push_back(L); - ValidPrevOffset = true; - continue; - } - if (L.Load->getAlignment() > BaseLoad->getAlignment()) - continue; - APInt PrevEnd = PrevOffset + PrevSize; - if (L.POP.Offset.sgt(PrevEnd)) { - // No other load will be combinable - if (combineLoads(AggregateLoads)) - Combined = true; - AggregateLoads.clear(); - ValidPrevOffset = false; - continue; - } - if (L.POP.Offset != PrevEnd) - // This load is offset less than the size of the last load. - // FIXME: We may want to handle this case. - continue; - PrevOffset = L.POP.Offset; - PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize( - L.Load->getType()); - AggregateLoads.push_back(L); - } - if (combineLoads(AggregateLoads)) - Combined = true; - return Combined; -} - -/// \brief Given a list of combinable load. Combine the maximum number of them. -bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) { - // Remove loads from the end while the size is not a power of 2. - unsigned TotalSize = 0; - for (const auto &L : Loads) - TotalSize += L.Load->getType()->getPrimitiveSizeInBits(); - while (TotalSize != 0 && !isPowerOf2_32(TotalSize)) - TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits(); - if (Loads.size() < 2) - return false; - - DEBUG({ - dbgs() << "***** Combining Loads ******\n"; - for (const auto &L : Loads) { - dbgs() << L.POP.Offset << ": " << *L.Load << "\n"; - } - }); - - // Find first load. This is where we put the new load. 
- LoadPOPPair FirstLP; - FirstLP.InsertOrder = -1u; - for (const auto &L : Loads) - if (L.InsertOrder < FirstLP.InsertOrder) - FirstLP = L; - - unsigned AddressSpace = - FirstLP.POP.Pointer->getType()->getPointerAddressSpace(); - - Builder->SetInsertPoint(FirstLP.Load); - Value *Ptr = Builder->CreateConstGEP1_64( - Builder->CreatePointerCast(Loads[0].POP.Pointer, - Builder->getInt8PtrTy(AddressSpace)), - Loads[0].POP.Offset.getSExtValue()); - LoadInst *NewLoad = new LoadInst( - Builder->CreatePointerCast( - Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize), - Ptr->getType()->getPointerAddressSpace())), - Twine(Loads[0].Load->getName()) + ".combined", false, - Loads[0].Load->getAlignment(), FirstLP.Load); - - for (const auto &L : Loads) { - Builder->SetInsertPoint(L.Load); - Value *V = Builder->CreateExtractInteger( - L.Load->getModule()->getDataLayout(), NewLoad, - cast<IntegerType>(L.Load->getType()), - (L.POP.Offset - Loads[0].POP.Offset).getZExtValue(), "combine.extract"); - L.Load->replaceAllUsesWith(V); - } - - NumLoadsCombined += Loads.size(); - return true; -} - -bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { - if (skipBasicBlock(BB)) - return false; - - AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); - DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - - // Skip analysing dead blocks (not forward reachable from function entry). - if (!DT->isReachableFromEntry(&BB)) { - DEBUG(dbgs() << "LC: skipping unreachable " << BB.getName() << - " in " << BB.getParent()->getName() << "\n"); - return false; - } - - IRBuilder<TargetFolder> TheBuilder( - BB.getContext(), TargetFolder(BB.getModule()->getDataLayout())); - Builder = &TheBuilder; - - DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap; - AliasSetTracker AST(*AA); - - bool Combined = false; - unsigned Index = 0; - for (auto &I : BB) { - if (I.mayThrow() || AST.containsUnknown(&I)) { - if (combineLoads(LoadMap)) - Combined = true; - LoadMap.clear(); - AST.clear(); - continue; - } - if (I.mayWriteToMemory()) { - AST.add(&I); - continue; - } - LoadInst *LI = dyn_cast<LoadInst>(&I); - if (!LI) - continue; - ++NumLoadsAnalyzed; - if (!LI->isSimple() || !LI->getType()->isIntegerTy()) - continue; - auto POP = getPointerOffsetPair(*LI); - if (!POP.Pointer) - continue; - LoadMap[POP.Pointer].push_back({LI, std::move(POP), Index++}); - AST.add(LI); - } - if (combineLoads(LoadMap)) - Combined = true; - return Combined; -} - -char LoadCombine::ID = 0; - -BasicBlockPass *llvm::createLoadCombinePass() { - return new LoadCombine(); -} - -INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false) diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 3151ccd279c41..c41cc42db5e2c 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -31,20 +31,19 @@ using namespace llvm; STATISTIC(NumDeleted, "Number of loops deleted"); /// This function deletes dead loops. The caller of this function needs to -/// guarantee that the loop is infact dead. Here we handle two kinds of dead +/// guarantee that the loop is infact dead. Here we handle two kinds of dead /// loop. The first kind (\p isLoopDead) is where only invariant values from /// within the loop are used outside of it. The second kind (\p /// isLoopNeverExecuted) is where the loop is provably never executed. 
We can -/// always remove never executed loops since they will not cause any -/// difference to program behaviour. +/// always remove never executed loops since they will not cause any difference +/// to program behaviour. /// /// This also updates the relevant analysis information in \p DT, \p SE, and \p /// LI. It also updates the loop PM if an updater struct is provided. // TODO: This function will be used by loop-simplifyCFG as well. So, move this // to LoopUtils.cpp static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, - LoopInfo &LI, bool LoopIsNeverExecuted, - LPMUpdater *Updater = nullptr); + LoopInfo &LI, LPMUpdater *Updater = nullptr); /// Determines if a loop is dead. /// /// This assumes that we've already checked for unique exit and exiting blocks, @@ -168,7 +167,14 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, BasicBlock *ExitBlock = L->getUniqueExitBlock(); if (ExitBlock && isLoopNeverExecuted(L)) { - deleteDeadLoop(L, DT, SE, LI, true /* LoopIsNeverExecuted */, Updater); + // Set incoming value to undef for phi nodes in the exit block. + BasicBlock::iterator BI = ExitBlock->begin(); + while (PHINode *P = dyn_cast<PHINode>(BI)) { + for (unsigned i = 0; i < P->getNumIncomingValues(); i++) + P->setIncomingValue(i, UndefValue::get(P->getType())); + BI++; + } + deleteDeadLoop(L, DT, SE, LI, Updater); ++NumDeleted; return true; } @@ -196,15 +202,14 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, if (isa<SCEVCouldNotCompute>(S)) return Changed; - deleteDeadLoop(L, DT, SE, LI, false /* LoopIsNeverExecuted */, Updater); + deleteDeadLoop(L, DT, SE, LI, Updater); ++NumDeleted; return true; } static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, - LoopInfo &LI, bool LoopIsNeverExecuted, - LPMUpdater *Updater) { + LoopInfo &LI, LPMUpdater *Updater) { assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); auto *Preheader = L->getLoopPreheader(); assert(Preheader && "Preheader should exist!"); @@ -227,6 +232,8 @@ static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, auto *ExitBlock = L->getUniqueExitBlock(); assert(ExitBlock && "Should have a unique exit block!"); + assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); + // Connect the preheader directly to the exit block. // Even when the loop is never executed, we cannot remove the edge from the // source block to the exit block. Consider the case where the unexecuted loop @@ -236,20 +243,28 @@ static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, // non-loop, it will be deleted in a future iteration of loop deletion pass. Preheader->getTerminator()->replaceUsesOfWith(L->getHeader(), ExitBlock); - SmallVector<BasicBlock *, 4> ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); // Rewrite phis in the exit block to get their inputs from the Preheader // instead of the exiting block. - BasicBlock *ExitingBlock = ExitingBlocks[0]; BasicBlock::iterator BI = ExitBlock->begin(); while (PHINode *P = dyn_cast<PHINode>(BI)) { - int j = P->getBasicBlockIndex(ExitingBlock); - assert(j >= 0 && "Can't find exiting block in exit block's phi node!"); - if (LoopIsNeverExecuted) - P->setIncomingValue(j, UndefValue::get(P->getType())); - P->setIncomingBlock(j, Preheader); - for (unsigned i = 1; i < ExitingBlocks.size(); ++i) - P->removeIncomingValue(ExitingBlocks[i]); + // Set the zero'th element of Phi to be from the preheader and remove all + // other incoming values. 
Given the loop has dedicated exits, all other + // incoming values must be from the exiting blocks. + int PredIndex = 0; + P->setIncomingBlock(PredIndex, Preheader); + // Removes all incoming values from all other exiting blocks (including + // duplicate values from an exiting block). + // Nuke all entries except the zero'th entry which is the preheader entry. + // NOTE! We need to remove Incoming Values in the reverse order as done + // below, to keep the indices valid for deletion (removeIncomingValues + // updates getNumIncomingValues and shifts all values down into the operand + // being deleted). + for (unsigned i = 0, e = P->getNumIncomingValues() - 1; i != e; ++i) + P->removeIncomingValue(e-i, false); + + assert((P->getNumIncomingValues() == 1 && + P->getIncomingBlock(PredIndex) == Preheader) && + "Should have exactly one value and that's from the preheader!"); ++BI; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index b027278b24f2e..73436f13c94e4 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -131,7 +131,7 @@ static cl::opt<bool> EnablePhiElim( // The flag adds instruction count to solutions cost comparision. static cl::opt<bool> InsnsCost( - "lsr-insns-cost", cl::Hidden, cl::init(true), + "lsr-insns-cost", cl::Hidden, cl::init(false), cl::desc("Add instruction count to a LSR cost model")); // Flag to choose how to narrow complex lsr solution diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index cbbd55512c9f5..7a7624f775429 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -1244,27 +1244,24 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const { // only do this for simple stores, we should expand to cover memcpys, etc. const auto *LastStore = createStoreExpression(SI, StoreRHS); const auto *LastCC = ExpressionToClass.lookup(LastStore); - // Basically, check if the congruence class the store is in is defined by a - // store that isn't us, and has the same value. MemorySSA takes care of - // ensuring the store has the same memory state as us already. - // The RepStoredValue gets nulled if all the stores disappear in a class, so - // we don't need to check if the class contains a store besides us. - if (LastCC && - LastCC->getStoredValue() == lookupOperandLeader(SI->getValueOperand())) + // We really want to check whether the expression we matched was a store. No + // easy way to do that. However, we can check that the class we found has a + // store, which, assuming the value numbering state is not corrupt, is + // sufficient, because we must also be equivalent to that store's expression + // for it to be in the same class as the load. + if (LastCC && LastCC->getStoredValue() == LastStore->getStoredValue()) return LastStore; - deleteExpression(LastStore); // Also check if our value operand is defined by a load of the same memory // location, and the memory state is the same as it was then (otherwise, it // could have been overwritten later. See test32 in // transforms/DeadStoreElimination/simple.ll). 
- if (auto *LI = - dyn_cast<LoadInst>(lookupOperandLeader(SI->getValueOperand()))) { + if (auto *LI = dyn_cast<LoadInst>(LastStore->getStoredValue())) if ((lookupOperandLeader(LI->getPointerOperand()) == - lookupOperandLeader(SI->getPointerOperand())) && + LastStore->getOperand(0)) && (lookupMemoryLeader(getMemoryAccess(LI)->getDefiningAccess()) == StoreRHS)) - return createStoreExpression(SI, StoreRHS); - } + return LastStore; + deleteExpression(LastStore); } // If the store is not equivalent to anything, value number it as a store that @@ -2332,9 +2329,7 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) { // see if we know some constant value for it already. Value *NewGVN::findConditionEquivalence(Value *Cond) const { auto Result = lookupOperandLeader(Cond); - if (isa<Constant>(Result)) - return Result; - return nullptr; + return isa<Constant>(Result) ? Result : nullptr; } // Process the outgoing edges of a block for reachability. @@ -3014,14 +3009,27 @@ void NewGVN::verifyIterationSettled(Function &F) { // a no-longer valid StoreExpression. void NewGVN::verifyStoreExpressions() const { #ifndef NDEBUG - DenseSet<std::pair<const Value *, const Value *>> StoreExpressionSet; + // This is the only use of this, and it's not worth defining a complicated + // densemapinfo hash/equality function for it. + std::set< + std::pair<const Value *, + std::tuple<const Value *, const CongruenceClass *, Value *>>> + StoreExpressionSet; for (const auto &KV : ExpressionToClass) { if (auto *SE = dyn_cast<StoreExpression>(KV.first)) { // Make sure a version that will conflict with loads is not already there - auto Res = - StoreExpressionSet.insert({SE->getOperand(0), SE->getMemoryLeader()}); - assert(Res.second && - "Stored expression conflict exists in expression table"); + auto Res = StoreExpressionSet.insert( + {SE->getOperand(0), std::make_tuple(SE->getMemoryLeader(), KV.second, + SE->getStoredValue())}); + bool Okay = Res.second; + // It's okay to have the same expression already in there if it is + // identical in nature. + // This can happen when the leader of the stored value changes over time. 
+ if (!Okay) + Okay = (std::get<1>(Res.first->second) == KV.second) && + (lookupOperandLeader(std::get<2>(Res.first->second)) == + lookupOperandLeader(SE->getStoredValue())); + assert(Okay && "Stored expression conflict exists in expression table"); auto *ValueExpr = ValueToExpression.lookup(SE->getStoreInst()); assert(ValueExpr && ValueExpr->equals(*SE) && "StoreExpression in ExpressionToClass is not latest " diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index a20890b22603e..6da551bd7efd6 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -106,11 +107,12 @@ XorOpnd::XorOpnd(Value *V) { I->getOpcode() == Instruction::And)) { Value *V0 = I->getOperand(0); Value *V1 = I->getOperand(1); - if (isa<ConstantInt>(V0)) + const APInt *C; + if (match(V0, PatternMatch::m_APInt(C))) std::swap(V0, V1); - if (ConstantInt *C = dyn_cast<ConstantInt>(V1)) { - ConstPart = C->getValue(); + if (match(V1, PatternMatch::m_APInt(C))) { + ConstPart = *C; SymbolicPart = V0; isOr = (I->getOpcode() == Instruction::Or); return; @@ -119,7 +121,7 @@ XorOpnd::XorOpnd(Value *V) { // view the operand as "V | 0" SymbolicPart = V; - ConstPart = APInt::getNullValue(V->getType()->getIntegerBitWidth()); + ConstPart = APInt::getNullValue(V->getType()->getScalarSizeInBits()); isOr = true; } @@ -955,8 +957,8 @@ static BinaryOperator *ConvertShiftToMul(Instruction *Shl) { /// Scan backwards and forwards among values with the same rank as element i /// to see if X exists. If X does not exist, return i. This is useful when /// scanning for 'x' when we see '-x' because they both get the same rank. -static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i, - Value *X) { +static unsigned FindInOperandList(const SmallVectorImpl<ValueEntry> &Ops, + unsigned i, Value *X) { unsigned XRank = Ops[i].Rank; unsigned e = Ops.size(); for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) { @@ -1134,20 +1136,19 @@ static Value *OptimizeAndOrXor(unsigned Opcode, /// instruction. There are two special cases: 1) if the constant operand is 0, /// it will return NULL. 2) if the constant is ~0, the symbolic operand will /// be returned. -static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd, +static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd, const APInt &ConstOpnd) { - if (ConstOpnd != 0) { - if (!ConstOpnd.isAllOnesValue()) { - LLVMContext &Ctx = Opnd->getType()->getContext(); - Instruction *I; - I = BinaryOperator::CreateAnd(Opnd, ConstantInt::get(Ctx, ConstOpnd), - "and.ra", InsertBefore); - I->setDebugLoc(InsertBefore->getDebugLoc()); - return I; - } + if (ConstOpnd.isNullValue()) + return nullptr; + + if (ConstOpnd.isAllOnesValue()) return Opnd; - } - return nullptr; + + Instruction *I = BinaryOperator::CreateAnd( + Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra", + InsertBefore); + I->setDebugLoc(InsertBefore->getDebugLoc()); + return I; } // Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd" @@ -1163,24 +1164,24 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, // = ((x | c1) ^ c1) ^ (c1 ^ c2) // = (x & ~c1) ^ (c1 ^ c2) // It is useful only when c1 == c2. 
- if (Opnd1->isOrExpr() && Opnd1->getConstPart() != 0) { - if (!Opnd1->getValue()->hasOneUse()) - return false; + if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isNullValue()) + return false; - const APInt &C1 = Opnd1->getConstPart(); - if (C1 != ConstOpnd) - return false; + if (!Opnd1->getValue()->hasOneUse()) + return false; - Value *X = Opnd1->getSymbolicPart(); - Res = createAndInstr(I, X, ~C1); - // ConstOpnd was C2, now C1 ^ C2. - ConstOpnd ^= C1; + const APInt &C1 = Opnd1->getConstPart(); + if (C1 != ConstOpnd) + return false; - if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue())) - RedoInsts.insert(T); - return true; - } - return false; + Value *X = Opnd1->getSymbolicPart(); + Res = createAndInstr(I, X, ~C1); + // ConstOpnd was C2, now C1 ^ C2. + ConstOpnd ^= C1; + + if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue())) + RedoInsts.insert(T); + return true; } @@ -1221,8 +1222,8 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt C3((~C1) ^ C2); // Do not increase code size! - if (C3 != 0 && !C3.isAllOnesValue()) { - int NewInstNum = ConstOpnd != 0 ? 1 : 2; + if (!C3.isNullValue() && !C3.isAllOnesValue()) { + int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2; if (NewInstNum > DeadInstNum) return false; } @@ -1238,8 +1239,8 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt C3 = C1 ^ C2; // Do not increase code size - if (C3 != 0 && !C3.isAllOnesValue()) { - int NewInstNum = ConstOpnd != 0 ? 1 : 2; + if (!C3.isNullValue() && !C3.isAllOnesValue()) { + int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2; if (NewInstNum > DeadInstNum) return false; } @@ -1279,17 +1280,20 @@ Value *ReassociatePass::OptimizeXor(Instruction *I, SmallVector<XorOpnd, 8> Opnds; SmallVector<XorOpnd*, 8> OpndPtrs; Type *Ty = Ops[0].Op->getType(); - APInt ConstOpnd(Ty->getIntegerBitWidth(), 0); + APInt ConstOpnd(Ty->getScalarSizeInBits(), 0); // Step 1: Convert ValueEntry to XorOpnd for (unsigned i = 0, e = Ops.size(); i != e; ++i) { Value *V = Ops[i].Op; - if (!isa<ConstantInt>(V)) { + const APInt *C; + // TODO: Support non-splat vectors. + if (match(V, PatternMatch::m_APInt(C))) { + ConstOpnd ^= *C; + } else { XorOpnd O(V); O.setSymbolicRank(getRank(O.getSymbolicPart())); Opnds.push_back(O); - } else - ConstOpnd ^= cast<ConstantInt>(V)->getValue(); + } } // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds". 
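The algebra that CombineXorOpnd relies on can be checked exhaustively in isolation; a small standalone demo of the identity quoted above, (x | c1) ^ c2 == (x & ~c1) ^ (c1 ^ c2), which collapses to x & ~c1 in the profitable c1 == c2 case:

#include <cassert>
#include <cstdint>

int main() {
  // Exhaustive over 6-bit values; the identity is bitwise, so it
  // generalizes to any width.
  for (uint32_t x = 0; x < 64; ++x)
    for (uint32_t c1 = 0; c1 < 64; ++c1)
      for (uint32_t c2 = 0; c2 < 64; ++c2) {
        assert(((x | c1) ^ c2) == ((x & ~c1) ^ (c1 ^ c2)));
        assert(((x | c1) ^ c1) == (x & ~c1)); // the c1 == c2 special case
      }
}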
@@ -1327,7 +1331,8 @@ Value *ReassociatePass::OptimizeXor(Instruction *I, Value *CV; // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd" - if (ConstOpnd != 0 && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) { + if (!ConstOpnd.isNullValue() && + CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) { Changed = true; if (CV) *CurrOpnd = XorOpnd(CV); @@ -1369,17 +1374,17 @@ Value *ReassociatePass::OptimizeXor(Instruction *I, ValueEntry VE(getRank(O.getValue()), O.getValue()); Ops.push_back(VE); } - if (ConstOpnd != 0) { - Value *C = ConstantInt::get(Ty->getContext(), ConstOpnd); + if (!ConstOpnd.isNullValue()) { + Value *C = ConstantInt::get(Ty, ConstOpnd); ValueEntry VE(getRank(C), C); Ops.push_back(VE); } - int Sz = Ops.size(); + unsigned Sz = Ops.size(); if (Sz == 1) return Ops.back().Op; - else if (Sz == 0) { - assert(ConstOpnd == 0); - return ConstantInt::get(Ty->getContext(), ConstOpnd); + if (Sz == 0) { + assert(ConstOpnd.isNullValue()); + return ConstantInt::get(Ty, ConstOpnd); } } @@ -1627,8 +1632,8 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I, /// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)] /// /// \returns Whether any factors have a power greater than one. -bool ReassociatePass::collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops, - SmallVectorImpl<Factor> &Factors) { +static bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops, + SmallVectorImpl<Factor> &Factors) { // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this. // Compute the sum of powers of simplifiable factors. unsigned FactorPowerSum = 0; @@ -1999,11 +2004,6 @@ void ReassociatePass::OptimizeInst(Instruction *I) { if (I->isCommutative()) canonicalizeOperands(I); - // TODO: We should optimize vector Xor instructions, but they are - // currently unsupported. - if (I->getType()->isVectorTy() && I->getOpcode() == Instruction::Xor) - return; - // Don't optimize floating point instructions that don't have unsafe algebra. if (I->getType()->isFPOrFPVectorTy() && !I->hasUnsafeAlgebra()) return; diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index c6929c33b3e9e..7a6fa1711411d 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -536,9 +536,10 @@ private: void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ } void visitFenceInst (FenceInst &I) { /*returns void*/ } void visitInstruction(Instruction &I) { - // If a new instruction is added to LLVM that we don't handle. + // All the instructions we don't do any special handling for just + // go to overdefined. 
DEBUG(dbgs() << "SCCP: Don't know how to handle: " << I << '\n'); - markOverdefined(&I); // Just in case + markOverdefined(&I); } }; @@ -1814,15 +1815,11 @@ static bool runIPSCCP(Module &M, const DataLayout &DL, if (F.isDeclaration()) continue; - if (Solver.isBlockExecutable(&F.front())) { + if (Solver.isBlockExecutable(&F.front())) for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E; - ++AI) { - if (AI->use_empty()) - continue; - if (tryToReplaceWithConstant(Solver, &*AI)) + ++AI) + if (!AI->use_empty() && tryToReplaceWithConstant(Solver, &*AI)) ++IPNumArgsElimed; - } - } for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { if (!Solver.isBlockExecutable(&*BB)) { diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 1527f15f18a33..80fbbeb6829bb 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -1252,7 +1252,7 @@ static bool isSafeSelectToSpeculate(SelectInst &SI) { if (!LI || !LI->isSimple()) return false; - // Both operands to the select need to be dereferencable, either + // Both operands to the select need to be dereferenceable, either // absolutely (e.g. allocas) or at this point because we can see other // accesses to it. if (!isSafeToLoadUnconditionally(TValue, LI->getAlignment(), DL, LI)) @@ -1637,8 +1637,17 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { return cast<PointerType>(NewTy)->getPointerAddressSpace() == cast<PointerType>(OldTy)->getPointerAddressSpace(); } - if (NewTy->isIntegerTy() || OldTy->isIntegerTy()) - return true; + + // We can convert integers to integral pointers, but not to non-integral + // pointers. + if (OldTy->isIntegerTy()) + return !DL.isNonIntegralPointerType(NewTy); + + // We can convert integral pointers to integers, but non-integral pointers + // need to remain pointers. + if (!DL.isNonIntegralPointerType(OldTy)) + return NewTy->isIntegerTy(); + return false; } diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 850a01114eeba..ce6f93eb0c15f 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -91,7 +91,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeSeparateConstOffsetFromGEPPass(Registry); initializeSpeculativeExecutionLegacyPassPass(Registry); initializeStraightLineStrengthReducePass(Registry); - initializeLoadCombinePass(Registry); initializePlaceBackedgeSafepointsImplPass(Registry); initializePlaceSafepointsPass(Registry); initializeFloat2IntLegacyPassPass(Registry); diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 3e5993618c4c0..9397b87cdf563 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -321,7 +321,7 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls) { /// instruction from after the call to before the call, assuming that all /// instructions between the call and this instruction are movable. /// -static bool canMoveAboveCall(Instruction *I, CallInst *CI) { +static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) { // FIXME: We can move load/store/call/free instructions above the call if the // call does not mod/ref the memory location being processed. if (I->mayHaveSideEffects()) // This also handles volatile loads. 
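
The reason canMoveAboveCall now takes an AliasAnalysis pointer shows up in the next hunk: instead of refusing to hoist a load over any call that may write to memory, the pass asks whether the call can modify the specific location being loaded. A hedged sketch of that query, using the same AAResults call the patch makes; callMayClobberLoad is an illustrative name:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// True only if CI may write the memory L reads; a call that merely reads
// (or touches unrelated memory) no longer blocks hoisting the load.
static bool callMayClobberLoad(AliasAnalysis &AA, CallInst *CI, LoadInst *L) {
  return AA.getModRefInfo(CI, MemoryLocation::get(L)) & MRI_Mod;
}
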
@@ -332,10 +332,10 @@ static bool canMoveAboveCall(Instruction *I, CallInst *CI) { if (CI->mayHaveSideEffects()) { // Non-volatile loads may be moved above a call with side effects if it // does not write to memory and the load provably won't trap. - // FIXME: Writes to memory only matter if they may alias the pointer + // Writes to memory only matter if they may alias the pointer // being loaded from. const DataLayout &DL = L->getModule()->getDataLayout(); - if (CI->mayWriteToMemory() || + if ((AA->getModRefInfo(CI, MemoryLocation::get(L)) & MRI_Mod) || !isSafeToLoadUnconditionally(L->getPointerOperand(), L->getAlignment(), DL, L)) return false; @@ -492,10 +492,11 @@ static CallInst *findTRECandidate(Instruction *TI, return CI; } -static bool -eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry, - bool &TailCallsAreMarkedTail, - SmallVectorImpl<PHINode *> &ArgumentPHIs) { +static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, + BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVectorImpl<PHINode *> &ArgumentPHIs, + AliasAnalysis *AA) { // If we are introducing accumulator recursion to eliminate operations after // the call instruction that are both associative and commutative, the initial // value for the accumulator is placed in this variable. If this value is set @@ -515,7 +516,8 @@ eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry, // Check that this is the case now. BasicBlock::iterator BBI(CI); for (++BBI; &*BBI != Ret; ++BBI) { - if (canMoveAboveCall(&*BBI, CI)) continue; + if (canMoveAboveCall(&*BBI, CI, AA)) + continue; // If we can't move the instruction above the call, it might be because it // is an associative and commutative operation that could be transformed @@ -674,12 +676,17 @@ static bool foldReturnAndProcessPred(BasicBlock *BB, ReturnInst *Ret, bool &TailCallsAreMarkedTail, SmallVectorImpl<PHINode *> &ArgumentPHIs, bool CannotTailCallElimCallsMarkedTail, - const TargetTransformInfo *TTI) { + const TargetTransformInfo *TTI, + AliasAnalysis *AA) { bool Change = false; + // Make sure this block is a trivial return block. + assert(BB->getFirstNonPHIOrDbg() == Ret && + "Trying to fold non-trivial return block"); + // If the return block contains nothing but the return and PHI's, // there might be an opportunity to duplicate the return in its - // predecessors and perform TRC there. Look for predecessors that end + // predecessors and perform TRE there. Look for predecessors that end // in unconditional branch and recursive call(s). 
SmallVector<BranchInst*, 8> UncondBranchPreds; for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { @@ -706,7 +713,7 @@ static bool foldReturnAndProcessPred(BasicBlock *BB, ReturnInst *Ret, BB->eraseFromParent(); eliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs); + ArgumentPHIs, AA); ++NumRetDuped; Change = true; } @@ -719,16 +726,18 @@ static bool processReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, SmallVectorImpl<PHINode *> &ArgumentPHIs, bool CannotTailCallElimCallsMarkedTail, - const TargetTransformInfo *TTI) { + const TargetTransformInfo *TTI, + AliasAnalysis *AA) { CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail, TTI); if (!CI) return false; return eliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs); + ArgumentPHIs, AA); } -static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI) { +static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI, + AliasAnalysis *AA) { if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true") return false; @@ -763,11 +772,11 @@ static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI) if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) { bool Change = processReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs, !CanTRETailMarkedCall, TTI); + ArgumentPHIs, !CanTRETailMarkedCall, TTI, AA); if (!Change && BB->getFirstNonPHIOrDbg() == Ret) - Change = - foldReturnAndProcessPred(BB, Ret, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs, !CanTRETailMarkedCall, TTI); + Change = foldReturnAndProcessPred(BB, Ret, OldEntry, + TailCallsAreMarkedTail, ArgumentPHIs, + !CanTRETailMarkedCall, TTI, AA); MadeChange |= Change; } } @@ -797,6 +806,7 @@ struct TailCallElim : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<AAResultsWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); } @@ -805,7 +815,8 @@ struct TailCallElim : public FunctionPass { return false; return eliminateTailRecursion( - F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F)); + F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F), + &getAnalysis<AAResultsWrapperPass>().getAAResults()); } }; } @@ -826,8 +837,9 @@ PreservedAnalyses TailCallElimPass::run(Function &F, FunctionAnalysisManager &AM) { TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); + AliasAnalysis &AA = AM.getResult<AAManager>(F); - bool Changed = eliminateTailRecursion(F, &TTI); + bool Changed = eliminateTailRecursion(F, &TTI, &AA); if (!Changed) return PreservedAnalyses::all(); diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index ebde1f9a17dd6..b60dfb4f3541d 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -116,6 +116,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_wcslen: Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); + Changed |= setOnlyAccessesArgMemory(F); Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_strchr: diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 2af671636cbdb..5127eba3f9aea 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include 
"llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" @@ -1081,7 +1082,7 @@ static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr, } /// See if there is a dbg.value intrinsic for DIVar for the PHI node. -static bool PhiHasDebugValue(DILocalVariable *DIVar, +static bool PhiHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr, PHINode *APN) { // Since we can't guarantee that the original dbg.declare instrinsic @@ -1159,7 +1160,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, DbgValue->insertAfter(LI); } -/// Inserts a llvm.dbg.value intrinsic after a phi +/// Inserts a llvm.dbg.value intrinsic after a phi /// that has an associated llvm.dbg.decl intrinsic. void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, PHINode *APN, DIBuilder &Builder) { @@ -1742,12 +1743,12 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, // Preserve !invariant.group in K. break; case LLVMContext::MD_align: - K->setMetadata(Kind, + K->setMetadata(Kind, MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); break; case LLVMContext::MD_dereferenceable: case LLVMContext::MD_dereferenceable_or_null: - K->setMetadata(Kind, + K->setMetadata(Kind, MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); break; } @@ -1847,6 +1848,49 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) { return false; } +void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N, + LoadInst &NewLI) { + auto *NewTy = NewLI.getType(); + + // This only directly applies if the new type is also a pointer. + if (NewTy->isPointerTy()) { + NewLI.setMetadata(LLVMContext::MD_nonnull, N); + return; + } + + // The only other translation we can do is to integral loads with !range + // metadata. + if (!NewTy->isIntegerTy()) + return; + + MDBuilder MDB(NewLI.getContext()); + const Value *Ptr = OldLI.getPointerOperand(); + auto *ITy = cast<IntegerType>(NewTy); + auto *NullInt = ConstantExpr::getPtrToInt( + ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy); + auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1)); + NewLI.setMetadata(LLVMContext::MD_range, + MDB.createRange(NonNullInt, NullInt)); +} + +void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, + MDNode *N, LoadInst &NewLI) { + auto *NewTy = NewLI.getType(); + + // Give up unless it is converted to a pointer where there is a single very + // valuable mapping we can do reliably. + // FIXME: It would be nice to propagate this in more ways, but the type + // conversions make it hard. + if (!NewTy->isPointerTy()) + return; + + unsigned BitWidth = DL.getTypeSizeInBits(NewTy); + if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) { + MDNode *NN = MDNode::get(OldLI.getContext(), None); + NewLI.setMetadata(LLVMContext::MD_nonnull, NN); + } +} + namespace { /// A potential constituent of a bitreverse or bswap expression. See /// collectBitParts for a fuller explanation. 
@@ -1968,7 +2012,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, unsigned NumMaskedBits = AndMask.countPopulation(); if (!MatchBitReversals && NumMaskedBits % 8 != 0) return Result; - + auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, MatchBitReversals, BPS); if (!Res) diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index f3db278ef1e49..e21e34df8ded0 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -72,7 +72,6 @@ using namespace llvm; #define DEBUG_TYPE "loop-simplify" -STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted"); STATISTIC(NumNested , "Number of nested loops split out"); // If the block isn't already, move the new block to right after some 'outside @@ -152,37 +151,6 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, return PreheaderBB; } -/// \brief Ensure that the loop preheader dominates all exit blocks. -/// -/// This method is used to split exit blocks that have predecessors outside of -/// the loop. -static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, - DominatorTree *DT, LoopInfo *LI, - bool PreserveLCSSA) { - SmallVector<BasicBlock*, 8> LoopBlocks; - for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { - BasicBlock *P = *I; - if (L->contains(P)) { - // Don't do this if the loop is exited via an indirect branch. - if (isa<IndirectBrInst>(P->getTerminator())) return nullptr; - - LoopBlocks.push_back(P); - } - } - - assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); - BasicBlock *NewExitBB = nullptr; - - NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", DT, LI, - PreserveLCSSA); - if (!NewExitBB) - return nullptr; - - DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " - << NewExitBB->getName() << "\n"); - return NewExitBB; -} - /// Add the specified block, and all of its predecessors, to the specified set, /// if it's not already in there. Stop predecessor traversal when we reach /// StopBlock. @@ -346,16 +314,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, // Split edges to exit blocks from the inner loop, if they emerged in the // process of separating the outer one. - SmallVector<BasicBlock *, 8> ExitBlocks; - L->getExitBlocks(ExitBlocks); - SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), - ExitBlocks.end()); - for (BasicBlock *ExitBlock : ExitBlockSet) { - if (any_of(predecessors(ExitBlock), - [L](BasicBlock *BB) { return !L->contains(BB); })) { - rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA); - } - } + formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA); if (PreserveLCSSA) { // Fix LCSSA form for L. Some values, which previously were only used inside @@ -563,29 +522,16 @@ ReprocessLoop: BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); - if (Preheader) { - ++NumInserted; + if (Preheader) Changed = true; - } } // Next, check to make sure that all exit nodes of the loop only have // predecessors that are inside of the loop. This check guarantees that the // loop preheader/header will dominate the exit blocks. If the exit block has // predecessors from outside of the loop, split the edge now. 
- SmallVector<BasicBlock*, 8> ExitBlocks; - L->getExitBlocks(ExitBlocks); - - SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), - ExitBlocks.end()); - for (BasicBlock *ExitBlock : ExitBlockSet) { - if (any_of(predecessors(ExitBlock), - [L](BasicBlock *BB) { return !L->contains(BB); })) { - rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA); - ++NumInserted; - Changed = true; - } - } + if (formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA)) + Changed = true; // If the header has more than two predecessors at this point (from the // preheader and from multiple backedges), we must adjust the loop. @@ -614,10 +560,8 @@ ReprocessLoop: // insert a new block that all backedges target, then make it jump to the // loop header. LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI); - if (LoopLatch) { - ++NumInserted; + if (LoopLatch) Changed = true; - } } const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); @@ -645,7 +589,22 @@ ReprocessLoop: // loop-invariant instructions out of the way to open up more // opportunities, and the disadvantage of having the responsibility // to preserve dominator information. - if (ExitBlockSet.size() == 1) { + auto HasUniqueExitBlock = [&]() { + BasicBlock *UniqueExit = nullptr; + for (auto *ExitingBB : ExitingBlocks) + for (auto *SuccBB : successors(ExitingBB)) { + if (L->contains(SuccBB)) + continue; + + if (!UniqueExit) + UniqueExit = SuccBB; + else if (UniqueExit != SuccBB) + return false; + } + + return true; + }; + if (HasUniqueExitBlock()) { for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitingBlock = ExitingBlocks[i]; if (!ExitingBlock->getSinglePredecessor()) continue; diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index a920cd86a26a8..5f85e17927fa2 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -472,10 +472,22 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // exit block only. if (!L->isLoopSimplifyForm()) return false; - BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop - if (!Exit) - return false; + // Guaranteed by LoopSimplifyForm. + BasicBlock *Latch = L->getLoopLatch(); + + BasicBlock *LatchExit = L->getUniqueExitBlock(); // successor out of loop + if (!LatchExit) + return false; + // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the + // targets of the Latch be the single exit block out of the loop. This needs + // to be guaranteed by the callers of UnrollRuntimeLoopRemainder. + BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); + assert((LatchBR->getSuccessor(0) == LatchExit || + LatchBR->getSuccessor(1) == LatchExit) && + "one of the loop latch successors should be " + "the exit block!"); + (void)LatchBR; // Use Scalar Evolution to compute the trip count. This allows more loops to // be unrolled than relying on induction var simplification. if (!SE) @@ -510,25 +522,13 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, if (Log2_32(Count) > BEWidth) return false; - BasicBlock *Latch = L->getLoopLatch(); - - // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the - // targets of the Latch be the single exit block out of the loop. This needs - // to be guaranteed by the callers of UnrollRuntimeLoopRemainder. 
- BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); - assert( - (LatchBR->getSuccessor(0) == Exit || LatchBR->getSuccessor(1) == Exit) && - "one of the loop latch successors should be " - "the exit block!"); - // Avoid warning of unused `LatchBR` variable in release builds. - (void)LatchBR; // Loop structure is the following: // // PreHeader // Header // ... // Latch - // Exit + // LatchExit BasicBlock *NewPreHeader; BasicBlock *NewExit = nullptr; @@ -541,9 +541,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // Split PreHeader to insert a branch around loop for unrolling. NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); - // Split Exit to create phi nodes from branch above. - SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); - NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", + // Split LatchExit to create phi nodes from branch above. + SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit)); + NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Split NewExit to insert epilog remainder loop. EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI); @@ -570,7 +570,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // Latch Header // *NewExit ... // *EpilogPreHeader Latch - // Exit Exit + // LatchExit LatchExit // Calculate conditions for branch around loop for unrolling // in epilog case and around prolog remainder loop in prolog case. @@ -648,7 +648,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // Clone all the basic blocks in the loop. If Count is 2, we don't clone // the loop, otherwise we create a cloned loop to execute the extra // iterations. This function adds the appropriate CFG connections. - BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit; + BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); @@ -672,7 +672,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // EpilogHeader Header // ... ... // EpilogLatch Latch - // Exit Exit + // LatchExit LatchExit // Rewrite the cloned instruction operands to use the values created when the // clone is created. @@ -686,7 +686,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, if (UseEpilogRemainder) { // Connect the epilog code to the original loop and update the // PHI functions. 
- ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader, + ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index 412f6129407ed..0ed33945ef407 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" @@ -29,6 +30,7 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -87,8 +89,7 @@ RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT, // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT // with a new integer type of the corresponding bit width. - if (match(J, m_CombineOr(m_And(m_Instruction(I), m_APInt(M)), - m_And(m_APInt(M), m_Instruction(I))))) { + if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) { int32_t Bits = (*M + 1).exactLogBase2(); if (Bits > 0) { RT = IntegerType::get(Phi->getContext(), Bits); @@ -923,6 +924,69 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, return true; } +bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA) { + bool Changed = false; + + // We re-use a vector for the in-loop predecessors. + SmallVector<BasicBlock *, 4> InLoopPredecessors; + + auto RewriteExit = [&](BasicBlock *BB) { + assert(InLoopPredecessors.empty() && + "Must start with an empty predecessors list!"); + auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); }); + + // See if there are any non-loop predecessors of this exit block and + // keep track of the in-loop predecessors. + bool IsDedicatedExit = true; + for (auto *PredBB : predecessors(BB)) + if (L->contains(PredBB)) { + if (isa<IndirectBrInst>(PredBB->getTerminator())) + // We cannot rewrite exiting edges from an indirectbr. + return false; + + InLoopPredecessors.push_back(PredBB); + } else { + IsDedicatedExit = false; + } + + assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!"); + + // Nothing to do if this is already a dedicated exit. + if (IsDedicatedExit) + return false; + + auto *NewExitBB = SplitBlockPredecessors( + BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA); + + if (!NewExitBB) + DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: " + << *L << "\n"); + else + DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " + << NewExitBB->getName() << "\n"); + return true; + }; + + // Walk the exit blocks directly rather than building up a data structure for + // them, but only visit each one once. + SmallPtrSet<BasicBlock *, 4> Visited; + for (auto *BB : L->blocks()) + for (auto *SuccBB : successors(BB)) { + // We're looking for exit blocks so skip in-loop successors. + if (L->contains(SuccBB)) + continue; + + // Visit each exit block exactly once. + if (!Visited.insert(SuccBB).second) + continue; + + Changed |= RewriteExit(SuccBB); + } + + return Changed; +} + /// \brief Returns the instructions that use values defined in the loop.
SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) { SmallVector<Instruction *, 8> UsedOutside; diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 1abdb24848506..eac2867233bc0 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5702,14 +5702,14 @@ bool LoopVectorizationLegality::memoryInstructionCanBeWidened(Instruction *I, void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { // We should not collect Uniforms more than once per VF. Right now, - // this function is called from collectUniformsAndScalars(), which + // this function is called from collectUniformsAndScalars(), which // already does this check. Collecting Uniforms for VF=1 does not make any // sense. assert(VF >= 2 && !Uniforms.count(VF) && "This function should not be visited twice for the same VF"); - // Visit the list of Uniforms. If we'll not find any uniform value, we'll + // Visit the list of Uniforms. If we'll not find any uniform value, we'll // not analyze again. Uniforms.count(VF) will return 1. Uniforms[VF].clear(); @@ -5988,10 +5988,10 @@ void InterleavedAccessInfo::collectConstStrideAccesses( continue; Value *Ptr = getPointerOperand(&I); - // We don't check wrapping here because we don't know yet if Ptr will be - // part of a full group or a group with gaps. Checking wrapping for all + // We don't check wrapping here because we don't know yet if Ptr will be + // part of a full group or a group with gaps. Checking wrapping for all // pointers (even those that end up in groups with no gaps) will be overly - // conservative. For full groups, wrapping should be ok since if we would + // conservative. For full groups, wrapping should be ok since if we would // wrap around the address space we would do a memory access at nullptr // even without the transformation. The wrapping checks are therefore // deferred until after we've formed the interleaved groups. @@ -6244,7 +6244,7 @@ void InterleavedAccessInfo::analyzeInterleaving( Instruction *LastMember = Group->getMember(Group->getFactor() - 1); if (LastMember) { Value *LastMemberPtr = getPointerOperand(LastMember); - if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false, + if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false, /*ShouldCheckWrap=*/true)) { DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to " "last group member potentially pointer-wrapping.\n"); @@ -6252,9 +6252,9 @@ void InterleavedAccessInfo::analyzeInterleaving( } } else { // Case 3: A non-reversed interleaved load group with gaps: We need - // to execute at least one scalar epilogue iteration. This will ensure + // to execute at least one scalar epilogue iteration. This will ensure // we don't speculatively access memory out-of-bounds. We only need - // to look for a member at index factor - 1, since every group must have + // to look for a member at index factor - 1, since every group must have // a member at index zero. if (Group->isReverse()) { releaseGroup(Group); @@ -7789,8 +7789,18 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Check the loop for a trip count threshold: // do not vectorize loops with a tiny trip count. 
- const unsigned MaxTC = SE->getSmallConstantMaxTripCount(L); - if (MaxTC > 0u && MaxTC < TinyTripCountVectorThreshold) { + unsigned ExpectedTC = SE->getSmallConstantMaxTripCount(L); + bool HasExpectedTC = (ExpectedTC > 0); + + if (!HasExpectedTC && LoopVectorizeWithBlockFrequency) { + auto EstimatedTC = getLoopEstimatedTripCount(L); + if (EstimatedTC) { + ExpectedTC = *EstimatedTC; + HasExpectedTC = true; + } + } + + if (HasExpectedTC && ExpectedTC < TinyTripCountVectorThreshold) { DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " << "This loop is not worth vectorizing."); if (Hints.getForce() == LoopVectorizeHints::FK_Enabled) @@ -7822,18 +7832,6 @@ bool LoopVectorizePass::processLoop(Loop *L) { bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize(); - // Compute the weighted frequency of this loop being executed and see if it - // is less than 20% of the function entry baseline frequency. Note that we - // always have a canonical loop here because we think we *can* vectorize. - // FIXME: This is hidden behind a flag due to pervasive problems with - // exactly what block frequency models. - if (LoopVectorizeWithBlockFrequency) { - BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader()); - if (Hints.getForce() != LoopVectorizeHints::FK_Enabled && - LoopEntryFreq < ColdEntryFreq) - OptForSize = true; - } - // Check the function attributes to see if implicit floats are allowed. // FIXME: This check doesn't seem possibly correct -- what if the loop is // an integer loop and the vector instructions selected are purely integer @@ -8015,11 +8013,6 @@ bool LoopVectorizePass::runImpl( DB = &DB_; ORE = &ORE_; - // Compute some weights outside of the loop over the loops. Compute this - // using a BranchProbability to re-use its scaling math. - const BranchProbability ColdProb(1, 5); // 20% - ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb; - // Don't attempt if // 1. the target claims to have no vector registers, and // 2. interleaving won't help ILP. diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index d1349535f2982..b267230d31859 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3327,12 +3327,10 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD, if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) { BundleMember->Dependencies++; ScheduleData *DestBundle = UseSD->FirstInBundle; - if (!DestBundle->IsScheduled) { + if (!DestBundle->IsScheduled) BundleMember->incrementUnscheduledDeps(1); - } - if (!DestBundle->hasValidDependencies()) { + if (!DestBundle->hasValidDependencies()) WorkList.push_back(DestBundle); - } } } else { // I'm not sure if this can ever happen. But we need to be safe.
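
The LoopVectorize hunks above replace the block-frequency cold-loop heuristic with a trip-count estimate: prefer SCEV's constant max trip count, and when that is unknown fall back to getLoopEstimatedTripCount, which derives an estimate from branch_weights profile metadata on the latch branch (returning None when no profile is present). Condensed into a standalone sketch under those assumptions; knownTinyTripCount is an illustrative name, not from the patch:

#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

using namespace llvm;

// True if the loop provably (or per profile data) runs fewer than
// Threshold iterations and is therefore not worth vectorizing.
static bool knownTinyTripCount(Loop *L, ScalarEvolution &SE,
                               unsigned Threshold) {
  unsigned ExpectedTC = SE.getSmallConstantMaxTripCount(L);
  bool HasExpectedTC = ExpectedTC > 0;

  if (!HasExpectedTC)
    if (Optional<unsigned> EstimatedTC = getLoopEstimatedTripCount(L)) {
      ExpectedTC = *EstimatedTC;
      HasExpectedTC = true;
    }

  return HasExpectedTC && ExpectedTC < Threshold;
}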